ClangFormat: apply to source, most of intern

Apply clang-format as proposed in T53211.

For details on usage and instructions for migrating branches
without conflicts, see:

https://wiki.blender.org/wiki/Tools/ClangFormat
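
A rough usage sketch (illustrative only; file paths and branch names below are placeholders, and the wiki page above remains the authoritative reference): formatting is driven by the .clang-format file at the repository root, so a file can be reformatted in place with a plain clang-format invocation, and a branch created before this commit is typically migrated by re-formatting it with the same clang-format version before merging:

    # reformat a single file in place (placeholder path)
    clang-format -i path/to/file.c

    # migrate a pre-existing branch across this commit:
    # reformat the branch's files with the same clang-format
    # version, commit the result, then merge or rebase as usual
    git checkout my-feature-branch
    clang-format -i <files changed on the branch>
    git commit -am "Apply clang-format"
    git merge master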
2019-04-17 06:17:24 +02:00
parent b3dabc200a
commit e12c08e8d1
4481 changed files with 1230080 additions and 1155401 deletions

View File

@@ -113,7 +113,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t
atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
@@ -123,7 +124,6 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new);
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,

View File

@@ -65,9 +65,9 @@ ATOMIC_INLINE size_t atomic_add_and_fetch_z(size_t *p, size_t x)
ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (size_t)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (size_t)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
@@ -83,9 +83,9 @@ ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x)
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x)
{
#if (LG_SIZEOF_PTR == 8)
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (size_t)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_PTR == 4)
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (size_t)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
@@ -101,8 +101,8 @@ ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new)
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
{
size_t prev_value;
while((prev_value = *p) < x) {
if(atomic_cas_z(p, prev_value, x) == prev_value) {
while ((prev_value = *p) < x) {
if (atomic_cas_z(p, prev_value, x) == prev_value) {
break;
}
}
@@ -111,7 +111,8 @@ ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
/******************************************************************************/
/* unsigned operations. */
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT, "sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT,
"sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
{
@@ -125,9 +126,9 @@ ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned int)atomic_add_and_fetch_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned int)atomic_add_and_fetch_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}
@@ -143,9 +144,9 @@ ATOMIC_INLINE unsigned int atomic_fetch_and_add_u(unsigned int *p, unsigned int
ATOMIC_INLINE unsigned int atomic_fetch_and_sub_u(unsigned int *p, unsigned int x)
{
#if (LG_SIZEOF_INT == 8)
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t)-((int64_t)x));
return (unsigned int)atomic_fetch_and_add_uint64((uint64_t *)p, (uint64_t) - ((int64_t)x));
#elif (LG_SIZEOF_INT == 4)
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t)-((int32_t)x));
return (unsigned int)atomic_fetch_and_add_uint32((uint32_t *)p, (uint32_t) - ((int32_t)x));
#endif
}

View File

@@ -40,7 +40,7 @@
#include <windows.h>
#include <intrin.h>
#if defined (__clang__)
#if defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#endif
@@ -209,8 +209,7 @@ ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
#endif
}
#if defined (__clang__)
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@@ -109,10 +109,9 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
@@ -120,10 +119,9 @@ ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
@@ -141,21 +139,16 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
uint64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
@@ -163,10 +156,9 @@ ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
x = -x;
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (x), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return x;
}
@@ -184,11 +176,7 @@ ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
int64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
# else
@@ -236,10 +224,9 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret + x;
}
@@ -247,10 +234,9 @@ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret - x;
}
@@ -258,11 +244,7 @@ ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
uint32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
@@ -270,10 +252,9 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret + x;
}
@@ -281,10 +262,9 @@ ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = -x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (ret), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
return ret - x;
}
@@ -292,11 +272,7 @@ ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
int32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}

View File

@@ -100,8 +100,7 @@
/* Copied from BLI_utils... */
/* C++ can't use _Static_assert, expects static_assert() but c++0x only,
* Coverity also errors out. */
#if (!defined(__cplusplus)) && \
(!defined(__COVERITY__)) && \
#if (!defined(__cplusplus)) && (!defined(__COVERITY__)) && \
(defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
# define ATOMIC_STATIC_ASSERT(a, msg) __extension__ _Static_assert(a, msg);
#else
@@ -110,17 +109,19 @@
* expand __LINE__ with one indirection before doing the actual concatenation. */
# define ATOMIC_ASSERT_CONCAT_(a, b) a##b
# define ATOMIC_ASSERT_CONCAT(a, b) ATOMIC_ASSERT_CONCAT_(a, b)
/* These can't be used after statements in c89. */
/* These can't be used after statements in c89. */
# if defined(__COUNTER__) /* MSVC */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
; \
enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
# else /* older gcc, clang... */
/* This can't be used twice on the same line so ensure if using in headers
/* This can't be used twice on the same line so ensure if using in headers
* that the headers are not included twice (by wrapping in #ifndef...#endif)
* Note it doesn't cause an issue when used on same line of separate modules
* compiled with gcc -combine -fwhole-program. */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
; \
enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
# endif
#endif

View File

@@ -38,13 +38,13 @@ static PyObject *AUD_getSoundFromPointer(PyObject *self, PyObject *args)
if (PyArg_Parse(args, "l:_sound_from_pointer", &lptr)) {
if (lptr) {
AUD_Sound* sound = BKE_sound_get_factory((void *) lptr);
AUD_Sound *sound = BKE_sound_get_factory((void *)lptr);
if (sound) {
Sound *obj = (Sound *)Sound_empty();
if (obj) {
obj->sound = AUD_Sound_copy(sound);
return (PyObject *) obj;
return (PyObject *)obj;
}
}
}
@@ -54,14 +54,15 @@ static PyObject *AUD_getSoundFromPointer(PyObject *self, PyObject *args)
}
static PyMethodDef meth_sound_from_pointer[] = {
{"_sound_from_pointer", (PyCFunction)AUD_getSoundFromPointer, METH_O,
{"_sound_from_pointer",
(PyCFunction)AUD_getSoundFromPointer,
METH_O,
"_sound_from_pointer(pointer)\n\n"
"Returns the corresponding :class:`Factory` object.\n\n"
":arg pointer: The pointer to the bSound object as long.\n"
":type pointer: long\n"
":return: The corresponding :class:`Factory` object.\n"
":rtype: :class:`Factory`"}
};
":rtype: :class:`Factory`"}};
PyObject *AUD_initPython(void)
{
@@ -71,9 +72,9 @@ PyObject *AUD_initPython(void)
return NULL;
}
PyModule_AddObject(module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyModule_AddObject(
module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module);
return module;
}

View File

@@ -22,25 +22,24 @@
* \ingroup audaspaceintern
*/
#ifndef __AUD_PYINIT_H__
#define __AUD_PYINIT_H__
#ifdef WITH_PYTHON
#include "Python.h"
# include "Python.h"
#ifdef __cplusplus
# ifdef __cplusplus
extern "C" {
#endif
# endif
/**
* Initializes the Python module.
*/
extern PyObject *AUD_initPython(void);
#ifdef __cplusplus
# ifdef __cplusplus
}
#endif
# endif
#endif

View File

@@ -33,26 +33,26 @@ void *AUD_createSet()
void AUD_destroySet(void *set)
{
delete reinterpret_cast<std::set<void *>*>(set);
delete reinterpret_cast<std::set<void *> *>(set);
}
char AUD_removeSet(void *set, void *entry)
{
if (set)
return reinterpret_cast<std::set<void *>*>(set)->erase(entry);
return reinterpret_cast<std::set<void *> *>(set)->erase(entry);
return 0;
}
void AUD_addSet(void *set, void *entry)
{
if (entry)
reinterpret_cast<std::set<void *>*>(set)->insert(entry);
reinterpret_cast<std::set<void *> *>(set)->insert(entry);
}
void *AUD_getSet(void *set)
{
if (set) {
std::set<void *>* rset = reinterpret_cast<std::set<void *>*>(set);
std::set<void *> *rset = reinterpret_cast<std::set<void *> *>(set);
if (!rset->empty()) {
std::set<void *>::iterator it = rset->begin();
void *result = *it;
@@ -61,5 +61,5 @@ void *AUD_getSet(void *set)
}
}
return (void*) 0;
return (void *)0;
}

View File

@@ -73,13 +73,14 @@ extern "C" {
#endif /* __cplusplus */
#ifdef __GNUC__
# define _CLOG_ATTR_NONNULL(args ...) __attribute__((nonnull(args)))
# define _CLOG_ATTR_NONNULL(args...) __attribute__((nonnull(args)))
#else
# define _CLOG_ATTR_NONNULL(...)
#endif
#ifdef __GNUC__
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) __attribute__((format(printf, format_param, dots_param)))
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) \
__attribute__((format(printf, format_param, dots_param)))
#else
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param)
#endif
@@ -119,14 +120,17 @@ typedef struct CLG_LogRef {
CLG_LogType *type;
} CLG_LogRef;
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *message)
_CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *format, ...)
_CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message) _CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *format,
...) _CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
/* Main initializer and distructor (per session, not logger). */
void CLG_init(void);
@@ -147,48 +151,60 @@ void CLG_logref_init(CLG_LogRef *clg_ref);
/** Declare outside function, declare as extern in header. */
#define CLG_LOGREF_DECLARE_GLOBAL(var, id) \
static CLG_LogRef _static_ ## var = {id}; \
CLG_LogRef *var = &_static_ ## var
static CLG_LogRef _static_##var = {id}; \
CLG_LogRef *var = &_static_##var
/** Initialize struct once. */
#define CLOG_ENSURE(clg_ref) \
((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type))
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) { \
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) { \
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) { \
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
const char *_str = str; \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \
MEM_freeN((void *)_str); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_INFO(clg_ref, level, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_INFO(clg_ref, level, ...) \
CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__)
#define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__)
#define CLOG_STR_INFO(clg_ref, level, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_INFO(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str)
/* Allocated string which is immediately freed. */
#define CLOG_STR_INFO_N(clg_ref, level, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_INFO_N(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str)

View File

@@ -46,7 +46,6 @@
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
/* Only other dependency (could use regular malloc too). */
#include "MEM_guardedalloc.h"
@@ -300,15 +299,12 @@ static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
const CLG_IDFilter *flt = ctx->filters[i];
while (flt != NULL) {
const int len = strlen(flt->match);
if (STREQ(flt->match, "*") ||
((len == identifier_len) && (STREQ(identifier, flt->match))))
{
if (STREQ(flt->match, "*") || ((len == identifier_len) && (STREQ(identifier, flt->match)))) {
return (bool)i;
}
if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) {
if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) ||
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1)))
{
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1))) {
return (bool)i;
}
}
@@ -388,9 +384,11 @@ static void write_timestamp(CLogStringBuf *cstr, const uint64_t timestamp_tick_s
{
char timestamp_str[64];
const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start;
const uint timestamp_len = snprintf(
timestamp_str, sizeof(timestamp_str), "%" PRIu64 ".%03u ",
timestamp / 1000, (uint)(timestamp % 1000));
const uint timestamp_len = snprintf(timestamp_str,
sizeof(timestamp_str),
"%" PRIu64 ".%03u ",
timestamp / 1000,
(uint)(timestamp % 1000));
clg_str_append_with_len(cstr, timestamp_str, timestamp_len);
}
@@ -415,7 +413,10 @@ static void write_type(CLogStringBuf *cstr, CLG_LogType *lg)
clg_str_append(cstr, "): ");
}
static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const char *fn, const bool use_basename)
static void write_file_line_fn(CLogStringBuf *cstr,
const char *file_line,
const char *fn,
const bool use_basename)
{
uint file_line_len = strlen(file_line);
if (use_basename) {
@@ -431,14 +432,15 @@ static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const
}
clg_str_append_with_len(cstr, file_line, file_line_len);
clg_str_append(cstr, " ");
clg_str_append(cstr, fn);
clg_str_append(cstr, ": ");
}
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message)
{
CLogStringBuf cstr;
@@ -473,9 +475,12 @@ void CLG_log_str(
}
}
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *fmt, ...)
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *fmt,
...)
{
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
@@ -552,7 +557,9 @@ static void CLG_ctx_backtrace_fn_set(CLogContext *ctx, void (*backtrace_fn)(void
ctx->callbacks.backtrace_fn = backtrace_fn;
}
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type_match, int type_match_len)
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list,
const char *type_match,
int type_match_len)
{
if (type_match_len == 0) {
return;
@@ -564,12 +571,16 @@ static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type
/* no need to null terminate since we calloc'd */
}
static void CLG_ctx_type_filter_exclude(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_exclude(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len);
}
static void CLG_ctx_type_filter_include(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_include(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len);
}
@@ -679,7 +690,6 @@ void CLG_level_set(int level)
CLG_ctx_level_set(g_ctx, level);
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -33,23 +33,19 @@ using std::string;
using std::vector;
namespace std {
template<typename T>
std::string to_string(const T &n) {
template<typename T> std::string to_string(const T &n)
{
std::ostringstream s;
s << n;
return s.str();
}
}
} // namespace std
class CompilationSettings
{
public:
CompilationSettings()
: target_arch(0),
bits(64),
verbose(false),
fast_math(false)
{}
class CompilationSettings {
public:
CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
{
}
string cuda_toolkit_dir;
string input_file;
@@ -65,53 +61,53 @@ public:
static bool compile_cuda(CompilationSettings &settings)
{
const char* headers[] = {"stdlib.h" , "float.h", "math.h", "stdio.h"};
const char* header_content[] = {"\n", "\n", "\n", "\n"};
const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h"};
const char *header_content[] = {"\n", "\n", "\n", "\n"};
printf("Building %s\n", settings.input_file.c_str());
string code;
if(!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) {
fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str());
return false;
}
vector<string> options;
for(size_t i = 0; i < settings.includes.size(); i++) {
for (size_t i = 0; i < settings.includes.size(); i++) {
options.push_back("-I" + settings.includes[i]);
}
for(size_t i = 0; i < settings.defines.size(); i++) {
for (size_t i = 0; i < settings.defines.size(); i++) {
options.push_back("-D" + settings.defines[i]);
}
options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion()));
options.push_back("-arch=compute_" + std::to_string(settings.target_arch));
options.push_back("--device-as-default-execution-space");
if(settings.fast_math)
if (settings.fast_math)
options.push_back("--use_fast_math");
nvrtcProgram prog;
nvrtcResult result = nvrtcCreateProgram(&prog,
code.c_str(), // buffer
NULL, // name
sizeof(headers) / sizeof(void*), // numHeaders
sizeof(headers) / sizeof(void *), // numHeaders
header_content, // headers
headers); // includeNames
if(result != NVRTC_SUCCESS) {
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
return false;
}
/* Tranfer options to a classic C array. */
vector<const char*> opts(options.size());
for(size_t i = 0; i < options.size(); i++) {
vector<const char *> opts(options.size());
for (size_t i = 0; i < options.size(); i++) {
opts[i] = options[i].c_str();
}
result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
if(result != NVRTC_SUCCESS) {
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
size_t log_size;
@@ -127,21 +123,22 @@ static bool compile_cuda(CompilationSettings &settings)
/* Retrieve the ptx code. */
size_t ptx_size;
result = nvrtcGetPTXSize(prog, &ptx_size);
if(result != NVRTC_SUCCESS) {
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
return false;
}
vector<char> ptx_code(ptx_size);
result = nvrtcGetPTX(prog, &ptx_code[0]);
if(result != NVRTC_SUCCESS) {
if (result != NVRTC_SUCCESS) {
fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
return false;
}
/* Write a file in the temp folder with the ptx code. */
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + OIIO::Filesystem::unique_path();
FILE * f= fopen(settings.ptx_file.c_str(), "wb");
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
OIIO::Filesystem::unique_path();
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
@@ -151,26 +148,25 @@ static bool compile_cuda(CompilationSettings &settings)
static bool link_ptxas(CompilationSettings &settings)
{
string cudapath = "";
if(settings.cuda_toolkit_dir.size())
if (settings.cuda_toolkit_dir.size())
cudapath = settings.cuda_toolkit_dir + "/bin/";
string ptx = "\"" +cudapath + "ptxas\" " + settings.ptx_file +
" -o " + settings.output_file +
" --gpu-name sm_" + std::to_string(settings.target_arch) +
" -m" + std::to_string(settings.bits);
string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
" --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
std::to_string(settings.bits);
if(settings.verbose) {
if (settings.verbose) {
ptx += " --verbose";
printf("%s\n", ptx.c_str());
}
int pxresult = system(ptx.c_str());
if(pxresult) {
if (pxresult) {
fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
return false;
}
if(!OIIO::Filesystem::remove(settings.ptx_file)) {
if (!OIIO::Filesystem::remove(settings.ptx_file)) {
fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str());
}
@@ -180,7 +176,7 @@ static bool link_ptxas(CompilationSettings &settings)
static bool init(CompilationSettings &settings)
{
#ifdef _MSC_VER
if(settings.cuda_toolkit_dir.size()) {
if (settings.cuda_toolkit_dir.size()) {
SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
}
#else
@@ -188,42 +184,42 @@ static bool init(CompilationSettings &settings)
#endif
int cuewresult = cuewInit(CUEW_INIT_NVRTC);
if(cuewresult != CUEW_SUCCESS) {
if (cuewresult != CUEW_SUCCESS) {
fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult);
return false;
}
if(cuewNvrtcVersion() < 80) {
if (cuewNvrtcVersion() < 80) {
fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion());
return false;
}
if(!nvrtcCreateProgram) {
if (!nvrtcCreateProgram) {
fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n");
return false;
}
if(!nvrtcCompileProgram) {
if (!nvrtcCompileProgram) {
fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n");
return false;
}
if(!nvrtcGetProgramLogSize) {
if (!nvrtcGetProgramLogSize) {
fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n");
return false;
}
if(!nvrtcGetProgramLog) {
if (!nvrtcGetProgramLog) {
fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n");
return false;
}
if(!nvrtcGetPTXSize) {
if (!nvrtcGetPTXSize) {
fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n");
return false;
}
if(!nvrtcGetPTX) {
if (!nvrtcGetPTX) {
fprintf(stderr, "Error: nvrtcGetPTX not resolved\n");
return false;
}
@@ -235,34 +231,52 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s
{
OIIO::ArgParse ap;
ap.options("Usage: cycles_cubin_cc [options]",
"-target %d", &settings.target_arch, "target shader model",
"-m %d", &settings.bits, "Cuda architecture bits",
"-i %s", &settings.input_file, "Input source filename",
"-o %s", &settings.output_file, "Output cubin filename",
"-I %L", &settings.includes, "Add additional includepath",
"-D %L", &settings.defines, "Add additional defines",
"-v", &settings.verbose, "Use verbose logging",
"--use_fast_math", &settings.fast_math, "Use fast math",
"-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory",
"-target %d",
&settings.target_arch,
"target shader model",
"-m %d",
&settings.bits,
"Cuda architecture bits",
"-i %s",
&settings.input_file,
"Input source filename",
"-o %s",
&settings.output_file,
"Output cubin filename",
"-I %L",
&settings.includes,
"Add additional includepath",
"-D %L",
&settings.defines,
"Add additional defines",
"-v",
&settings.verbose,
"Use verbose logging",
"--use_fast_math",
&settings.fast_math,
"Use fast math",
"-cuda-toolkit-dir %s",
&settings.cuda_toolkit_dir,
"path to the cuda toolkit binary directory",
NULL);
if(ap.parse(argc, argv) < 0) {
if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
return false;
}
if(!settings.output_file.size()) {
if (!settings.output_file.size()) {
fprintf(stderr, "Error: Output file not set(-o), required\n\n");
return false;
}
if(!settings.input_file.size()) {
if (!settings.input_file.size()) {
fprintf(stderr, "Error: Input file not set(-i, required\n\n");
return false;
}
if(!settings.target_arch) {
if (!settings.target_arch) {
fprintf(stderr, "Error: target shader model not set (-target), required\n\n");
return false;
}
@@ -274,22 +288,22 @@ int main(int argc, const char **argv)
{
CompilationSettings settings;
if(!parse_parameters(argc, argv, settings)) {
if (!parse_parameters(argc, argv, settings)) {
fprintf(stderr, "Error: invalid parameters, exiting\n");
exit(EXIT_FAILURE);
}
if(!init(settings)) {
if (!init(settings)) {
fprintf(stderr, "Error: initialization error, exiting\n");
exit(EXIT_FAILURE);
}
if(!compile_cuda(settings)) {
if (!compile_cuda(settings)) {
fprintf(stderr, "Error: compilation error, exiting\n");
exit(EXIT_FAILURE);
}
if(!link_ptxas(settings)) {
if (!link_ptxas(settings)) {
exit(EXIT_FAILURE);
}

View File

@@ -39,10 +39,10 @@ int main(int argc, const char **argv)
bool list = false, debug = false;
int threads = 0, verbosity = 1;
vector<DeviceType>& types = Device::available_types();
vector<DeviceType> &types = Device::available_types();
foreach(DeviceType type, types) {
if(devicelist != "")
foreach (DeviceType type, types) {
if (devicelist != "")
devicelist += ", ";
devicelist += Device::string_from_type(type);
@@ -51,36 +51,44 @@ int main(int argc, const char **argv)
/* parse options */
ArgParse ap;
ap.options ("Usage: cycles_server [options]",
"--device %s", &devicename, ("Devices to use: " + devicelist).c_str(),
"--list-devices", &list, "List information about all available devices",
"--threads %d", &threads, "Number of threads to use for CPU device",
ap.options("Usage: cycles_server [options]",
"--device %s",
&devicename,
("Devices to use: " + devicelist).c_str(),
"--list-devices",
&list,
"List information about all available devices",
"--threads %d",
&threads,
"Number of threads to use for CPU device",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
"--verbose %d", &verbosity, "Set verbosity of the logger",
"--debug",
&debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif
NULL);
if(ap.parse(argc, argv) < 0) {
if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
exit(EXIT_FAILURE);
}
if(debug) {
if (debug) {
util_logging_start();
util_logging_verbosity_set(verbosity);
}
if(list) {
vector<DeviceInfo>& devices = Device::available_devices();
if (list) {
vector<DeviceInfo> &devices = Device::available_devices();
printf("Devices:\n");
foreach(DeviceInfo& info, devices) {
printf(" %s%s\n",
info.description.c_str(),
(info.display_device)? " (display)": "");
foreach (DeviceInfo &info, devices) {
printf(" %s%s\n", info.description.c_str(), (info.display_device) ? " (display)" : "");
}
exit(EXIT_SUCCESS);
@@ -88,11 +96,11 @@ int main(int argc, const char **argv)
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo>& devices = Device::available_devices();
vector<DeviceInfo> &devices = Device::available_devices();
DeviceInfo device_info;
foreach(DeviceInfo& device, devices) {
if(device_type == device.type) {
foreach (DeviceInfo &device, devices) {
if (device_type == device.type) {
device_info = device;
break;
}
@@ -100,7 +108,7 @@ int main(int argc, const char **argv)
TaskScheduler::init(threads);
while(1) {
while (1) {
Stats stats;
Device *device = Device::create(device_info, stats, true);
printf("Cycles Server with device: %s\n", device->info.description.c_str());

View File

@@ -36,7 +36,7 @@
#include "util/util_version.h"
#ifdef WITH_CYCLES_STANDALONE_GUI
#include "util/util_view.h"
# include "util/util_view.h"
#endif
#include "app/cycles_xml.h"
@@ -55,7 +55,7 @@ struct Options {
string output_path;
} options;
static void session_print(const string& str)
static void session_print(const string &str)
{
/* print with carriage return to overwrite previous */
printf("\r%s", str.c_str());
@@ -65,7 +65,7 @@ static void session_print(const string& str)
int len = str.size();
maxlen = max(len, maxlen);
for(int i = len; i < maxlen; i++)
for (int i = len; i < maxlen; i++)
printf(" ");
/* flush because we don't write an end of line */
@@ -80,11 +80,11 @@ static void session_print_status()
float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
if(substatus != "")
if (substatus != "")
status += ": " + substatus;
/* print status */
status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
status = string_printf("Progress %05.2f %s", (double)progress * 100, status.c_str());
session_print(status);
}
@@ -94,28 +94,25 @@ static bool write_render(const uchar *pixels, int w, int h, int channels)
session_print(msg);
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
if(!out) {
if (!out) {
return false;
}
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
if(!out->open(options.output_path, spec)) {
if (!out->open(options.output_path, spec)) {
return false;
}
/* conversion for different top/bottom convention */
out->write_image(TypeDesc::UINT8,
pixels + (h - 1) * w * channels,
AutoStride,
-w * channels,
AutoStride);
out->write_image(
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
out->close();
return true;
}
static BufferParams& session_buffer_params()
static BufferParams &session_buffer_params()
{
static BufferParams buffer_params;
buffer_params.width = options.width;
@@ -134,7 +131,7 @@ static void scene_init()
xml_read_file(options.scene, options.filepath.c_str());
/* Camera width/height override? */
if(!(options.width == 0 || options.height == 0)) {
if (!(options.width == 0 || options.height == 0)) {
options.scene->camera->width = options.width;
options.scene->camera->height = options.height;
}
@@ -152,7 +149,7 @@ static void session_init()
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params);
if(options.session_params.background && !options.quiet)
if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
#ifdef WITH_CYCLES_STANDALONE_GUI
else
@@ -169,19 +166,19 @@ static void session_init()
static void session_exit()
{
if(options.session) {
if (options.session) {
delete options.session;
options.session = NULL;
}
if(options.session_params.background && !options.quiet) {
if (options.session_params.background && !options.quiet) {
session_print("Finished Rendering.");
printf("\n");
}
}
#ifdef WITH_CYCLES_STANDALONE_GUI
static void display_info(Progress& progress)
static void display_info(Progress &progress)
{
static double latency = 0.0;
static double last = 0;
@@ -198,10 +195,10 @@ static void display_info(Progress& progress)
progress.get_status(status, substatus);
float progress_val = progress.get_progress();
if(substatus != "")
if (substatus != "")
status += ": " + substatus;
interactive = options.interactive? "On":"Off";
interactive = options.interactive ? "On" : "Off";
str = string_printf(
"%s"
@@ -210,11 +207,16 @@ static void display_info(Progress& progress)
" Progress: %05.2f"
" Average: %.4f"
" Interactive: %s",
status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
status.c_str(),
total_time,
latency,
(double)progress_val * 100,
sample_time,
interactive.c_str());
view_display_info(str.c_str());
if(options.show_help)
if (options.show_help)
view_display_help();
}
@@ -229,17 +231,17 @@ static void display()
static void motion(int x, int y, int button)
{
if(options.interactive) {
if (options.interactive) {
Transform matrix = options.session->scene->camera->matrix;
/* Translate */
if(button == 0) {
if (button == 0) {
float3 translate = make_float3(x * 0.01f, -(y * 0.01f), 0.0f);
matrix = matrix * transform_translate(translate);
}
/* Rotate */
else if(button == 2) {
else if (button == 2) {
float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f);
matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w));
@@ -261,7 +263,7 @@ static void resize(int width, int height)
options.width = width;
options.height = height;
if(options.session) {
if (options.session) {
/* Update camera */
options.session->scene->camera->width = width;
options.session->scene->camera->height = height;
@@ -276,39 +278,39 @@ static void resize(int width, int height)
static void keyboard(unsigned char key)
{
/* Toggle help */
if(key == 'h')
if (key == 'h')
options.show_help = !(options.show_help);
/* Reset */
else if(key == 'r')
else if (key == 'r')
options.session->reset(session_buffer_params(), options.session_params.samples);
/* Cancel */
else if(key == 27) // escape
else if (key == 27) // escape
options.session->progress.set_cancel("Canceled");
/* Pause */
else if(key == 'p') {
else if (key == 'p') {
options.pause = !options.pause;
options.session->set_pause(options.pause);
}
/* Interactive Mode */
else if(key == 'i')
else if (key == 'i')
options.interactive = !(options.interactive);
/* Navigation */
else if(options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) {
else if (options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) {
Transform matrix = options.session->scene->camera->matrix;
float3 translate;
if(key == 'w')
if (key == 'w')
translate = make_float3(0.0f, 0.0f, 0.1f);
else if(key == 's')
else if (key == 's')
translate = make_float3(0.0f, 0.0f, -0.1f);
else if(key == 'a')
else if (key == 'a')
translate = make_float3(-0.1f, 0.0f, 0.0f);
else if(key == 'd')
else if (key == 'd')
translate = make_float3(0.1f, 0.0f, 0.0f);
matrix = matrix * transform_translate(translate);
@@ -322,14 +324,24 @@ static void keyboard(unsigned char key)
}
/* Set Max Bounces */
else if(options.interactive && (key == '0' || key == '1' || key == '2' || key == '3')) {
else if (options.interactive && (key == '0' || key == '1' || key == '2' || key == '3')) {
int bounce;
switch(key) {
case '0': bounce = 0; break;
case '1': bounce = 1; break;
case '2': bounce = 2; break;
case '3': bounce = 3; break;
default: bounce = 0; break;
switch (key) {
case '0':
bounce = 0;
break;
case '1':
bounce = 1;
break;
case '2':
bounce = 2;
break;
case '3':
bounce = 3;
break;
default:
bounce = 0;
break;
}
options.session->scene->integrator->max_bounce = bounce;
@@ -344,7 +356,7 @@ static void keyboard(unsigned char key)
static int files_parse(int argc, const char *argv[])
{
if(argc > 0)
if (argc > 0)
options.filepath = argv[0];
return 0;
@@ -365,8 +377,8 @@ static void options_parse(int argc, const char **argv)
/* List devices for which support is compiled in. */
vector<DeviceType> types = Device::available_types();
foreach(DeviceType type, types) {
if(device_names != "")
foreach (DeviceType type, types) {
if (device_names != "")
device_names += ", ";
device_names += Device::string_from_type(type);
@@ -380,66 +392,100 @@ static void options_parse(int argc, const char **argv)
bool help = false, debug = false, version = false;
int verbosity = 1;
ap.options ("Usage: cycles [options] file.xml",
"%*", files_parse, "",
"--device %s", &devicename, ("Devices to use: " + device_names).c_str(),
ap.options("Usage: cycles [options] file.xml",
"%*",
files_parse,
"",
"--device %s",
&devicename,
("Devices to use: " + device_names).c_str(),
#ifdef WITH_OSL
"--shadingsys %s", &ssname, "Shading system to use: svm, osl",
"--shadingsys %s",
&ssname,
"Shading system to use: svm, osl",
#endif
"--background", &options.session_params.background, "Render in background, without user interface",
"--quiet", &options.quiet, "In background mode, don't print progress messages",
"--samples %d", &options.session_params.samples, "Number of samples to render",
"--output %s", &options.output_path, "File path to write output image",
"--threads %d", &options.session_params.threads, "CPU Rendering Threads",
"--width %d", &options.width, "Window width in pixel",
"--height %d", &options.height, "Window height in pixel",
"--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels",
"--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels",
"--list-devices", &list, "List information about all available devices",
"--background",
&options.session_params.background,
"Render in background, without user interface",
"--quiet",
&options.quiet,
"In background mode, don't print progress messages",
"--samples %d",
&options.session_params.samples,
"Number of samples to render",
"--output %s",
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
"CPU Rendering Threads",
"--width %d",
&options.width,
"Window width in pixel",
"--height %d",
&options.height,
"Window height in pixel",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
"--verbose %d", &verbosity, "Set verbosity of the logger",
"--debug",
&debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif
"--help", &help, "Print help message",
"--version", &version, "Print version number",
"--help",
&help,
"Print help message",
"--version",
&version,
"Print version number",
NULL);
if(ap.parse(argc, argv) < 0) {
if (ap.parse(argc, argv) < 0) {
fprintf(stderr, "%s\n", ap.geterror().c_str());
ap.usage();
exit(EXIT_FAILURE);
}
if(debug) {
if (debug) {
util_logging_start();
util_logging_verbosity_set(verbosity);
}
if(list) {
if (list) {
vector<DeviceInfo> devices = Device::available_devices();
printf("Devices:\n");
foreach(DeviceInfo& info, devices) {
foreach (DeviceInfo &info, devices) {
printf(" %-10s%s%s\n",
Device::string_from_type(info.type).c_str(),
info.description.c_str(),
(info.display_device)? " (display)": "");
(info.display_device) ? " (display)" : "");
}
exit(EXIT_SUCCESS);
}
else if(version) {
else if (version) {
printf("%s\n", CYCLES_VERSION_STRING);
exit(EXIT_SUCCESS);
}
else if(help || options.filepath == "") {
else if (help || options.filepath == "") {
ap.usage();
exit(EXIT_SUCCESS);
}
if(ssname == "osl")
if (ssname == "osl")
options.scene_params.shadingsystem = SHADINGSYSTEM_OSL;
else if(ssname == "svm")
else if (ssname == "svm")
options.scene_params.shadingsystem = SHADINGSYSTEM_SVM;
#ifndef WITH_CYCLES_STANDALONE_GUI
@@ -460,25 +506,26 @@ static void options_parse(int argc, const char **argv)
}
/* handle invalid configurations */
if(options.session_params.device.type == DEVICE_NONE || !device_available) {
if (options.session_params.device.type == DEVICE_NONE || !device_available) {
fprintf(stderr, "Unknown device: %s\n", devicename.c_str());
exit(EXIT_FAILURE);
}
#ifdef WITH_OSL
else if(!(ssname == "osl" || ssname == "svm")) {
else if (!(ssname == "osl" || ssname == "svm")) {
fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str());
exit(EXIT_FAILURE);
}
else if(options.scene_params.shadingsystem == SHADINGSYSTEM_OSL && options.session_params.device.type != DEVICE_CPU) {
else if (options.scene_params.shadingsystem == SHADINGSYSTEM_OSL &&
options.session_params.device.type != DEVICE_CPU) {
fprintf(stderr, "OSL shading system only works with CPU device\n");
exit(EXIT_FAILURE);
}
#endif
else if(options.session_params.samples < 0) {
else if (options.session_params.samples < 0) {
fprintf(stderr, "Invalid number of samples: %d\n", options.session_params.samples);
exit(EXIT_FAILURE);
}
else if(options.filepath == "") {
else if (options.filepath == "") {
fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE);
}
@@ -498,7 +545,7 @@ int main(int argc, const char **argv)
options_parse(argc, argv);
#ifdef WITH_CYCLES_STANDALONE_GUI
if(options.session_params.background) {
if (options.session_params.background) {
#endif
session_init();
options.session->wait();
@@ -509,8 +556,15 @@ int main(int argc, const char **argv)
string title = "Cycles: " + path_filename(options.filepath);
/* init/exit are callback so they run while GL is initialized */
view_main_loop(title.c_str(), options.width, options.height,
session_init, session_exit, resize, display, keyboard, motion);
view_main_loop(title.c_str(),
options.width,
options.height,
session_init,
session_exit,
resize,
display,
keyboard,
motion);
}
#endif

View File

@@ -58,11 +58,7 @@ struct XMLReadState : public XMLReader {
string base; /* base path to current file*/
float dicing_rate; /* current dicing rate */
XMLReadState()
: scene(NULL),
smooth(false),
shader(NULL),
dicing_rate(1.0f)
XMLReadState() : scene(NULL), smooth(false), shader(NULL), dicing_rate(1.0f)
{
tfm = transform_identity();
}
@@ -74,7 +70,7 @@ static bool xml_read_int(int *value, xml_node node, const char *name)
{
xml_attribute attr = node.attribute(name);
if(attr) {
if (attr) {
*value = atoi(attr.value());
return true;
}
@@ -82,15 +78,15 @@ static bool xml_read_int(int *value, xml_node node, const char *name)
return false;
}
static bool xml_read_int_array(vector<int>& value, xml_node node, const char *name)
static bool xml_read_int_array(vector<int> &value, xml_node node, const char *name)
{
xml_attribute attr = node.attribute(name);
if(attr) {
if (attr) {
vector<string> tokens;
string_split(tokens, attr.value());
foreach(const string& token, tokens)
foreach (const string &token, tokens)
value.push_back(atoi(token.c_str()));
return true;
@@ -103,7 +99,7 @@ static bool xml_read_float(float *value, xml_node node, const char *name)
{
xml_attribute attr = node.attribute(name);
if(attr) {
if (attr) {
*value = (float)atof(attr.value());
return true;
}
@@ -111,15 +107,15 @@ static bool xml_read_float(float *value, xml_node node, const char *name)
return false;
}
static bool xml_read_float_array(vector<float>& value, xml_node node, const char *name)
static bool xml_read_float_array(vector<float> &value, xml_node node, const char *name)
{
xml_attribute attr = node.attribute(name);
if(attr) {
if (attr) {
vector<string> tokens;
string_split(tokens, attr.value());
foreach(const string& token, tokens)
foreach (const string &token, tokens)
value.push_back((float)atof(token.c_str()));
return true;
@@ -132,7 +128,7 @@ static bool xml_read_float3(float3 *value, xml_node node, const char *name)
{
vector<float> array;
if(xml_read_float_array(array, node, name) && array.size() == 3) {
if (xml_read_float_array(array, node, name) && array.size() == 3) {
*value = make_float3(array[0], array[1], array[2]);
return true;
}
@@ -140,13 +136,13 @@ static bool xml_read_float3(float3 *value, xml_node node, const char *name)
return false;
}
static bool xml_read_float3_array(vector<float3>& value, xml_node node, const char *name)
static bool xml_read_float3_array(vector<float3> &value, xml_node node, const char *name)
{
vector<float> array;
if(xml_read_float_array(array, node, name)) {
for(size_t i = 0; i < array.size(); i += 3)
value.push_back(make_float3(array[i+0], array[i+1], array[i+2]));
if (xml_read_float_array(array, node, name)) {
for (size_t i = 0; i < array.size(); i += 3)
value.push_back(make_float3(array[i + 0], array[i + 1], array[i + 2]));
return true;
}
@@ -158,7 +154,7 @@ static bool xml_read_float4(float4 *value, xml_node node, const char *name)
{
vector<float> array;
if(xml_read_float_array(array, node, name) && array.size() == 4) {
if (xml_read_float_array(array, node, name) && array.size() == 4) {
*value = make_float4(array[0], array[1], array[2], array[3]);
return true;
}
@@ -170,7 +166,7 @@ static bool xml_read_string(string *str, xml_node node, const char *name)
{
xml_attribute attr = node.attribute(name);
if(attr) {
if (attr) {
*str = attr.value();
return true;
}
@@ -182,7 +178,7 @@ static bool xml_equal_string(xml_node node, const char *name, const char *value)
{
xml_attribute attr = node.attribute(name);
if(attr)
if (attr)
return string_iequals(attr.value(), value);
return false;
@@ -190,7 +186,7 @@ static bool xml_equal_string(xml_node node, const char *name, const char *value)
/* Camera */
static void xml_read_camera(XMLReadState& state, xml_node node)
static void xml_read_camera(XMLReadState &state, xml_node node)
{
Camera *cam = state.scene->camera;
@@ -210,7 +206,7 @@ static void xml_read_camera(XMLReadState& state, xml_node node)
/* Shader */
static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node graph_node)
static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node graph_node)
{
xml_read_node(state, shader, graph_node);
@@ -220,17 +216,17 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
XMLReader graph_reader;
graph_reader.node_map[ustring("output")] = graph->output();
for(xml_node node = graph_node.first_child(); node; node = node.next_sibling()) {
for (xml_node node = graph_node.first_child(); node; node = node.next_sibling()) {
ustring node_name(node.name());
if(node_name == "connect") {
if (node_name == "connect") {
/* connect nodes */
vector<string> from_tokens, to_tokens;
string_split(from_tokens, node.attribute("from").value());
string_split(to_tokens, node.attribute("to").value());
if(from_tokens.size() == 2 && to_tokens.size() == 2) {
if (from_tokens.size() == 2 && to_tokens.size() == 2) {
ustring from_node_name(from_tokens[0]);
ustring from_socket_name(from_tokens[1]);
ustring to_node_name(to_tokens[0]);
@@ -240,34 +236,40 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
ShaderOutput *output = NULL;
ShaderInput *input = NULL;
if(graph_reader.node_map.find(from_node_name) != graph_reader.node_map.end()) {
ShaderNode *fromnode = (ShaderNode*)graph_reader.node_map[from_node_name];
if (graph_reader.node_map.find(from_node_name) != graph_reader.node_map.end()) {
ShaderNode *fromnode = (ShaderNode *)graph_reader.node_map[from_node_name];
foreach(ShaderOutput *out, fromnode->outputs)
if(string_iequals(out->socket_type.name.string(), from_socket_name.string()))
foreach (ShaderOutput *out, fromnode->outputs)
if (string_iequals(out->socket_type.name.string(), from_socket_name.string()))
output = out;
if(!output)
fprintf(stderr, "Unknown output socket name \"%s\" on \"%s\".\n", from_node_name.c_str(), from_socket_name.c_str());
if (!output)
fprintf(stderr,
"Unknown output socket name \"%s\" on \"%s\".\n",
from_node_name.c_str(),
from_socket_name.c_str());
}
else
fprintf(stderr, "Unknown shader node name \"%s\".\n", from_node_name.c_str());
if(graph_reader.node_map.find(to_node_name) != graph_reader.node_map.end()) {
ShaderNode *tonode = (ShaderNode*)graph_reader.node_map[to_node_name];
if (graph_reader.node_map.find(to_node_name) != graph_reader.node_map.end()) {
ShaderNode *tonode = (ShaderNode *)graph_reader.node_map[to_node_name];
foreach(ShaderInput *in, tonode->inputs)
if(string_iequals(in->socket_type.name.string(), to_socket_name.string()))
foreach (ShaderInput *in, tonode->inputs)
if (string_iequals(in->socket_type.name.string(), to_socket_name.string()))
input = in;
if(!input)
fprintf(stderr, "Unknown input socket name \"%s\" on \"%s\".\n", to_socket_name.c_str(), to_node_name.c_str());
if (!input)
fprintf(stderr,
"Unknown input socket name \"%s\" on \"%s\".\n",
to_socket_name.c_str(),
to_node_name.c_str());
}
else
fprintf(stderr, "Unknown shader node name \"%s\".\n", to_node_name.c_str());
/* connect */
if(output && input)
if (output && input)
graph->connect(output, input);
}
else
@@ -279,20 +281,20 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
ShaderNode *snode = NULL;
#ifdef WITH_OSL
if(node_name == "osl_shader") {
if (node_name == "osl_shader") {
ShaderManager *manager = state.scene->shader_manager;
if(manager->use_osl()) {
if (manager->use_osl()) {
std::string filepath;
if(xml_read_string(&filepath, node, "src")) {
if(path_is_relative(filepath)) {
if (xml_read_string(&filepath, node, "src")) {
if (path_is_relative(filepath)) {
filepath = path_join(state.base, filepath);
}
snode = ((OSLShaderManager*)manager)->osl_node(filepath);
snode = ((OSLShaderManager *)manager)->osl_node(filepath);
if(!snode) {
if (!snode) {
fprintf(stderr, "Failed to create OSL node from \"%s\".\n", filepath.c_str());
continue;
}
@@ -311,35 +313,35 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
#endif
{
/* exception for name collision */
if(node_name == "background")
if (node_name == "background")
node_name = "background_shader";
const NodeType *node_type = NodeType::find(node_name);
if(!node_type) {
if (!node_type) {
fprintf(stderr, "Unknown shader node \"%s\".\n", node.name());
continue;
}
else if(node_type->type != NodeType::SHADER) {
else if (node_type->type != NodeType::SHADER) {
fprintf(stderr, "Node type \"%s\" is not a shader node.\n", node_type->name.c_str());
continue;
}
snode = (ShaderNode*) node_type->create(node_type);
snode = (ShaderNode *)node_type->create(node_type);
}
xml_read_node(graph_reader, snode, node);
if(node_name == "image_texture") {
ImageTextureNode *img = (ImageTextureNode*) snode;
if (node_name == "image_texture") {
ImageTextureNode *img = (ImageTextureNode *)snode;
img->filename = path_join(state.base, img->filename.string());
}
else if(node_name == "environment_texture") {
EnvironmentTextureNode *env = (EnvironmentTextureNode*) snode;
else if (node_name == "environment_texture") {
EnvironmentTextureNode *env = (EnvironmentTextureNode *)snode;
env->filename = path_join(state.base, env->filename.string());
}
if(snode) {
if (snode) {
/* add to graph */
graph->add(snode);
}
@@ -349,7 +351,7 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
shader->tag_update(state.scene);
}
static void xml_read_shader(XMLReadState& state, xml_node node)
static void xml_read_shader(XMLReadState &state, xml_node node)
{
Shader *shader = new Shader();
xml_read_shader_graph(state, shader, node);
@@ -358,7 +360,7 @@ static void xml_read_shader(XMLReadState& state, xml_node node)
/* Background */
static void xml_read_background(XMLReadState& state, xml_node node)
static void xml_read_background(XMLReadState &state, xml_node node)
{
/* Background Settings */
xml_read_node(state, state.scene->background, node);
@@ -370,7 +372,7 @@ static void xml_read_background(XMLReadState& state, xml_node node)
/* Mesh */
static Mesh *xml_add_mesh(Scene *scene, const Transform& tfm)
static Mesh *xml_add_mesh(Scene *scene, const Transform &tfm)
{
/* create mesh */
Mesh *mesh = new Mesh();
@@ -385,7 +387,7 @@ static Mesh *xml_add_mesh(Scene *scene, const Transform& tfm)
return mesh;
}
static void xml_read_mesh(const XMLReadState& state, xml_node node)
static void xml_read_mesh(const XMLReadState &state, xml_node node)
{
/* add mesh */
Mesh *mesh = xml_add_mesh(state.scene, state.tfm);
@@ -404,27 +406,27 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
xml_read_int_array(verts, node, "verts");
xml_read_int_array(nverts, node, "nverts");
if(xml_equal_string(node, "subdivision", "catmull-clark")) {
if (xml_equal_string(node, "subdivision", "catmull-clark")) {
mesh->subdivision_type = Mesh::SUBDIVISION_CATMULL_CLARK;
}
else if(xml_equal_string(node, "subdivision", "linear")) {
else if (xml_equal_string(node, "subdivision", "linear")) {
mesh->subdivision_type = Mesh::SUBDIVISION_LINEAR;
}
if(mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
if (mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
/* create vertices */
mesh->verts = P;
size_t num_triangles = 0;
for(size_t i = 0; i < nverts.size(); i++)
num_triangles += nverts[i]-2;
for (size_t i = 0; i < nverts.size(); i++)
num_triangles += nverts[i] - 2;
mesh->reserve_mesh(mesh->verts.size(), num_triangles);
/* create triangles */
int index_offset = 0;
for(size_t i = 0; i < nverts.size(); i++) {
for(int j = 0; j < nverts[i]-2; j++) {
for (size_t i = 0; i < nverts.size(); i++) {
for (int j = 0; j < nverts[i] - 2; j++) {
int v0 = verts[index_offset];
int v1 = verts[index_offset + j + 1];
int v2 = verts[index_offset + j + 2];
@@ -439,26 +441,26 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
index_offset += nverts[i];
}
if(xml_read_float_array(UV, node, "UV")) {
if (xml_read_float_array(UV, node, "UV")) {
ustring name = ustring("UVMap");
Attribute *attr = mesh->attributes.add(ATTR_STD_UV, name);
float2 *fdata = attr->data_float2();
/* loop over the triangles */
index_offset = 0;
for(size_t i = 0; i < nverts.size(); i++) {
for(int j = 0; j < nverts[i]-2; j++) {
for (size_t i = 0; i < nverts.size(); i++) {
for (int j = 0; j < nverts[i] - 2; j++) {
int v0 = index_offset;
int v1 = index_offset + j + 1;
int v2 = index_offset + j + 2;
assert(v0*2+1 < (int)UV.size());
assert(v1*2+1 < (int)UV.size());
assert(v2*2+1 < (int)UV.size());
assert(v0 * 2 + 1 < (int)UV.size());
assert(v1 * 2 + 1 < (int)UV.size());
assert(v2 * 2 + 1 < (int)UV.size());
fdata[0] = make_float2(UV[v0*2], UV[v0*2+1]);
fdata[1] = make_float2(UV[v1*2], UV[v1*2+1]);
fdata[2] = make_float2(UV[v2*2], UV[v2*2+1]);
fdata[0] = make_float2(UV[v0 * 2], UV[v0 * 2 + 1]);
fdata[1] = make_float2(UV[v1 * 2], UV[v1 * 2 + 1]);
fdata[2] = make_float2(UV[v2 * 2], UV[v2 * 2 + 1]);
fdata += 3;
}
@@ -472,7 +474,7 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
size_t num_ngons = 0;
size_t num_corners = 0;
for(size_t i = 0; i < nverts.size(); i++) {
for (size_t i = 0; i < nverts.size(); i++) {
num_ngons += (nverts[i] == 4) ? 0 : 1;
num_corners += nverts[i];
}
@@ -481,13 +483,13 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
/* create subd_faces */
int index_offset = 0;
for(size_t i = 0; i < nverts.size(); i++) {
for (size_t i = 0; i < nverts.size(); i++) {
mesh->add_subd_face(&verts[index_offset], nverts[i], shader, smooth);
index_offset += nverts[i];
}
/* uv map */
if(xml_read_float_array(UV, node, "UV")) {
if (xml_read_float_array(UV, node, "UV")) {
ustring name = ustring("UVMap");
Attribute *attr = mesh->subd_attributes.add(ATTR_STD_UV, name);
float3 *fdata = attr->data_float3();
@@ -499,18 +501,18 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
#endif
index_offset = 0;
for(size_t i = 0; i < nverts.size(); i++) {
for(int j = 0; j < nverts[i]; j++) {
for (size_t i = 0; i < nverts.size(); i++) {
for (int j = 0; j < nverts[i]; j++) {
*(fdata++) = make_float3(UV[index_offset++]);
}
}
}
/* setup subd params */
if(!mesh->subd_params) {
if (!mesh->subd_params) {
mesh->subd_params = new SubdParams(mesh);
}
SubdParams& sdparams = *mesh->subd_params;
SubdParams &sdparams = *mesh->subd_params;
sdparams.dicing_rate = state.dicing_rate;
xml_read_float(&sdparams.dicing_rate, node, "dicing_rate");
@@ -521,15 +523,15 @@ static void xml_read_mesh(const XMLReadState& state, xml_node node)
/* we don't yet support arbitrary attributes, for now add vertex
* coordinates as generated coordinates if requested */
if(mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
if (mesh->need_attribute(state.scene, ATTR_STD_GENERATED)) {
Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3)*mesh->verts.size());
memcpy(attr->data_float3(), mesh->verts.data(), sizeof(float3) * mesh->verts.size());
}
}
/* Light */
static void xml_read_light(XMLReadState& state, xml_node node)
static void xml_read_light(XMLReadState &state, xml_node node)
{
Light *light = new Light();
@@ -541,29 +543,29 @@ static void xml_read_light(XMLReadState& state, xml_node node)
/* Transform */
static void xml_read_transform(xml_node node, Transform& tfm)
static void xml_read_transform(xml_node node, Transform &tfm)
{
if(node.attribute("matrix")) {
if (node.attribute("matrix")) {
vector<float> matrix;
if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16) {
ProjectionTransform projection = *(ProjectionTransform*)&matrix[0];
if (xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16) {
ProjectionTransform projection = *(ProjectionTransform *)&matrix[0];
tfm = tfm * projection_to_transform(projection_transpose(projection));
}
}
if(node.attribute("translate")) {
if (node.attribute("translate")) {
float3 translate = make_float3(0.0f, 0.0f, 0.0f);
xml_read_float3(&translate, node, "translate");
tfm = tfm * transform_translate(translate);
}
if(node.attribute("rotate")) {
if (node.attribute("rotate")) {
float4 rotate = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
xml_read_float4(&rotate, node, "rotate");
tfm = tfm * transform_rotate(DEG2RADF(rotate.x), make_float3(rotate.y, rotate.z, rotate.w));
}
if(node.attribute("scale")) {
if (node.attribute("scale")) {
float3 scale = make_float3(0.0f, 0.0f, 0.0f);
xml_read_float3(&scale, node, "scale");
tfm = tfm * transform_scale(scale);
@@ -572,79 +574,79 @@ static void xml_read_transform(xml_node node, Transform& tfm)
/* State */
static void xml_read_state(XMLReadState& state, xml_node node)
static void xml_read_state(XMLReadState &state, xml_node node)
{
/* read shader */
string shadername;
if(xml_read_string(&shadername, node, "shader")) {
if (xml_read_string(&shadername, node, "shader")) {
bool found = false;
foreach(Shader *shader, state.scene->shaders) {
if(shader->name == shadername) {
foreach (Shader *shader, state.scene->shaders) {
if (shader->name == shadername) {
state.shader = shader;
found = true;
break;
}
}
if(!found)
if (!found)
fprintf(stderr, "Unknown shader \"%s\".\n", shadername.c_str());
}
xml_read_float(&state.dicing_rate, node, "dicing_rate");
/* read smooth/flat */
if(xml_equal_string(node, "interpolation", "smooth"))
if (xml_equal_string(node, "interpolation", "smooth"))
state.smooth = true;
else if(xml_equal_string(node, "interpolation", "flat"))
else if (xml_equal_string(node, "interpolation", "flat"))
state.smooth = false;
}
/* Scene */
static void xml_read_include(XMLReadState& state, const string& src);
static void xml_read_include(XMLReadState &state, const string &src);
static void xml_read_scene(XMLReadState& state, xml_node scene_node)
static void xml_read_scene(XMLReadState &state, xml_node scene_node)
{
for(xml_node node = scene_node.first_child(); node; node = node.next_sibling()) {
if(string_iequals(node.name(), "film")) {
for (xml_node node = scene_node.first_child(); node; node = node.next_sibling()) {
if (string_iequals(node.name(), "film")) {
xml_read_node(state, state.scene->film, node);
}
else if(string_iequals(node.name(), "integrator")) {
else if (string_iequals(node.name(), "integrator")) {
xml_read_node(state, state.scene->integrator, node);
}
else if(string_iequals(node.name(), "camera")) {
else if (string_iequals(node.name(), "camera")) {
xml_read_camera(state, node);
}
else if(string_iequals(node.name(), "shader")) {
else if (string_iequals(node.name(), "shader")) {
xml_read_shader(state, node);
}
else if(string_iequals(node.name(), "background")) {
else if (string_iequals(node.name(), "background")) {
xml_read_background(state, node);
}
else if(string_iequals(node.name(), "mesh")) {
else if (string_iequals(node.name(), "mesh")) {
xml_read_mesh(state, node);
}
else if(string_iequals(node.name(), "light")) {
else if (string_iequals(node.name(), "light")) {
xml_read_light(state, node);
}
else if(string_iequals(node.name(), "transform")) {
else if (string_iequals(node.name(), "transform")) {
XMLReadState substate = state;
xml_read_transform(node, substate.tfm);
xml_read_scene(substate, node);
}
else if(string_iequals(node.name(), "state")) {
else if (string_iequals(node.name(), "state")) {
XMLReadState substate = state;
xml_read_state(substate, node);
xml_read_scene(substate, node);
}
else if(string_iequals(node.name(), "include")) {
else if (string_iequals(node.name(), "include")) {
string src;
if(xml_read_string(&src, node, "src"))
if (xml_read_string(&src, node, "src"))
xml_read_include(state, src);
}
else
@@ -654,7 +656,7 @@ static void xml_read_scene(XMLReadState& state, xml_node scene_node)
/* Include */
static void xml_read_include(XMLReadState& state, const string& src)
static void xml_read_include(XMLReadState &state, const string &src)
{
/* open XML document */
xml_document doc;
@@ -663,7 +665,7 @@ static void xml_read_include(XMLReadState& state, const string& src)
string path = path_join(state.base, src);
parse_result = doc.load_file(path.c_str());
if(parse_result) {
if (parse_result) {
XMLReadState substate = state;
substate.base = path_dirname(path);

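The hunks before and after this point apply the same mechanical rules everywhere: a space after control-flow keywords (if, for, switch, foreach), the pointer and reference qualifiers bound to the variable name rather than the type, spaces around binary operators, and over-long expressions re-wrapped to the column limit. A minimal before/after sketch of those rules on a hypothetical helper (not code from the tree; the two-space indent is an assumption based on the .clang-format settings this change adopts):

/* Hypothetical helper, hand-formatted in the old style. */
static float apply_scale(float* values, int num, const float& base)
{
	float scale = base*0.5f;
	if(scale < 0.0f)
		scale = 0.0f;
	for(int i = 0; i < num; i++)
		values[i] *= scale;
	return scale;
}

/* The same helper after clang-format with the conventions used in this commit. */
static float apply_scale(float *values, int num, const float &base)
{
  float scale = base * 0.5f;
  if (scale < 0.0f)
    scale = 0.0f;
  for (int i = 0; i < num; i++)
    values[i] *= scale;
  return scale;
}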

@@ -87,8 +87,7 @@ struct BlenderCamera {
int motion_steps;
};
static void blender_camera_init(BlenderCamera *bcam,
BL::RenderSettings& b_render)
static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_render)
{
memset((void *)bcam, 0, sizeof(BlenderCamera));
@@ -115,14 +114,14 @@ static void blender_camera_init(BlenderCamera *bcam,
bcam->full_height = render_resolution_y(b_render);
}
static float blender_camera_focal_distance(BL::RenderEngine& b_engine,
BL::Object& b_ob,
BL::Camera& b_camera,
static float blender_camera_focal_distance(BL::RenderEngine &b_engine,
BL::Object &b_ob,
BL::Camera &b_camera,
BlenderCamera *bcam)
{
BL::Object b_dof_object = b_camera.dof_object();
if(!b_dof_object)
if (!b_dof_object)
return b_camera.dof_distance();
/* for dof object, return distance along camera Z direction */
@@ -136,26 +135,25 @@ static float blender_camera_focal_distance(BL::RenderEngine& b_engine,
}
static void blender_camera_from_object(BlenderCamera *bcam,
BL::RenderEngine& b_engine,
BL::Object& b_ob,
BL::RenderEngine &b_engine,
BL::Object &b_ob,
bool skip_panorama = false)
{
BL::ID b_ob_data = b_ob.data();
if(b_ob_data.is_a(&RNA_Camera)) {
if (b_ob_data.is_a(&RNA_Camera)) {
BL::Camera b_camera(b_ob_data);
PointerRNA ccamera = RNA_pointer_get(&b_camera.ptr, "cycles");
bcam->nearclip = b_camera.clip_start();
bcam->farclip = b_camera.clip_end();
switch(b_camera.type())
{
switch (b_camera.type()) {
case BL::Camera::type_ORTHO:
bcam->type = CAMERA_ORTHOGRAPHIC;
break;
case BL::Camera::type_PANO:
if(!skip_panorama)
if (!skip_panorama)
bcam->type = CAMERA_PANORAMA;
else
bcam->type = CAMERA_PERSPECTIVE;
@@ -166,10 +164,8 @@ static void blender_camera_from_object(BlenderCamera *bcam,
break;
}
bcam->panorama_type = (PanoramaType)get_enum(ccamera,
"panorama_type",
PANORAMA_NUM_TYPES,
PANORAMA_EQUIRECTANGULAR);
bcam->panorama_type = (PanoramaType)get_enum(
ccamera, "panorama_type", PANORAMA_NUM_TYPES, PANORAMA_EQUIRECTANGULAR);
bcam->fisheye_fov = RNA_float_get(&ccamera, "fisheye_fov");
bcam->fisheye_lens = RNA_float_get(&ccamera, "fisheye_lens");
@@ -179,7 +175,7 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max");
bcam->interocular_distance = b_camera.stereo().interocular_distance();
if(b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) {
bcam->convergence_distance = FLT_MAX;
}
else {
@@ -199,14 +195,14 @@ static void blender_camera_from_object(BlenderCamera *bcam,
* give manual control over aperture radius */
int aperture_type = get_enum(ccamera, "aperture_type");
if(aperture_type == 1) {
if (aperture_type == 1) {
float fstop = RNA_float_get(&ccamera, "aperture_fstop");
fstop = max(fstop, 1e-5f);
if(bcam->type == CAMERA_ORTHOGRAPHIC)
bcam->aperturesize = 1.0f/(2.0f*fstop);
if (bcam->type == CAMERA_ORTHOGRAPHIC)
bcam->aperturesize = 1.0f / (2.0f * fstop);
else
bcam->aperturesize = (bcam->lens*1e-3f)/(2.0f*fstop);
bcam->aperturesize = (bcam->lens * 1e-3f) / (2.0f * fstop);
}
else
bcam->aperturesize = RNA_float_get(&ccamera, "aperture_size");
@@ -222,14 +218,14 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->sensor_width = b_camera.sensor_width();
bcam->sensor_height = b_camera.sensor_height();
if(b_camera.sensor_fit() == BL::Camera::sensor_fit_AUTO)
if (b_camera.sensor_fit() == BL::Camera::sensor_fit_AUTO)
bcam->sensor_fit = BlenderCamera::AUTO;
else if(b_camera.sensor_fit() == BL::Camera::sensor_fit_HORIZONTAL)
else if (b_camera.sensor_fit() == BL::Camera::sensor_fit_HORIZONTAL)
bcam->sensor_fit = BlenderCamera::HORIZONTAL;
else
bcam->sensor_fit = BlenderCamera::VERTICAL;
}
else if(b_ob_data.is_a(&RNA_Light)) {
else if (b_ob_data.is_a(&RNA_Light)) {
/* Can also look through spot light. */
BL::SpotLight b_light(b_ob_data);
float lens = 16.0f / tanf(b_light.spot_size() * 0.5f);
@@ -241,31 +237,27 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->motion_steps = object_motion_steps(b_ob, b_ob);
}
static Transform blender_camera_matrix(const Transform& tfm,
static Transform blender_camera_matrix(const Transform &tfm,
const CameraType type,
const PanoramaType panorama_type)
{
Transform result;
if(type == CAMERA_PANORAMA) {
if(panorama_type == PANORAMA_MIRRORBALL) {
if (type == CAMERA_PANORAMA) {
if (panorama_type == PANORAMA_MIRRORBALL) {
/* Mirror ball camera is looking into the negative Y direction
* which matches texture mirror ball mapping.
*/
result = tfm *
make_transform(1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f);
result = tfm * make_transform(
1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f);
}
else {
/* Make it so environment camera needs to be pointed in the direction
* of the positive x-axis to match an environment texture, this way
* it is looking at the center of the texture
*/
result = tfm *
make_transform( 0.0f, -1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
-1.0f, 0.0f, 0.0f, 0.0f);
result = tfm * make_transform(
0.0f, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 0.0f, 0.0f);
}
}
else {
@@ -277,72 +269,73 @@ static Transform blender_camera_matrix(const Transform& tfm,
}
static void blender_camera_viewplane(BlenderCamera *bcam,
int width, int height,
int width,
int height,
BoundBox2D *viewplane,
float *aspectratio,
float *sensor_size)
{
/* dimensions */
float xratio = (float)width*bcam->pixelaspect.x;
float yratio = (float)height*bcam->pixelaspect.y;
float xratio = (float)width * bcam->pixelaspect.x;
float yratio = (float)height * bcam->pixelaspect.y;
/* compute x/y aspect and ratio */
float xaspect, yaspect;
bool horizontal_fit;
/* sensor fitting */
if(bcam->sensor_fit == BlenderCamera::AUTO) {
if (bcam->sensor_fit == BlenderCamera::AUTO) {
horizontal_fit = (xratio > yratio);
if(sensor_size != NULL) {
if (sensor_size != NULL) {
*sensor_size = bcam->sensor_width;
}
}
else if(bcam->sensor_fit == BlenderCamera::HORIZONTAL) {
else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) {
horizontal_fit = true;
if(sensor_size != NULL) {
if (sensor_size != NULL) {
*sensor_size = bcam->sensor_width;
}
}
else {
horizontal_fit = false;
if(sensor_size != NULL) {
if (sensor_size != NULL) {
*sensor_size = bcam->sensor_height;
}
}
if(horizontal_fit) {
if(aspectratio != NULL) {
*aspectratio = xratio/yratio;
if (horizontal_fit) {
if (aspectratio != NULL) {
*aspectratio = xratio / yratio;
}
xaspect = *aspectratio;
yaspect = 1.0f;
}
else {
if(aspectratio != NULL) {
*aspectratio = yratio/xratio;
if (aspectratio != NULL) {
*aspectratio = yratio / xratio;
}
xaspect = 1.0f;
yaspect = *aspectratio;
}
/* modify aspect for orthographic scale */
if(bcam->type == CAMERA_ORTHOGRAPHIC) {
xaspect = xaspect*bcam->ortho_scale/(*aspectratio*2.0f);
yaspect = yaspect*bcam->ortho_scale/(*aspectratio*2.0f);
if(aspectratio != NULL) {
*aspectratio = bcam->ortho_scale/2.0f;
if (bcam->type == CAMERA_ORTHOGRAPHIC) {
xaspect = xaspect * bcam->ortho_scale / (*aspectratio * 2.0f);
yaspect = yaspect * bcam->ortho_scale / (*aspectratio * 2.0f);
if (aspectratio != NULL) {
*aspectratio = bcam->ortho_scale / 2.0f;
}
}
if(bcam->type == CAMERA_PANORAMA) {
if (bcam->type == CAMERA_PANORAMA) {
/* set viewplane */
if(viewplane != NULL) {
if (viewplane != NULL) {
*viewplane = bcam->pano_viewplane;
}
}
else {
/* set viewplane */
if(viewplane != NULL) {
if (viewplane != NULL) {
viewplane->left = -xaspect;
viewplane->right = xaspect;
viewplane->bottom = -yaspect;
@@ -352,8 +345,8 @@ static void blender_camera_viewplane(BlenderCamera *bcam,
*viewplane = (*viewplane) * bcam->zoom;
/* modify viewplane with camera shift and 3d camera view offset */
float dx = 2.0f*(*aspectratio*bcam->shift.x + bcam->offset.x*xaspect*2.0f);
float dy = 2.0f*(*aspectratio*bcam->shift.y + bcam->offset.y*yaspect*2.0f);
float dx = 2.0f * (*aspectratio * bcam->shift.x + bcam->offset.x * xaspect * 2.0f);
float dy = 2.0f * (*aspectratio * bcam->shift.y + bcam->offset.y * yaspect * 2.0f);
viewplane->left += dx;
viewplane->right += dx;
@@ -365,7 +358,8 @@ static void blender_camera_viewplane(BlenderCamera *bcam,
static void blender_camera_sync(Camera *cam,
BlenderCamera *bcam,
int width, int height,
int width,
int height,
const char *viewname,
PointerRNA *cscene)
{
@@ -374,8 +368,7 @@ static void blender_camera_sync(Camera *cam,
float aspectratio, sensor_size;
/* viewplane */
blender_camera_viewplane(bcam, width, height,
&cam->viewplane, &aspectratio, &sensor_size);
blender_camera_viewplane(bcam, width, height, &cam->viewplane, &aspectratio, &sensor_size);
cam->width = bcam->full_width;
cam->height = bcam->full_height;
@@ -384,17 +377,17 @@ static void blender_camera_sync(Camera *cam,
cam->full_height = height;
/* panorama sensor */
if(bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
float fit_xratio = (float)bcam->full_width*bcam->pixelaspect.x;
float fit_yratio = (float)bcam->full_height*bcam->pixelaspect.y;
if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
float fit_xratio = (float)bcam->full_width * bcam->pixelaspect.x;
float fit_yratio = (float)bcam->full_height * bcam->pixelaspect.y;
bool horizontal_fit;
float sensor_size;
if(bcam->sensor_fit == BlenderCamera::AUTO) {
if (bcam->sensor_fit == BlenderCamera::AUTO) {
horizontal_fit = (fit_xratio > fit_yratio);
sensor_size = bcam->sensor_width;
}
else if(bcam->sensor_fit == BlenderCamera::HORIZONTAL) {
else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) {
horizontal_fit = true;
sensor_size = bcam->sensor_width;
}
@@ -403,7 +396,7 @@ static void blender_camera_sync(Camera *cam,
sensor_size = bcam->sensor_height;
}
if(horizontal_fit) {
if (horizontal_fit) {
cam->sensorwidth = sensor_size;
cam->sensorheight = sensor_size * fit_yratio / fit_xratio;
}
@@ -435,10 +428,10 @@ static void blender_camera_sync(Camera *cam,
cam->convergence_distance = bcam->convergence_distance;
cam->use_spherical_stereo = bcam->use_spherical_stereo;
if(cam->use_spherical_stereo) {
if(strcmp(viewname, "left") == 0)
if (cam->use_spherical_stereo) {
if (strcmp(viewname, "left") == 0)
cam->stereo_eye = Camera::STEREO_LEFT;
else if(strcmp(viewname, "right") == 0)
else if (strcmp(viewname, "right") == 0)
cam->stereo_eye = Camera::STEREO_RIGHT;
else
cam->stereo_eye = Camera::STEREO_NONE;
@@ -459,9 +452,7 @@ static void blender_camera_sync(Camera *cam,
cam->bladesrotation = bcam->aperturerotation;
/* transform */
cam->matrix = blender_camera_matrix(bcam->matrix,
bcam->type,
bcam->panorama_type);
cam->matrix = blender_camera_matrix(bcam->matrix, bcam->type, bcam->panorama_type);
cam->motion.clear();
cam->motion.resize(bcam->motion_steps, cam->matrix);
cam->use_perspective_motion = false;
@@ -483,15 +474,16 @@ static void blender_camera_sync(Camera *cam,
cam->offscreen_dicing_scale = bcam->offscreen_dicing_scale;
/* set update flag */
if(cam->modified(prevcam))
if (cam->modified(prevcam))
cam->tag_update();
}
/* Sync Render Camera */
void BlenderSync::sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
int width, int height,
void BlenderSync::sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width,
int height,
const char *viewname)
{
BlenderCamera bcam;
@@ -506,20 +498,19 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
curvemapping_to_array(b_shutter_curve, bcam.shutter_curve, RAMP_TABLE_SIZE);
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
bcam.motion_position =
(Camera::MotionPosition)get_enum(cscene,
bcam.motion_position = (Camera::MotionPosition)get_enum(cscene,
"motion_blur_position",
Camera::MOTION_NUM_POSITIONS,
Camera::MOTION_POSITION_CENTER);
bcam.rolling_shutter_type =
(Camera::RollingShutterType)get_enum(cscene,
bcam.rolling_shutter_type = (Camera::RollingShutterType)get_enum(
cscene,
"rolling_shutter_type",
Camera::ROLLING_SHUTTER_NUM_TYPES,
Camera::ROLLING_SHUTTER_NONE);
bcam.rolling_shutter_duration = RNA_float_get(&cscene, "rolling_shutter_duration");
/* border */
if(b_render.use_border()) {
if (b_render.use_border()) {
bcam.border.left = b_render.border_min_x();
bcam.border.right = b_render.border_max_x();
bcam.border.bottom = b_render.border_min_y();
@@ -529,10 +520,10 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
/* camera object */
BL::Object b_ob = b_scene.camera();
if(b_override)
if (b_override)
b_ob = b_override;
if(b_ob) {
if (b_ob) {
BL::Array<float, 16> b_ob_matrix;
blender_camera_from_object(&bcam, b_engine, b_ob);
b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix);
@@ -545,7 +536,7 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
/* dicing camera */
b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera"));
if(b_ob) {
if (b_ob) {
BL::Array<float, 16> b_ob_matrix;
blender_camera_from_object(&bcam, b_engine, b_ob);
b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix);
@@ -558,12 +549,10 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
}
}
void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time)
void BlenderSync::sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time)
{
if(!b_ob)
if (!b_ob)
return;
Camera *cam = scene->camera;
@@ -572,18 +561,18 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
Transform tfm = get_transform(b_ob_matrix);
tfm = blender_camera_matrix(tfm, cam->type, cam->panorama_type);
if(motion_time == 0.0f) {
if (motion_time == 0.0f) {
/* When motion blur is not centered in frame, cam->matrix gets reset. */
cam->matrix = tfm;
}
/* Set transform in motion array. */
int motion_step = cam->motion_step(motion_time);
if(motion_step >= 0) {
if (motion_step >= 0) {
cam->motion[motion_step] = tfm;
}
if(cam->type == CAMERA_PERSPECTIVE) {
if (cam->type == CAMERA_PERSPECTIVE) {
BlenderCamera bcam;
float aspectratio, sensor_size;
blender_camera_init(&bcam, b_render);
@@ -593,23 +582,19 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
bcam.pixelaspect.y = b_render.pixel_aspect_y();
blender_camera_from_object(&bcam, b_engine, b_ob);
blender_camera_viewplane(&bcam,
width, height,
NULL,
&aspectratio,
&sensor_size);
blender_camera_viewplane(&bcam, width, height, NULL, &aspectratio, &sensor_size);
/* TODO(sergey): De-duplicate calculation with camera sync. */
float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio);
if(fov != cam->fov) {
if (fov != cam->fov) {
VLOG(1) << "Camera " << b_ob.name() << " FOV change detected.";
if(motion_time == 0.0f) {
if (motion_time == 0.0f) {
cam->fov = fov;
}
else if(motion_time == -1.0f) {
else if (motion_time == -1.0f) {
cam->fov_pre = fov;
cam->use_perspective_motion = true;
}
else if(motion_time == 1.0f) {
else if (motion_time == 1.0f) {
cam->fov_post = fov;
cam->use_perspective_motion = true;
}
@@ -619,22 +604,24 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
/* Sync 3D View Camera */
static void blender_camera_view_subset(BL::RenderEngine& b_engine,
BL::RenderSettings& b_render,
BL::Scene& b_scene,
BL::Object& b_ob,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height,
static void blender_camera_view_subset(BL::RenderEngine &b_engine,
BL::RenderSettings &b_render,
BL::Scene &b_scene,
BL::Object &b_ob,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height,
BoundBox2D *view_box,
BoundBox2D *cam_box);
static void blender_camera_from_view(BlenderCamera *bcam,
BL::RenderEngine& b_engine,
BL::Scene& b_scene,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height,
BL::RenderEngine &b_engine,
BL::Scene &b_scene,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height,
bool skip_panorama = false)
{
/* 3d view parameters */
@@ -646,14 +633,14 @@ static void blender_camera_from_view(BlenderCamera *bcam,
BL::CurveMapping b_shutter_curve(b_scene.render().motion_blur_shutter_curve());
curvemapping_to_array(b_shutter_curve, bcam->shutter_curve, RAMP_TABLE_SIZE);
if(b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA) {
if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA) {
/* camera view */
BL::Object b_ob = (b_v3d.use_local_camera())? b_v3d.camera(): b_scene.camera();
BL::Object b_ob = (b_v3d.use_local_camera()) ? b_v3d.camera() : b_scene.camera();
if(b_ob) {
if (b_ob) {
blender_camera_from_object(bcam, b_engine, b_ob, skip_panorama);
if(!skip_panorama && bcam->type == CAMERA_PANORAMA) {
if (!skip_panorama && bcam->type == CAMERA_PANORAMA) {
/* in panorama camera view, we map viewplane to camera border */
BoundBox2D view_box, cam_box;
@@ -664,7 +651,8 @@ static void blender_camera_from_view(BlenderCamera *bcam,
b_ob,
b_v3d,
b_rv3d,
width, height,
width,
height,
&view_box,
&cam_box);
@@ -673,22 +661,22 @@ static void blender_camera_from_view(BlenderCamera *bcam,
else {
/* magic zoom formula */
bcam->zoom = (float)b_rv3d.view_camera_zoom();
bcam->zoom = (1.41421f + bcam->zoom/50.0f);
bcam->zoom = (1.41421f + bcam->zoom / 50.0f);
bcam->zoom *= bcam->zoom;
bcam->zoom = 2.0f/bcam->zoom;
bcam->zoom = 2.0f / bcam->zoom;
/* offset */
bcam->offset = get_float2(b_rv3d.view_camera_offset());
}
}
}
else if(b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_ORTHO) {
else if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_ORTHO) {
/* orthographic view */
bcam->farclip *= 0.5f;
bcam->nearclip = -bcam->farclip;
float sensor_size;
if(bcam->sensor_fit == BlenderCamera::VERTICAL)
if (bcam->sensor_fit == BlenderCamera::VERTICAL)
sensor_size = bcam->sensor_height;
else
sensor_size = bcam->sensor_width;
@@ -703,13 +691,14 @@ static void blender_camera_from_view(BlenderCamera *bcam,
bcam->matrix = transform_inverse(get_transform(b_rv3d.view_matrix()));
}
static void blender_camera_view_subset(BL::RenderEngine& b_engine,
BL::RenderSettings& b_render,
BL::Scene& b_scene,
BL::Object& b_ob,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height,
static void blender_camera_view_subset(BL::RenderEngine &b_engine,
BL::RenderSettings &b_render,
BL::Scene &b_scene,
BL::Object &b_ob,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height,
BoundBox2D *view_box,
BoundBox2D *cam_box)
{
@@ -721,36 +710,36 @@ static void blender_camera_view_subset(BL::RenderEngine& b_engine,
blender_camera_init(&view_bcam, b_render);
blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true);
blender_camera_viewplane(&view_bcam, width, height,
&view, &view_aspect, &sensor_size);
blender_camera_viewplane(&view_bcam, width, height, &view, &view_aspect, &sensor_size);
/* get camera viewplane */
BlenderCamera cam_bcam;
blender_camera_init(&cam_bcam, b_render);
blender_camera_from_object(&cam_bcam, b_engine, b_ob, true);
blender_camera_viewplane(&cam_bcam, cam_bcam.full_width, cam_bcam.full_height,
&cam, &cam_aspect, &sensor_size);
blender_camera_viewplane(
&cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size);
/* return */
*view_box = view * (1.0f/view_aspect);
*cam_box = cam * (1.0f/cam_aspect);
*view_box = view * (1.0f / view_aspect);
*cam_box = cam * (1.0f / cam_aspect);
}
static void blender_camera_border_subset(BL::RenderEngine& b_engine,
BL::RenderSettings& b_render,
BL::Scene& b_scene,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
BL::Object& b_ob,
int width, int height,
static void blender_camera_border_subset(BL::RenderEngine &b_engine,
BL::RenderSettings &b_render,
BL::Scene &b_scene,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
BL::Object &b_ob,
int width,
int height,
const BoundBox2D &border,
BoundBox2D *result)
{
/* Determine camera viewport subset. */
BoundBox2D view_box, cam_box;
blender_camera_view_subset(b_engine, b_render, b_scene, b_ob, b_v3d, b_rv3d, width, height,
&view_box, &cam_box);
blender_camera_view_subset(
b_engine, b_render, b_scene, b_ob, b_v3d, b_rv3d, width, height, &view_box, &cam_box);
/* Determine viewport subset matching given border. */
cam_box = cam_box.make_relative_to(view_box);
@@ -758,24 +747,25 @@ static void blender_camera_border_subset(BL::RenderEngine& b_engine,
}
static void blender_camera_border(BlenderCamera *bcam,
BL::RenderEngine& b_engine,
BL::RenderSettings& b_render,
BL::Scene& b_scene,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height)
BL::RenderEngine &b_engine,
BL::RenderSettings &b_render,
BL::Scene &b_scene,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height)
{
bool is_camera_view;
/* camera view? */
is_camera_view = b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA;
if(!is_camera_view) {
if (!is_camera_view) {
/* for non-camera view check whether render border is enabled for viewport
* and if so use border from 3d viewport
* assume viewport has got correctly clamped border already
*/
if(b_v3d.use_render_border()) {
if (b_v3d.use_render_border()) {
bcam->border.left = b_v3d.render_border_min_x();
bcam->border.right = b_v3d.render_border_max_x();
bcam->border.bottom = b_v3d.render_border_min_y();
@@ -784,9 +774,9 @@ static void blender_camera_border(BlenderCamera *bcam,
return;
}
BL::Object b_ob = (b_v3d.use_local_camera())? b_v3d.camera(): b_scene.camera();
BL::Object b_ob = (b_v3d.use_local_camera()) ? b_v3d.camera() : b_scene.camera();
if(!b_ob)
if (!b_ob)
return;
/* Determine camera border inside the viewport. */
@@ -797,11 +787,12 @@ static void blender_camera_border(BlenderCamera *bcam,
b_v3d,
b_rv3d,
b_ob,
width, height,
width,
height,
full_border,
&bcam->viewport_camera_border);
if(!b_render.use_border()) {
if (!b_render.use_border()) {
return;
}
@@ -817,38 +808,29 @@ static void blender_camera_border(BlenderCamera *bcam,
b_v3d,
b_rv3d,
b_ob,
width, height,
width,
height,
bcam->border,
&bcam->border);
bcam->border = bcam->border.clamp();
}
void BlenderSync::sync_view(BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height)
void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height)
{
BlenderCamera bcam;
BL::RenderSettings b_render_settings(b_scene.render());
blender_camera_init(&bcam, b_render_settings);
blender_camera_from_view(&bcam,
b_engine,
b_scene,
b_v3d,
b_rv3d,
width, height);
blender_camera_border(&bcam,
b_engine,
b_render_settings,
b_scene,
b_v3d,
b_rv3d,
width, height);
blender_camera_from_view(&bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height);
blender_camera_border(&bcam, b_engine, b_render_settings, b_scene, b_v3d, b_rv3d, width, height);
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
blender_camera_sync(scene->camera, &bcam, width, height, "", &cscene);
/* dicing camera */
BL::Object b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera"));
if(b_ob) {
if (b_ob) {
BL::Array<float, 16> b_ob_matrix;
blender_camera_from_object(&bcam, b_engine, b_ob);
b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix);
@@ -861,11 +843,12 @@ void BlenderSync::sync_view(BL::SpaceView3D& b_v3d,
}
}
BufferParams BlenderSync::get_buffer_params(BL::RenderSettings& b_render,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
BufferParams BlenderSync::get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width, int height)
int width,
int height)
{
BufferParams params;
bool use_border = false;
@@ -873,12 +856,12 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings& b_render,
params.full_width = width;
params.full_height = height;
if(b_v3d && b_rv3d && b_rv3d.view_perspective() != BL::RegionView3D::view_perspective_CAMERA)
if (b_v3d && b_rv3d && b_rv3d.view_perspective() != BL::RegionView3D::view_perspective_CAMERA)
use_border = b_v3d.use_render_border();
else
use_border = b_render.use_border();
if(use_border) {
if (use_border) {
/* border render */
/* the viewport may offset the border outside the view */
BoundBox2D border = cam->border.clamp();
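The other change that dominates this file is how over-long declarations and calls are broken: hand-grouped parameters (width and height sharing a line, arguments aligned under the opening parenthesis) are split so that each parameter gets its own line, or the break moves to just after the opening parenthesis. A small self-contained sketch of the declaration pattern, using illustrative stand-in names rather than the real BL:: wrapper types:

/* Stand-ins for the real types; illustration only. */
class RenderEngine;
class RenderSettings;
class BoundBox2D;

/* Before: parameters grouped and aligned by hand. */
static void camera_view_subset(RenderEngine& engine,
                               RenderSettings& render,
                               int width, int height,
                               BoundBox2D *view_box,
                               BoundBox2D *cam_box);

/* After: the reference binds to the name and each parameter sits on its own line
 * once the declaration no longer fits within the column limit. */
static void camera_view_subset(RenderEngine &engine,
                               RenderSettings &render,
                               int width,
                               int height,
                               BoundBox2D *view_box,
                               BoundBox2D *cam_box);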

File diff suppressed because it is too large


@@ -19,37 +19,38 @@
CCL_NAMESPACE_BEGIN
int blender_device_threads(BL::Scene& b_scene)
int blender_device_threads(BL::Scene &b_scene)
{
BL::RenderSettings b_r = b_scene.render();
if(b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED)
if (b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED)
return b_r.threads();
else
return 0;
}
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background)
DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene, bool background)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Default to CPU device. */
DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front();
if(get_enum(cscene, "device") == 2) {
if (get_enum(cscene, "device") == 2) {
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if(!devices.empty()) {
if (!devices.empty()) {
device = devices.front();
}
}
else if(get_enum(cscene, "device") == 1) {
else if (get_enum(cscene, "device") == 1) {
/* Find cycles preferences. */
PointerRNA cpreferences;
BL::Preferences::addons_iterator b_addon_iter;
for(b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end(); ++b_addon_iter) {
if(b_addon_iter->module() == "cycles") {
for (b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end();
++b_addon_iter) {
if (b_addon_iter->module() == "cycles") {
cpreferences = b_addon_iter->preferences().ptr;
break;
}
@@ -63,41 +64,38 @@ DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scen
COMPUTE_DEVICE_NUM = 3,
};
ComputeDevice compute_device = (ComputeDevice)get_enum(cpreferences,
"compute_device_type",
COMPUTE_DEVICE_NUM,
COMPUTE_DEVICE_CPU);
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
if(compute_device != COMPUTE_DEVICE_CPU) {
if (compute_device != COMPUTE_DEVICE_CPU) {
/* Query GPU devices with matching types. */
uint mask = DEVICE_MASK_CPU;
if(compute_device == COMPUTE_DEVICE_CUDA) {
if (compute_device == COMPUTE_DEVICE_CUDA) {
mask |= DEVICE_MASK_CUDA;
}
else if(compute_device == COMPUTE_DEVICE_OPENCL) {
else if (compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL;
}
vector<DeviceInfo> devices = Device::available_devices(mask);
/* Match device preferences and available devices. */
vector<DeviceInfo> used_devices;
RNA_BEGIN(&cpreferences, device, "devices") {
if(get_boolean(device, "use")) {
RNA_BEGIN (&cpreferences, device, "devices") {
if (get_boolean(device, "use")) {
string id = get_string(device, "id");
foreach(DeviceInfo& info, devices) {
if(info.id == id) {
foreach (DeviceInfo &info, devices) {
if (info.id == id) {
used_devices.push_back(info);
break;
}
}
}
} RNA_END;
}
RNA_END;
if(!used_devices.empty()) {
if (!used_devices.empty()) {
int threads = blender_device_threads(b_scene);
device = Device::get_multi_device(used_devices,
threads,
background);
device = Device::get_multi_device(used_devices, threads, background);
}
/* Else keep using the CPU device that was set before. */
}


@@ -27,10 +27,12 @@
CCL_NAMESPACE_BEGIN
/* Get number of threads to use for rendering. */
int blender_device_threads(BL::Scene& b_scene);
int blender_device_threads(BL::Scene &b_scene);
/* Convert Blender settings to device specification. */
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background);
DeviceInfo blender_device_info(BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background);
CCL_NAMESPACE_END

File diff suppressed because it is too large


@@ -37,92 +37,88 @@ CCL_NAMESPACE_BEGIN
/* Utilities */
bool BlenderSync::BKE_object_is_modified(BL::Object& b_ob)
bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob)
{
/* test if we can instance or if the object is modified */
if(b_ob.type() == BL::Object::type_META) {
if (b_ob.type() == BL::Object::type_META) {
/* multi-user and dupli metaballs are fused, can't instance */
return true;
}
else if(ccl::BKE_object_is_modified(b_ob, b_scene, preview)) {
else if (ccl::BKE_object_is_modified(b_ob, b_scene, preview)) {
/* modifiers */
return true;
}
else {
/* object level material links */
BL::Object::material_slots_iterator slot;
for(b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot)
if(slot->link() == BL::MaterialSlot::link_OBJECT)
for (b_ob.material_slots.begin(slot); slot != b_ob.material_slots.end(); ++slot)
if (slot->link() == BL::MaterialSlot::link_OBJECT)
return true;
}
return false;
}
bool BlenderSync::object_is_mesh(BL::Object& b_ob)
bool BlenderSync::object_is_mesh(BL::Object &b_ob)
{
BL::ID b_ob_data = b_ob.data();
if(!b_ob_data) {
if (!b_ob_data) {
return false;
}
if(b_ob.type() == BL::Object::type_CURVE) {
if (b_ob.type() == BL::Object::type_CURVE) {
/* Skip exporting curves without faces, overhead can be
* significant if there are many for path animation. */
BL::Curve b_curve(b_ob.data());
return (b_curve.bevel_object() ||
b_curve.extrude() != 0.0f ||
b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D ||
b_ob.modifiers.length());
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
}
else {
return (b_ob_data.is_a(&RNA_Mesh) ||
b_ob_data.is_a(&RNA_Curve) ||
return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) ||
b_ob_data.is_a(&RNA_MetaBall));
}
}
bool BlenderSync::object_is_light(BL::Object& b_ob)
bool BlenderSync::object_is_light(BL::Object &b_ob)
{
BL::ID b_ob_data = b_ob.data();
return (b_ob_data && b_ob_data.is_a(&RNA_Light));
}
static uint object_ray_visibility(BL::Object& b_ob)
static uint object_ray_visibility(BL::Object &b_ob)
{
PointerRNA cvisibility = RNA_pointer_get(&b_ob.ptr, "cycles_visibility");
uint flag = 0;
flag |= get_boolean(cvisibility, "camera")? PATH_RAY_CAMERA: 0;
flag |= get_boolean(cvisibility, "diffuse")? PATH_RAY_DIFFUSE: 0;
flag |= get_boolean(cvisibility, "glossy")? PATH_RAY_GLOSSY: 0;
flag |= get_boolean(cvisibility, "transmission")? PATH_RAY_TRANSMIT: 0;
flag |= get_boolean(cvisibility, "shadow")? PATH_RAY_SHADOW: 0;
flag |= get_boolean(cvisibility, "scatter")? PATH_RAY_VOLUME_SCATTER: 0;
flag |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0;
flag |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0;
flag |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0;
flag |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0;
flag |= get_boolean(cvisibility, "shadow") ? PATH_RAY_SHADOW : 0;
flag |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0;
return flag;
}
/* Light */
void BlenderSync::sync_light(BL::Object& b_parent,
void BlenderSync::sync_light(BL::Object &b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object& b_ob,
BL::Object& b_ob_instance,
BL::Object &b_ob,
BL::Object &b_ob_instance,
int random_id,
Transform& tfm,
Transform &tfm,
bool *use_portal)
{
/* test if we need to sync */
Light *light;
ObjectKey key(b_parent, persistent_id, b_ob_instance);
if(!light_map.sync(&light, b_ob, b_parent, key)) {
if(light->is_portal)
if (!light_map.sync(&light, b_ob, b_parent, key)) {
if (light->is_portal)
*use_portal = true;
return;
}
@@ -130,7 +126,7 @@ void BlenderSync::sync_light(BL::Object& b_parent,
BL::Light b_light(b_ob.data());
/* type */
switch(b_light.type()) {
switch (b_light.type()) {
case BL::Light::type_POINT: {
BL::PointLight b_point_light(b_light);
light->size = b_point_light.shadow_soft_size();
@@ -163,7 +159,7 @@ void BlenderSync::sync_light(BL::Object& b_parent,
light->axisu = transform_get_column(&tfm, 0);
light->axisv = transform_get_column(&tfm, 1);
light->sizeu = b_area_light.size();
switch(b_area_light.shape()) {
switch (b_area_light.shape()) {
case BL::AreaLight::shape_SQUARE:
light->sizev = light->sizeu;
light->round = false;
@@ -192,7 +188,7 @@ void BlenderSync::sync_light(BL::Object& b_parent,
light->tfm = tfm;
/* shader */
vector<Shader*> used_shaders;
vector<Shader *> used_shaders;
find_shader(b_light, used_shaders, scene->default_light);
light->shader = used_shaders[0];
@@ -203,26 +199,26 @@ void BlenderSync::sync_light(BL::Object& b_parent,
light->use_mis = get_boolean(clight, "use_multiple_importance_sampling");
int samples = get_int(clight, "samples");
if(get_boolean(cscene, "use_square_samples"))
if (get_boolean(cscene, "use_square_samples"))
light->samples = samples * samples;
else
light->samples = samples;
light->max_bounces = get_int(clight, "max_bounces");
if(b_ob != b_ob_instance) {
if (b_ob != b_ob_instance) {
light->random_id = random_id;
}
else {
light->random_id = hash_int_2d(hash_string(b_ob.name().c_str()), 0);
}
if(light->type == LIGHT_AREA)
if (light->type == LIGHT_AREA)
light->is_portal = get_boolean(clight, "is_portal");
else
light->is_portal = false;
if(light->is_portal)
if (light->is_portal)
*use_portal = true;
/* visibility */
@@ -240,30 +236,23 @@ void BlenderSync::sync_background_light(bool use_portal)
{
BL::World b_world = b_scene.world();
if(b_world) {
if (b_world) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
enum SamplingMethod {
SAMPLING_NONE = 0,
SAMPLING_AUTOMATIC,
SAMPLING_MANUAL,
SAMPLING_NUM
};
enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
bool sample_as_light = (sampling_method != SAMPLING_NONE);
if(sample_as_light || use_portal) {
if (sample_as_light || use_portal) {
/* test if we need to sync */
Light *light;
ObjectKey key(b_world, 0, b_world);
if(light_map.sync(&light, b_world, b_world, key) ||
world_recalc ||
b_world.ptr.data != world_map)
{
if (light_map.sync(&light, b_world, b_world, key) || world_recalc ||
b_world.ptr.data != world_map) {
light->type = LIGHT_BACKGROUND;
if(sampling_method == SAMPLING_MANUAL) {
if (sampling_method == SAMPLING_MANUAL) {
light->map_resolution = get_int(cworld, "sample_map_resolution");
}
else {
@@ -274,7 +263,7 @@ void BlenderSync::sync_background_light(bool use_portal)
light->max_bounces = get_int(cworld, "max_bounces");
int samples = get_int(cworld, "samples");
if(get_boolean(cscene, "use_square_samples"))
if (get_boolean(cscene, "use_square_samples"))
light->samples = samples * samples;
else
light->samples = samples;
@@ -291,32 +280,30 @@ void BlenderSync::sync_background_light(bool use_portal)
/* Object */
Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
BL::ViewLayer& b_view_layer,
BL::DepsgraphObjectInstance& b_instance,
Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
bool show_self,
bool show_particles,
BlenderObjectCulling& culling,
BlenderObjectCulling &culling,
bool *use_portal)
{
const bool is_instance = b_instance.is_instance();
BL::Object b_ob = b_instance.object();
BL::Object b_parent = is_instance ? b_instance.parent()
: b_instance.object();
BL::Object b_ob_instance = is_instance ? b_instance.instance_object()
: b_ob;
BL::Object b_parent = is_instance ? b_instance.parent() : b_instance.object();
BL::Object b_ob_instance = is_instance ? b_instance.instance_object() : b_ob;
const bool motion = motion_time != 0.0f;
/*const*/ Transform tfm = get_transform(b_ob.matrix_world());
int *persistent_id = NULL;
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id_array;
if(is_instance) {
if (is_instance) {
persistent_id_array = b_instance.persistent_id();
persistent_id = persistent_id_array.data;
}
/* light is handled separately */
if(!motion && object_is_light(b_ob)) {
if (!motion && object_is_light(b_ob)) {
/* TODO: don't use lights for excluded layers used as mask layer,
* when dynamic overrides are back. */
#if 0
@@ -337,12 +324,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
}
/* only interested in object that we can create meshes from */
if(!object_is_mesh(b_ob)) {
if (!object_is_mesh(b_ob)) {
return NULL;
}
/* Perform object culling. */
if(culling.test(scene, b_ob, tfm)) {
if (culling.test(scene, b_ob, tfm)) {
return NULL;
}
@@ -352,7 +339,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
b_parent.holdout_get(PointerRNA_NULL, b_view_layer);
uint visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL_VISIBILITY;
if(b_parent.ptr.data != b_ob.ptr.data) {
if (b_parent.ptr.data != b_ob.ptr.data) {
visibility &= object_ray_visibility(b_parent);
}
@@ -365,12 +352,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* Clear camera visibility for indirect only objects. */
bool use_indirect_only = b_parent.indirect_only_get(PointerRNA_NULL, b_view_layer);
if(use_indirect_only) {
if (use_indirect_only) {
visibility &= ~PATH_RAY_CAMERA;
}
/* Don't export completely invisible objects. */
if(visibility == 0) {
if (visibility == 0) {
return NULL;
}
@@ -379,18 +366,18 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
Object *object;
/* motion vector case */
if(motion) {
if (motion) {
object = object_map.find(key);
if(object && object->use_motion()) {
if (object && object->use_motion()) {
/* Set transform at matching motion time step. */
int time_index = object->motion_step(motion_time);
if(time_index >= 0) {
if (time_index >= 0) {
object->motion[time_index] = tfm;
}
/* mesh deformation */
if(object->mesh)
if (object->mesh)
sync_mesh_motion(b_depsgraph, b_ob, object, motion_time);
}
@@ -400,28 +387,29 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* test if we need to sync */
bool object_updated = false;
if(object_map.sync(&object, b_ob, b_parent, key))
if (object_map.sync(&object, b_ob, b_parent, key))
object_updated = true;
/* mesh sync */
object->mesh = sync_mesh(b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
object->mesh = sync_mesh(
b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
/* special case not tracked by object update flags */
/* holdout */
if(use_holdout != object->use_holdout) {
if (use_holdout != object->use_holdout) {
object->use_holdout = use_holdout;
scene->object_manager->tag_update(scene);
object_updated = true;
}
if(visibility != object->visibility) {
if (visibility != object->visibility) {
object->visibility = visibility;
object_updated = true;
}
bool is_shadow_catcher = get_boolean(cobject, "is_shadow_catcher");
if(is_shadow_catcher != object->is_shadow_catcher) {
if (is_shadow_catcher != object->is_shadow_catcher) {
object->is_shadow_catcher = is_shadow_catcher;
object_updated = true;
}
@@ -429,8 +417,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* sync the asset name for Cryptomatte */
BL::Object parent = b_ob.parent();
ustring parent_name;
if(parent) {
while(parent.parent()) {
if (parent) {
while (parent.parent()) {
parent = parent.parent();
}
parent_name = parent.name();
@@ -438,7 +426,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
else {
parent_name = b_ob.name();
}
if(object->asset_name != parent_name) {
if (object->asset_name != parent_name) {
object->asset_name = parent_name;
object_updated = true;
}
@@ -446,7 +434,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* object sync
* transform comparison should not be needed, but duplis don't work perfect
* in the depsgraph and may not signal changes, so this is a workaround */
if(object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) {
if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) {
object->name = b_ob.name().c_str();
object->pass_id = b_ob.pass_index();
object->tfm = tfm;
@@ -454,17 +442,17 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* motion blur */
Scene::MotionType need_motion = scene->need_motion();
if(need_motion != Scene::MOTION_NONE && object->mesh) {
if (need_motion != Scene::MOTION_NONE && object->mesh) {
Mesh *mesh = object->mesh;
mesh->use_motion_blur = false;
mesh->motion_steps = 0;
uint motion_steps;
if(need_motion == Scene::MOTION_BLUR) {
if (need_motion == Scene::MOTION_BLUR) {
motion_steps = object_motion_steps(b_parent, b_ob);
mesh->motion_steps = motion_steps;
if(motion_steps && object_use_deform_motion(b_parent, b_ob)) {
if (motion_steps && object_use_deform_motion(b_parent, b_ob)) {
mesh->use_motion_blur = true;
}
}
@@ -476,18 +464,19 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
object->motion.clear();
object->motion.resize(motion_steps, transform_empty());
if(motion_steps) {
object->motion[motion_steps/2] = tfm;
if (motion_steps) {
object->motion[motion_steps / 2] = tfm;
for(size_t step = 0; step < motion_steps; step++) {
for (size_t step = 0; step < motion_steps; step++) {
motion_times.insert(object->motion_time(step));
}
}
}
/* dupli texture coordinates and random_id */
if(is_instance) {
object->dupli_generated = 0.5f*get_float3(b_instance.orco()) - make_float3(0.5f, 0.5f, 0.5f);
if (is_instance) {
object->dupli_generated = 0.5f * get_float3(b_instance.orco()) -
make_float3(0.5f, 0.5f, 0.5f);
object->dupli_uv = get_float2(b_instance.uv());
object->random_id = b_instance.random_id();
}
@@ -500,7 +489,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
object->tag_update(scene);
}
if(is_instance) {
if (is_instance) {
/* Sync possible particle data. */
sync_dupli_particle(b_parent, b_instance, object);
}
@@ -510,12 +499,12 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* Object Loop */
void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, float motion_time)
{
/* layer data */
bool motion = motion_time != 0.0f;
if(!motion) {
if (!motion) {
/* prepare for sync */
light_map.pre_sync();
mesh_map.pre_sync();
@@ -537,10 +526,9 @@ void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
BL::Depsgraph::object_instances_iterator b_instance_iter;
for(b_depsgraph.object_instances.begin(b_instance_iter);
for (b_depsgraph.object_instances.begin(b_instance_iter);
b_instance_iter != b_depsgraph.object_instances.end() && !cancel;
++b_instance_iter)
{
++b_instance_iter) {
BL::DepsgraphObjectInstance b_instance = *b_instance_iter;
BL::Object b_ob = b_instance.object();
@@ -551,7 +539,7 @@ void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
const bool show_self = b_instance.show_self();
const bool show_particles = b_instance.show_particles();
if(show_self || show_particles) {
if (show_self || show_particles) {
/* object itself */
sync_object(b_depsgraph,
b_view_layer,
@@ -568,36 +556,37 @@ void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
progress.set_sync_status("");
if(!cancel && !motion) {
if (!cancel && !motion) {
sync_background_light(use_portal);
/* handle removed data and modified pointers */
if(light_map.post_sync())
if (light_map.post_sync())
scene->light_manager->tag_update(scene);
if(mesh_map.post_sync())
if (mesh_map.post_sync())
scene->mesh_manager->tag_update(scene);
if(object_map.post_sync())
if (object_map.post_sync())
scene->object_manager->tag_update(scene);
if(particle_system_map.post_sync())
if (particle_system_map.post_sync())
scene->particle_system_manager->tag_update(scene);
}
if(motion)
if (motion)
mesh_motion_synced.clear();
}
void BlenderSync::sync_motion(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::Object& b_override,
int width, int height,
void BlenderSync::sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::Object &b_override,
int width,
int height,
void **python_thread_state)
{
if(scene->need_motion() == Scene::MOTION_NONE)
if (scene->need_motion() == Scene::MOTION_NONE)
return;
/* get camera object here to deal with camera switch */
BL::Object b_cam = b_scene.camera();
if(b_override)
if (b_override)
b_cam = b_override;
Camera prevcam = *(scene->camera);
@@ -606,11 +595,10 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
float subframe_center = b_scene.frame_subframe();
float frame_center_delta = 0.0f;
if(scene->need_motion() != Scene::MOTION_PASS &&
scene->camera->motion_position != Camera::MOTION_POSITION_CENTER)
{
if (scene->need_motion() != Scene::MOTION_PASS &&
scene->camera->motion_position != Camera::MOTION_POSITION_CENTER) {
float shuttertime = scene->camera->shuttertime;
if(scene->camera->motion_position == Camera::MOTION_POSITION_END) {
if (scene->camera->motion_position == Camera::MOTION_POSITION_END) {
frame_center_delta = -shuttertime * 0.5f;
}
else {
@@ -633,20 +621,20 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
motion_times.insert(1.0f);
/* note iteration over motion_times set happens in sorted order */
foreach(float relative_time, motion_times) {
foreach (float relative_time, motion_times) {
/* center time is already handled. */
if(relative_time == 0.0f) {
if (relative_time == 0.0f) {
continue;
}
VLOG(1) << "Synchronizing motion for the relative time "
<< relative_time << ".";
VLOG(1) << "Synchronizing motion for the relative time " << relative_time << ".";
/* fixed shutter time to get previous and next frame for motion pass */
float shuttertime = scene->motion_shutter_time();
/* compute frame and subframe time */
float time = frame_center + subframe_center + frame_center_delta + relative_time * shuttertime * 0.5f;
float time = frame_center + subframe_center + frame_center_delta +
relative_time * shuttertime * 0.5f;
int frame = (int)floorf(time);
float subframe = time - frame;
@@ -656,11 +644,8 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
python_thread_state_save(python_thread_state);
/* sync camera, only supports two times at the moment */
if(relative_time == -1.0f || relative_time == 1.0f) {
sync_camera_motion(b_render,
b_cam,
width, height,
relative_time);
if (relative_time == -1.0f || relative_time == 1.0f) {
sync_camera_motion(b_render, b_cam, width, height, relative_time);
}
/* sync object */
@@ -675,7 +660,7 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
python_thread_state_save(python_thread_state);
/* tag camera for motion update */
if(scene->camera->motion_modified(prevcam))
if (scene->camera->motion_modified(prevcam))
scene->camera->tag_update();
}

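The hunks above are representative of the whole commit: the visible changes are whitespace and line-wrapping only, e.g. a space after `if`/`for`/`switch`/`foreach`, references written as `Type &name`, and long signatures or calls broken with one argument per line. A minimal, hypothetical sketch of the resulting style follows (not code from the files above; the wrapping is forced here for illustration, since clang-format only breaks lines that exceed the configured column limit):

/* Hypothetical example of the post-format conventions: space after control
 * keywords, "Type &name" references, brace on the same line, "else" on its
 * own line, and wrapped parameter lists aligned under the first parameter. */
#include <cstdio>

static void sync_example(const char *name,
                         int width,
                         int height,
                         bool use_motion_blur)
{
  if (use_motion_blur) {
    printf("%s: %dx%d (motion blur)\n", name, width, height);
  }
  else {
    printf("%s: %dx%d\n", name, width, height);
  }
}

int main()
{
  sync_example("scene", 1920, 1080, true);
  return 0;
}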

@@ -22,7 +22,7 @@
CCL_NAMESPACE_BEGIN
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene &b_scene)
: use_scene_camera_cull_(false),
use_camera_cull_(false),
camera_cull_margin_(0.0f),
@@ -30,7 +30,7 @@ BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
use_distance_cull_(false),
distance_cull_margin_(0.0f)
{
if(b_scene.render().use_simplify()) {
if (b_scene.render().use_simplify()) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
use_scene_camera_cull_ = scene->camera->type != CAMERA_PANORAMA &&
@@ -43,15 +43,15 @@ BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene& b_scene)
camera_cull_margin_ = get_float(cscene, "camera_cull_margin");
distance_cull_margin_ = get_float(cscene, "distance_cull_margin");
if(distance_cull_margin_ == 0.0f) {
if (distance_cull_margin_ == 0.0f) {
use_scene_distance_cull_ = false;
}
}
}
void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
void BlenderObjectCulling::init_object(Scene *scene, BL::Object &b_ob)
{
if(!use_scene_camera_cull_ && !use_scene_distance_cull_) {
if (!use_scene_camera_cull_ && !use_scene_distance_cull_) {
return;
}
@@ -60,33 +60,30 @@ void BlenderObjectCulling::init_object(Scene *scene, BL::Object& b_ob)
use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull");
use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull");
if(use_camera_cull_ || use_distance_cull_) {
if (use_camera_cull_ || use_distance_cull_) {
/* Need to have proper projection matrix. */
scene->camera->update(scene);
}
}
bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
bool BlenderObjectCulling::test(Scene *scene, BL::Object &b_ob, Transform &tfm)
{
if(!use_camera_cull_ && !use_distance_cull_) {
if (!use_camera_cull_ && !use_distance_cull_) {
return false;
}
/* Compute world space bounding box corners. */
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for(int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0],
boundbox[3 * i + 1],
boundbox[3 * i + 2]);
for (int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
return ((camera_culled && distance_culled) ||
(camera_culled && !use_distance_cull_) ||
return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) ||
(distance_culled && !use_camera_cull_));
}
@@ -96,35 +93,31 @@ bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
{
Camera *cam = scene->camera;
const ProjectionTransform& worldtondc = cam->worldtondc;
const ProjectionTransform &worldtondc = cam->worldtondc;
float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
bool all_behind = true;
for(int i = 0; i < 8; ++i) {
for (int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b),
dot(worldtondc.y, b),
dot(worldtondc.z, b),
dot(worldtondc.w, b));
float4 c = make_float4(
dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if(c.z < 0.0f) {
if (c.z < 0.0f) {
p.x = 1.0f - p.x;
p.y = 1.0f - p.y;
}
if(c.z >= -camera_cull_margin_) {
if (c.z >= -camera_cull_margin_) {
all_behind = false;
}
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
if(all_behind) {
if (all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ ||
bb_max.y <= -camera_cull_margin_);
return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_);
}
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
@@ -134,13 +127,13 @@ bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])
bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
/* Find min & max points for x & y & z on bounding box */
for(int i = 0; i < 8; ++i) {
for (int i = 0; i < 8; ++i) {
float3 p = bb[i];
bb_min = min(bb_min, p);
bb_max = max(bb_max, p);
}
float3 closest_point = max(min(bb_max,camera_position),bb_min);
float3 closest_point = max(min(bb_max, camera_position), bb_min);
return (len_squared(camera_position - closest_point) >
distance_cull_margin_ * distance_cull_margin_);
}

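The distance-culling path above boils down to clamping the camera position to the object's world-space bounding box and comparing the squared distance to that closest point against the squared margin. A self-contained sketch of just that test, with hypothetical float3 helpers standing in for Cycles' math utilities:

#include <algorithm>
#include <cstdio>

/* Hypothetical stand-ins for Cycles' float3/min/max/len_squared helpers. */
struct float3 {
  float x, y, z;
};

static float3 min3(const float3 &a, const float3 &b)
{
  return {std::min(a.x, b.x), std::min(a.y, b.y), std::min(a.z, b.z)};
}

static float3 max3(const float3 &a, const float3 &b)
{
  return {std::max(a.x, b.x), std::max(a.y, b.y), std::max(a.z, b.z)};
}

static float len_squared(const float3 &a)
{
  return a.x * a.x + a.y * a.y + a.z * a.z;
}

/* True when the box [bb_min, bb_max] lies farther from the camera than the margin. */
static bool distance_culled(const float3 &bb_min,
                            const float3 &bb_max,
                            const float3 &camera_position,
                            float margin)
{
  /* Clamp the camera position to the box to get the closest point on it. */
  const float3 closest = max3(min3(bb_max, camera_position), bb_min);
  const float3 d = {camera_position.x - closest.x,
                    camera_position.y - closest.y,
                    camera_position.z - closest.z};
  return len_squared(d) > margin * margin;
}

int main()
{
  const float3 bb_min = {-1.0f, -1.0f, -1.0f};
  const float3 bb_max = {1.0f, 1.0f, 1.0f};
  const float3 cam = {5.0f, 0.0f, 0.0f};
  printf("culled: %d\n", distance_culled(bb_min, bb_max, cam, 2.0f));
  return 0;
}

The clamp yields the closest point on the box, so a single squared-distance comparison covers both the inside-the-box case (distance zero) and every outside case.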

@@ -24,15 +24,14 @@ CCL_NAMESPACE_BEGIN
class Scene;
class BlenderObjectCulling
{
public:
BlenderObjectCulling(Scene *scene, BL::Scene& b_scene);
class BlenderObjectCulling {
public:
BlenderObjectCulling(Scene *scene, BL::Scene &b_scene);
void init_object(Scene *scene, BL::Object& b_ob);
bool test(Scene *scene, BL::Object& b_ob, Transform& tfm);
void init_object(Scene *scene, BL::Object &b_ob);
bool test(Scene *scene, BL::Object &b_ob, Transform &tfm);
private:
private:
bool test_camera(Scene *scene, float3 bb[8]);
bool test_distance(Scene *scene, float3 bb[8]);

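Going only by the declarations above, the helper is meant to be driven per object: construct it once for the scene, call init_object() to pick up the per-object culling settings, then test() to decide whether the object can be skipped. A toy, self-contained sketch of that call sequence (all types here are hypothetical stand-ins, not the Blender/Cycles API):

#include <cstdio>
#include <vector>

struct ToyObject {
  bool use_camera_cull;
  float distance;
};

class ToyObjectCulling {
 public:
  explicit ToyObjectCulling(float distance_margin) : distance_margin_(distance_margin)
  {
  }

  /* Read per-object settings before testing, mirroring init_object() above. */
  void init_object(const ToyObject &ob)
  {
    use_cull_ = ob.use_camera_cull;
  }

  /* Cull when per-object culling is enabled and the object is beyond the margin. */
  bool test(const ToyObject &ob) const
  {
    return use_cull_ && ob.distance > distance_margin_;
  }

 private:
  float distance_margin_;
  bool use_cull_ = false;
};

int main()
{
  ToyObjectCulling culling(10.0f);
  std::vector<ToyObject> objects = {{true, 25.0f}, {true, 3.0f}, {false, 100.0f}};
  for (const ToyObject &ob : objects) {
    culling.init_object(ob);
    printf("culled: %d\n", culling.test(ob) ? 1 : 0);
  }
  return 0;
}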

@@ -27,25 +27,25 @@ CCL_NAMESPACE_BEGIN
/* Utilities */
bool BlenderSync::sync_dupli_particle(BL::Object& b_ob,
BL::DepsgraphObjectInstance& b_instance,
bool BlenderSync::sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object)
{
/* test if this dupli was generated from a particle system */
BL::ParticleSystem b_psys = b_instance.particle_system();
if(!b_psys)
if (!b_psys)
return false;
object->hide_on_missing_motion = true;
/* test if we need particle data */
if(!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
if (!object->mesh->need_attribute(scene, ATTR_STD_PARTICLE))
return false;
/* don't handle child particles yet */
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_instance.persistent_id();
if(persistent_id[0] >= b_psys.particles.length())
if (persistent_id[0] >= b_psys.particles.length())
return false;
/* find particle system */
@@ -56,11 +56,11 @@ bool BlenderSync::sync_dupli_particle(BL::Object& b_ob,
bool need_update = particle_system_map.sync(&psys, b_ob, b_instance.object(), key);
/* no update needed? */
if(!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
if (!need_update && !object->mesh->need_update && !scene->object_manager->need_update)
return true;
/* first time used in this sync loop? clear and tag update */
if(first_use) {
if (first_use) {
psys->particles.clear();
psys->tag_update(scene);
}
@@ -80,7 +80,7 @@ bool BlenderSync::sync_dupli_particle(BL::Object& b_ob,
psys->particles.push_back_slow(pa);
if(object->particle_index != psys->particles.size() - 1)
if (object->particle_index != psys->particles.size() - 1)
scene->object_manager->tag_update(scene);
object->particle_system = psys;
object->particle_index = psys->particles.size() - 1;

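The bookkeeping above follows a simple pattern: on the first use of a particle system in a sync loop its array is cleared, each duplicated instance appends its particle data, and the instance stores the resulting index so the data can be looked up later. A self-contained sketch of that pattern with hypothetical stand-in types:

#include <cstdio>
#include <vector>

struct ToyParticle {
  float age;
  float lifetime;
};

struct ToyParticleSystem {
  std::vector<ToyParticle> particles;
};

struct ToyObject {
  ToyParticleSystem *particle_system = nullptr;
  int particle_index = -1;
};

static void sync_dupli_particle(ToyParticleSystem &psys, ToyObject &object, bool first_use)
{
  /* First time this system is used in the sync loop: start from scratch. */
  if (first_use) {
    psys.particles.clear();
  }

  /* Append this instance's particle data and remember where it landed. */
  psys.particles.push_back({1.5f, 10.0f});
  object.particle_system = &psys;
  object.particle_index = (int)psys.particles.size() - 1;
}

int main()
{
  ToyParticleSystem psys;
  ToyObject a, b;
  sync_dupli_particle(psys, a, true);
  sync_dupli_particle(psys, b, false);
  printf("a=%d b=%d total=%zu\n", a.particle_index, b.particle_index, psys.particles.size());
  return 0;
}

Running it prints a=0 b=1 total=2, mirroring how particle_index is assigned from the growing per-system array.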

@@ -35,14 +35,14 @@
#include "util/util_types.h"
#ifdef WITH_OSL
#include "render/osl.h"
# include "render/osl.h"
#include <OSL/oslquery.h>
#include <OSL/oslconfig.h>
# include <OSL/oslquery.h>
# include <OSL/oslconfig.h>
#endif
#ifdef WITH_OPENCL
#include "device/device_intern.h"
# include "device/device_intern.h"
#endif
CCL_NAMESPACE_BEGIN
@@ -54,7 +54,7 @@ bool debug_flags_set = false;
void *pylong_as_voidptr_typesafe(PyObject *object)
{
if(object == Py_None)
if (object == Py_None)
return NULL;
return PyLong_AsVoidPtr(object);
}
@@ -82,7 +82,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
/* Synchronize OpenCL device type. */
switch(get_enum(cscene, "debug_opencl_device_type")) {
switch (get_enum(cscene, "debug_opencl_device_type")) {
case 0:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
break;
@@ -104,7 +104,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene)
}
/* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit"))*1024*1024;
flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit")) * 1024 * 1024;
return flags.opencl.device_type != opencl_device_type;
}
@@ -124,19 +124,19 @@ bool debug_flags_reset()
void python_thread_state_save(void **python_thread_state)
{
*python_thread_state = (void*)PyEval_SaveThread();
*python_thread_state = (void *)PyEval_SaveThread();
}
void python_thread_state_restore(void **python_thread_state)
{
PyEval_RestoreThread((PyThreadState*)*python_thread_state);
PyEval_RestoreThread((PyThreadState *)*python_thread_state);
*python_thread_state = NULL;
}
static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
{
const char *result = _PyUnicode_AsString(py_str);
if(result) {
if (result) {
/* 99% of the time this is enough but we better support non-unicode
* chars since Blender doesn't limit this.
*/
@@ -144,10 +144,10 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
}
else {
PyErr_Clear();
if(PyBytes_Check(py_str)) {
if (PyBytes_Check(py_str)) {
return PyBytes_AS_STRING(py_str);
}
else if((*coerce = PyUnicode_EncodeFSDefault(py_str))) {
else if ((*coerce = PyUnicode_EncodeFSDefault(py_str))) {
return PyBytes_AS_STRING(*coerce);
}
else {
@@ -165,7 +165,7 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
PyObject *path, *user_path;
int headless;
if(!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
if (!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
return NULL;
}
@@ -177,13 +177,11 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
BlenderSession::headless = headless;
VLOG(2) << "Debug flags initialized to:\n"
<< DebugFlags();
VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();
Py_RETURN_NONE;
}
static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/)
{
ShaderManager::free_memory();
@@ -197,23 +195,30 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
PyObject *pyengine, *pypreferences, *pydata, *pyregion, *pyv3d, *pyrv3d;
int preview_osl;
if(!PyArg_ParseTuple(args, "OOOOOOi", &pyengine, &pypreferences, &pydata,
&pyregion, &pyv3d, &pyrv3d, &preview_osl))
{
if (!PyArg_ParseTuple(args,
"OOOOOOi",
&pyengine,
&pypreferences,
&pydata,
&pyregion,
&pyv3d,
&pyrv3d,
&preview_osl)) {
return NULL;
}
/* RNA */
PointerRNA engineptr;
RNA_pointer_create(NULL, &RNA_RenderEngine, (void*)PyLong_AsVoidPtr(pyengine), &engineptr);
RNA_pointer_create(NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine), &engineptr);
BL::RenderEngine engine(engineptr);
PointerRNA preferencesptr;
RNA_pointer_create(NULL, &RNA_Preferences, (void*)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
BL::Preferences preferences(preferencesptr);
PointerRNA dataptr;
RNA_main_pointer_create((Main*)PyLong_AsVoidPtr(pydata), &dataptr);
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
BL::BlendData data(dataptr);
PointerRNA regionptr;
@@ -231,7 +236,7 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
/* create session */
BlenderSession *session;
if(rv3d) {
if (rv3d) {
/* interactive viewport session */
int width = region.width();
int height = region.height();
@@ -248,7 +253,7 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
static PyObject *free_func(PyObject * /*self*/, PyObject *value)
{
delete (BlenderSession*)PyLong_AsVoidPtr(value);
delete (BlenderSession *)PyLong_AsVoidPtr(value);
Py_RETURN_NONE;
}
@@ -257,13 +262,13 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydepsgraph;
if(!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph))
if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph))
return NULL;
BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, (ID*)PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
RNA_pointer_create(NULL, &RNA_Depsgraph, (ID *)PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
BL::Depsgraph b_depsgraph(depsgraphptr);
python_thread_state_save(&session->python_thread_state);
@@ -283,17 +288,28 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
const char *pass_type;
int num_pixels, depth, object_id, pass_filter;
if(!PyArg_ParseTuple(args, "OOOsiiOiiO", &pysession, &pydepsgraph, &pyobject, &pass_type, &pass_filter, &object_id, &pypixel_array, &num_pixels, &depth, &pyresult))
if (!PyArg_ParseTuple(args,
"OOOsiiOiiO",
&pysession,
&pydepsgraph,
&pyobject,
&pass_type,
&pass_filter,
&object_id,
&pypixel_array,
&num_pixels,
&depth,
&pyresult))
return NULL;
BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
BL::Depsgraph b_depsgraph(depsgraphptr);
PointerRNA objectptr;
RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pyobject), &objectptr);
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
BL::Object b_object(objectptr);
void *b_result = PyLong_AsVoidPtr(pyresult);
@@ -304,7 +320,15 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
python_thread_state_save(&session->python_thread_state);
session->bake(b_depsgraph, b_object, pass_type, pass_filter, object_id, b_bake_pixel, (size_t)num_pixels, depth, (float *)b_result);
session->bake(b_depsgraph,
b_object,
pass_type,
pass_filter,
object_id,
b_bake_pixel,
(size_t)num_pixels,
depth,
(float *)b_result);
python_thread_state_restore(&session->python_thread_state);
@@ -315,12 +339,12 @@ static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pygraph, *pyv3d, *pyrv3d;
if(!PyArg_ParseTuple(args, "OOOO", &pysession, &pygraph, &pyv3d, &pyrv3d))
if (!PyArg_ParseTuple(args, "OOOO", &pysession, &pygraph, &pyv3d, &pyrv3d))
return NULL;
BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
if(PyLong_AsVoidPtr(pyrv3d)) {
if (PyLong_AsVoidPtr(pyrv3d)) {
/* 3d view drawing */
int viewport[4];
glGetIntegerv(GL_VIEWPORT, viewport);
@@ -335,13 +359,13 @@ static PyObject *reset_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydata, *pydepsgraph;
if(!PyArg_ParseTuple(args, "OOO", &pysession, &pydata, &pydepsgraph))
if (!PyArg_ParseTuple(args, "OOO", &pysession, &pydata, &pydepsgraph))
return NULL;
BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA dataptr;
RNA_main_pointer_create((Main*)PyLong_AsVoidPtr(pydata), &dataptr);
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
BL::BlendData b_data(dataptr);
PointerRNA depsgraphptr;
@@ -361,10 +385,10 @@ static PyObject *sync_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pydepsgraph;
if(!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph))
if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph))
return NULL;
BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
PointerRNA depsgraphptr;
RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr);
@@ -379,10 +403,10 @@ static PyObject *sync_func(PyObject * /*self*/, PyObject *args)
Py_RETURN_NONE;
}
static PyObject *available_devices_func(PyObject * /*self*/, PyObject * args)
static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args)
{
const char *type_name;
if(!PyArg_ParseTuple(args, "s", &type_name)) {
if (!PyArg_ParseTuple(args, "s", &type_name)) {
return NULL;
}
@@ -393,8 +417,8 @@ static PyObject *available_devices_func(PyObject * /*self*/, PyObject * args)
vector<DeviceInfo> devices = Device::available_devices(mask);
PyObject *ret = PyTuple_New(devices.size());
for(size_t i = 0; i < devices.size(); i++) {
DeviceInfo& device = devices[i];
for (size_t i = 0; i < devices.size(); i++) {
DeviceInfo &device = devices[i];
string type_name = Device::string_from_type(device.type);
PyObject *device_tuple = PyTuple_New(3);
PyTuple_SET_ITEM(device_tuple, 0, PyUnicode_FromString(device.description.c_str()));
@@ -413,24 +437,27 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
PyObject *pydata, *pynodegroup, *pynode;
const char *filepath = NULL;
if(!PyArg_ParseTuple(args, "OOOs", &pydata, &pynodegroup, &pynode, &filepath))
if (!PyArg_ParseTuple(args, "OOOs", &pydata, &pynodegroup, &pynode, &filepath))
return NULL;
/* RNA */
PointerRNA dataptr;
RNA_main_pointer_create((Main*)PyLong_AsVoidPtr(pydata), &dataptr);
RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr);
BL::BlendData b_data(dataptr);
PointerRNA nodeptr;
RNA_pointer_create((ID*)PyLong_AsVoidPtr(pynodegroup), &RNA_ShaderNodeScript, (void*)PyLong_AsVoidPtr(pynode), &nodeptr);
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup),
&RNA_ShaderNodeScript,
(void *)PyLong_AsVoidPtr(pynode),
&nodeptr);
BL::ShaderNodeScript b_node(nodeptr);
/* update bytecode hash */
string bytecode = b_node.bytecode();
if(!bytecode.empty()) {
if (!bytecode.empty()) {
MD5Hash md5;
md5.append((const uint8_t*)bytecode.c_str(), bytecode.size());
md5.append((const uint8_t *)bytecode.c_str(), bytecode.size());
b_node.bytecode_hash(md5.get_hex().c_str());
}
else
@@ -439,17 +466,17 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
/* query from file path */
OSL::OSLQuery query;
if(!OSLShaderManager::osl_query(query, filepath))
if (!OSLShaderManager::osl_query(query, filepath))
Py_RETURN_FALSE;
/* add new sockets from parameters */
set<void*> used_sockets;
set<void *> used_sockets;
for(int i = 0; i < query.nparams(); i++) {
for (int i = 0; i < query.nparams(); i++) {
const OSL::OSLQuery::Parameter *param = query.getparam(i);
/* skip unsupported types */
if(param->varlenarray || param->isstruct || param->type.arraylen > 1)
if (param->varlenarray || param->isstruct || param->type.arraylen > 1)
continue;
/* determine socket type */
@@ -460,50 +487,49 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
int default_int = 0;
string default_string = "";
if(param->isclosure) {
if (param->isclosure) {
socket_type = "NodeSocketShader";
data_type = BL::NodeSocket::type_SHADER;
}
else if(param->type.vecsemantics == TypeDesc::COLOR) {
else if (param->type.vecsemantics == TypeDesc::COLOR) {
socket_type = "NodeSocketColor";
data_type = BL::NodeSocket::type_RGBA;
if(param->validdefault) {
if (param->validdefault) {
default_float4[0] = param->fdefault[0];
default_float4[1] = param->fdefault[1];
default_float4[2] = param->fdefault[2];
}
}
else if(param->type.vecsemantics == TypeDesc::POINT ||
else if (param->type.vecsemantics == TypeDesc::POINT ||
param->type.vecsemantics == TypeDesc::VECTOR ||
param->type.vecsemantics == TypeDesc::NORMAL)
{
param->type.vecsemantics == TypeDesc::NORMAL) {
socket_type = "NodeSocketVector";
data_type = BL::NodeSocket::type_VECTOR;
if(param->validdefault) {
if (param->validdefault) {
default_float4[0] = param->fdefault[0];
default_float4[1] = param->fdefault[1];
default_float4[2] = param->fdefault[2];
}
}
else if(param->type.aggregate == TypeDesc::SCALAR) {
if(param->type.basetype == TypeDesc::INT) {
else if (param->type.aggregate == TypeDesc::SCALAR) {
if (param->type.basetype == TypeDesc::INT) {
socket_type = "NodeSocketInt";
data_type = BL::NodeSocket::type_INT;
if(param->validdefault)
if (param->validdefault)
default_int = param->idefault[0];
}
else if(param->type.basetype == TypeDesc::FLOAT) {
else if (param->type.basetype == TypeDesc::FLOAT) {
socket_type = "NodeSocketFloat";
data_type = BL::NodeSocket::type_VALUE;
if(param->validdefault)
if (param->validdefault)
default_float = param->fdefault[0];
}
else if(param->type.basetype == TypeDesc::STRING) {
else if (param->type.basetype == TypeDesc::STRING) {
socket_type = "NodeSocketString";
data_type = BL::NodeSocket::type_STRING;
if(param->validdefault)
if (param->validdefault)
default_string = param->sdefault[0].string();
}
else
@@ -514,10 +540,10 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
/* find socket */
BL::NodeSocket b_sock(PointerRNA_NULL);
if(param->isoutput) {
if (param->isoutput) {
b_sock = b_node.outputs[param->name.string()];
/* remove if type no longer matches */
if(b_sock && b_sock.bl_idname() != socket_type) {
if (b_sock && b_sock.bl_idname() != socket_type) {
b_node.outputs.remove(b_data, b_sock);
b_sock = BL::NodeSocket(PointerRNA_NULL);
}
@@ -525,33 +551,35 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
else {
b_sock = b_node.inputs[param->name.string()];
/* remove if type no longer matches */
if(b_sock && b_sock.bl_idname() != socket_type) {
if (b_sock && b_sock.bl_idname() != socket_type) {
b_node.inputs.remove(b_data, b_sock);
b_sock = BL::NodeSocket(PointerRNA_NULL);
}
}
if(!b_sock) {
if (!b_sock) {
/* create new socket */
if(param->isoutput)
b_sock = b_node.outputs.create(b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
if (param->isoutput)
b_sock = b_node.outputs.create(
b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
else
b_sock = b_node.inputs.create(b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
b_sock = b_node.inputs.create(
b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
/* set default value */
if(data_type == BL::NodeSocket::type_VALUE) {
if (data_type == BL::NodeSocket::type_VALUE) {
set_float(b_sock.ptr, "default_value", default_float);
}
else if(data_type == BL::NodeSocket::type_INT) {
else if (data_type == BL::NodeSocket::type_INT) {
set_int(b_sock.ptr, "default_value", default_int);
}
else if(data_type == BL::NodeSocket::type_RGBA) {
else if (data_type == BL::NodeSocket::type_RGBA) {
set_float4(b_sock.ptr, "default_value", default_float4);
}
else if(data_type == BL::NodeSocket::type_VECTOR) {
else if (data_type == BL::NodeSocket::type_VECTOR) {
set_float3(b_sock.ptr, "default_value", float4_to_float3(default_float4));
}
else if(data_type == BL::NodeSocket::type_STRING) {
else if (data_type == BL::NodeSocket::type_STRING) {
set_string(b_sock.ptr, "default_value", default_string);
}
}
@@ -568,22 +596,22 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
removed = false;
for(b_node.inputs.begin(b_input); b_input != b_node.inputs.end(); ++b_input) {
if(used_sockets.find(b_input->ptr.data) == used_sockets.end()) {
for (b_node.inputs.begin(b_input); b_input != b_node.inputs.end(); ++b_input) {
if (used_sockets.find(b_input->ptr.data) == used_sockets.end()) {
b_node.inputs.remove(b_data, *b_input);
removed = true;
break;
}
}
for(b_node.outputs.begin(b_output); b_output != b_node.outputs.end(); ++b_output) {
if(used_sockets.find(b_output->ptr.data) == used_sockets.end()) {
for (b_node.outputs.begin(b_output); b_output != b_node.outputs.end(); ++b_output) {
if (used_sockets.find(b_output->ptr.data) == used_sockets.end()) {
b_node.outputs.remove(b_data, *b_output);
removed = true;
break;
}
}
} while(removed);
} while (removed);
Py_RETURN_TRUE;
}
@@ -592,11 +620,11 @@ static PyObject *osl_compile_func(PyObject * /*self*/, PyObject *args)
{
const char *inputfile = NULL, *outputfile = NULL;
if(!PyArg_ParseTuple(args, "ss", &inputfile, &outputfile))
if (!PyArg_ParseTuple(args, "ss", &inputfile, &outputfile))
return NULL;
/* return */
if(!OSLShaderManager::osl_compile(inputfile, outputfile))
if (!OSLShaderManager::osl_compile(inputfile, outputfile))
Py_RETURN_FALSE;
Py_RETURN_TRUE;
@@ -620,12 +648,12 @@ static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
{
PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
if(sequence == NULL) {
if (sequence == NULL) {
Py_RETURN_FALSE;
}
vector<string> parameters;
for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
PyObject *item_as_string = PyObject_Str(item);
const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
@@ -643,23 +671,24 @@ static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
}
#endif
static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepaths)
static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepaths)
{
if(PyUnicode_Check(pyfilepaths)) {
if (PyUnicode_Check(pyfilepaths)) {
const char *filepath = PyUnicode_AsUTF8(pyfilepaths);
filepaths.push_back(filepath);
return true;
}
PyObject *sequence = PySequence_Fast(pyfilepaths, "File paths must be a string or sequence of strings");
if(sequence == NULL) {
PyObject *sequence = PySequence_Fast(pyfilepaths,
"File paths must be a string or sequence of strings");
if (sequence == NULL) {
return false;
}
for(Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
const char *filepath = PyUnicode_AsUTF8(item);
if(filepath == NULL) {
if (filepath == NULL) {
PyErr_SetString(PyExc_ValueError, "File paths must be a string or sequence of strings.");
Py_DECREF(sequence);
return false;
@@ -673,34 +702,44 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepat
static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
{
static const char *keyword_list[] = {"preferences", "scene", "view_layer",
"input", "output",
"tile_size", "samples", NULL};
static const char *keyword_list[] = {
"preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
PyObject *pypreferences, *pyscene, *pyviewlayer;
PyObject *pyinput, *pyoutput = NULL;
int tile_size = 0, samples = 0;
if (!PyArg_ParseTupleAndKeywords(args, keywords, "OOOO|Oii", (char**)keyword_list,
&pypreferences, &pyscene, &pyviewlayer,
&pyinput, &pyoutput,
&tile_size, &samples)) {
if (!PyArg_ParseTupleAndKeywords(args,
keywords,
"OOOO|Oii",
(char **)keyword_list,
&pypreferences,
&pyscene,
&pyviewlayer,
&pyinput,
&pyoutput,
&tile_size,
&samples)) {
return NULL;
}
/* Get device specification from preferences and scene. */
PointerRNA preferencesptr;
RNA_pointer_create(NULL, &RNA_Preferences, (void*)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
BL::Preferences b_preferences(preferencesptr);
PointerRNA sceneptr;
RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pyscene), &sceneptr);
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
BL::Scene b_scene(sceneptr);
DeviceInfo device = blender_device_info(b_preferences, b_scene, true);
/* Get denoising parameters from view layer. */
PointerRNA viewlayerptr;
RNA_pointer_create((ID*)PyLong_AsVoidPtr(pyscene), &RNA_ViewLayer, PyLong_AsVoidPtr(pyviewlayer), &viewlayerptr);
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pyscene),
&RNA_ViewLayer,
PyLong_AsVoidPtr(pyviewlayer),
&viewlayerptr);
PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
DenoiseParams params;
@@ -713,12 +752,12 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
/* Parse file paths list. */
vector<string> input, output;
if(!image_parse_filepaths(pyinput, input)) {
if (!image_parse_filepaths(pyinput, input)) {
return NULL;
}
if(pyoutput) {
if(!image_parse_filepaths(pyoutput, output)) {
if (pyoutput) {
if (!image_parse_filepaths(pyoutput, output)) {
return NULL;
}
}
@@ -726,11 +765,11 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
output = input;
}
if(input.empty()) {
if (input.empty()) {
PyErr_SetString(PyExc_ValueError, "No input file paths specified.");
return NULL;
}
if(input.size() != output.size()) {
if (input.size() != output.size()) {
PyErr_SetString(PyExc_ValueError, "Number of input and output file paths does not match.");
return NULL;
}
@@ -749,7 +788,7 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
}
/* Run denoiser. */
if(!denoiser.run()) {
if (!denoiser.run()) {
PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
return NULL;
}
@@ -762,18 +801,19 @@ static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywo
static const char *keyword_list[] = {"input", "output", NULL};
PyObject *pyinput, *pyoutput = NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywords, "OO", (char**)keyword_list, &pyinput, &pyoutput)) {
if (!PyArg_ParseTupleAndKeywords(
args, keywords, "OO", (char **)keyword_list, &pyinput, &pyoutput)) {
return NULL;
}
/* Parse input list. */
vector<string> input;
if(!image_parse_filepaths(pyinput, input)) {
if (!image_parse_filepaths(pyinput, input)) {
return NULL;
}
/* Parse output string. */
if(!PyUnicode_Check(pyoutput)) {
if (!PyUnicode_Check(pyoutput)) {
PyErr_SetString(PyExc_ValueError, "Output must be a string.");
return NULL;
}
@@ -784,7 +824,7 @@ static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywo
merger.input = input;
merger.output = output;
if(!merger.run()) {
if (!merger.run()) {
PyErr_SetString(PyExc_ValueError, merger.error.c_str());
return NULL;
}
@@ -792,25 +832,23 @@ static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywo
Py_RETURN_NONE;
}
static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pyscene;
if(!PyArg_ParseTuple(args, "O", &pyscene)) {
if (!PyArg_ParseTuple(args, "O", &pyscene)) {
return NULL;
}
PointerRNA sceneptr;
RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pyscene), &sceneptr);
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
BL::Scene b_scene(sceneptr);
if(debug_flags_sync_from_scene(b_scene)) {
if (debug_flags_sync_from_scene(b_scene)) {
VLOG(2) << "Tagging device list for update.";
Device::tag_update();
}
VLOG(2) << "Debug flags set to:\n"
<< DebugFlags();
VLOG(2) << "Debug flags set to:\n" << DebugFlags();
debug_flags_set = true;
@@ -819,13 +857,12 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/)
{
if(debug_flags_reset()) {
if (debug_flags_reset()) {
VLOG(2) << "Tagging device list for update.";
Device::tag_update();
}
if(debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n"
<< DebugFlags();
if (debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
debug_flags_set = false;
}
Py_RETURN_NONE;
@@ -834,20 +871,16 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
{
int num_resumable_chunks, current_resumable_chunk;
if(!PyArg_ParseTuple(args, "ii",
&num_resumable_chunks,
&current_resumable_chunk)) {
if (!PyArg_ParseTuple(args, "ii", &num_resumable_chunks, &current_resumable_chunk)) {
Py_RETURN_NONE;
}
if(num_resumable_chunks <= 0) {
if (num_resumable_chunks <= 0) {
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
abort();
Py_RETURN_NONE;
}
if(current_resumable_chunk < 1 ||
current_resumable_chunk > num_resumable_chunks)
{
if (current_resumable_chunk < 1 || current_resumable_chunk > num_resumable_chunks) {
fprintf(stderr, "Cycles: Bad value for current resumable chunk number.\n");
abort();
Py_RETURN_NONE;
@@ -859,9 +892,7 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
BlenderSession::num_resumable_chunks = num_resumable_chunks;
BlenderSession::current_resumable_chunk = current_resumable_chunk;
printf("Cycles: Will render chunk %d of %d\n",
current_resumable_chunk,
num_resumable_chunks);
printf("Cycles: Will render chunk %d of %d\n", current_resumable_chunk, num_resumable_chunks);
Py_RETURN_NONE;
}
@@ -869,29 +900,26 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
{
int num_chunks, start_chunk, end_chunk;
if(!PyArg_ParseTuple(args, "iii",
&num_chunks,
&start_chunk,
&end_chunk)) {
if (!PyArg_ParseTuple(args, "iii", &num_chunks, &start_chunk, &end_chunk)) {
Py_RETURN_NONE;
}
if(num_chunks <= 0) {
if (num_chunks <= 0) {
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
abort();
Py_RETURN_NONE;
}
if(start_chunk < 1 || start_chunk > num_chunks) {
if (start_chunk < 1 || start_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if(end_chunk < 1 || end_chunk > num_chunks) {
if (end_chunk < 1 || end_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if(start_chunk > end_chunk) {
if (start_chunk > end_chunk) {
fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
abort();
Py_RETURN_NONE;
@@ -899,16 +927,12 @@ static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *a
VLOG(1) << "Initialized resumable render: "
<< "num_resumable_chunks=" << num_chunks << ", "
<< "start_resumable_chunk=" << start_chunk
<< "end_resumable_chunk=" << end_chunk;
<< "start_resumable_chunk=" << start_chunk << "end_resumable_chunk=" << end_chunk;
BlenderSession::num_resumable_chunks = num_chunks;
BlenderSession::start_resumable_chunk = start_chunk;
BlenderSession::end_resumable_chunk = end_chunk;
printf("Cycles: Will render chunks %d to %d of %d\n",
start_chunk,
end_chunk,
num_chunks);
printf("Cycles: Will render chunks %d to %d of %d\n", start_chunk, end_chunk, num_chunks);
Py_RETURN_NONE;
}
@@ -923,7 +947,7 @@ static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_opencl = false;
foreach(DeviceType device_type, device_types) {
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_opencl |= (device_type == DEVICE_OPENCL);
}
@@ -955,8 +979,8 @@ static PyMethodDef methods[] = {
#endif
/* Standalone denoising */
{"denoise", (PyCFunction)denoise_func, METH_VARARGS|METH_KEYWORDS, ""},
{"merge", (PyCFunction)merge_func, METH_VARARGS|METH_KEYWORDS, ""},
{"denoise", (PyCFunction)denoise_func, METH_VARARGS | METH_KEYWORDS, ""},
{"merge", (PyCFunction)merge_func, METH_VARARGS | METH_KEYWORDS, ""},
/* Debugging routines */
{"debug_flags_update", debug_flags_update_func, METH_VARARGS, ""},
@@ -981,7 +1005,10 @@ static struct PyModuleDef module = {
"Blender cycles render integration",
-1,
methods,
NULL, NULL, NULL, NULL,
NULL,
NULL,
NULL,
NULL,
};
CCL_NAMESPACE_END
@@ -999,12 +1026,15 @@ void *CCL_python_module_init()
int curversion = OSL_LIBRARY_VERSION_CODE;
PyModule_AddObject(mod, "with_osl", Py_True);
Py_INCREF(Py_True);
PyModule_AddObject(mod, "osl_version",
Py_BuildValue("(iii)",
curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(mod, "osl_version_string",
PyUnicode_FromFormat("%2d, %2d, %2d",
curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(
mod,
"osl_version",
Py_BuildValue("(iii)", curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(
mod,
"osl_version_string",
PyUnicode_FromFormat(
"%2d, %2d, %2d", curversion / 10000, (curversion / 100) % 100, curversion % 100));
#else
PyModule_AddObject(mod, "with_osl", Py_False);
Py_INCREF(Py_False);
@@ -1036,5 +1066,5 @@ void *CCL_python_module_init()
Py_INCREF(Py_False);
#endif /* WITH_EMBREE */
return (void*)mod;
return (void *)mod;
}

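Nearly every function above follows the same CPython calling convention: arguments arrive as generic objects via PyArg_ParseTuple, and pointers that Blender handed to Python as integers are recovered with PyLong_AsVoidPtr (with PyEval_SaveThread/PyEval_RestoreThread releasing the GIL around long-running work). A minimal, self-contained module (hypothetical name "toy", unrelated to the Cycles module) showing just the pointer round-trip:

/* Sketch of the pointer-passing convention used above: the caller passes
 * a pointer as a Python int, PyArg_ParseTuple unpacks the argument, and
 * PyLong_AsVoidPtr recovers the raw pointer on the C++ side. */
#include <Python.h>

static PyObject *echo_ptr_func(PyObject * /*self*/, PyObject *args)
{
  PyObject *pyhandle;

  if (!PyArg_ParseTuple(args, "O", &pyhandle))
    return NULL;

  /* Recover the raw pointer that was stored with PyLong_FromVoidPtr(). */
  void *handle = PyLong_AsVoidPtr(pyhandle);

  /* Hand it straight back as a new Python int. */
  return PyLong_FromVoidPtr(handle);
}

static PyMethodDef methods[] = {
    {"echo_ptr", echo_ptr_func, METH_VARARGS, ""},
    {NULL, NULL, 0, NULL},
};

static struct PyModuleDef module = {
    PyModuleDef_HEAD_INIT,
    "toy",
    "Toy module illustrating the pointer-passing convention",
    -1,
    methods,
    NULL,
    NULL,
    NULL,
    NULL,
};

PyMODINIT_FUNC PyInit_toy(void)
{
  return PyModule_Create(&module);
}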
File diff suppressed because it is too large.


@@ -33,18 +33,19 @@ class RenderBuffers;
class RenderTile;
class BlenderSession {
public:
BlenderSession(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::BlendData& b_data,
public:
BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::BlendData &b_data,
bool preview_osl);
BlenderSession(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::BlendData& b_data,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height);
BlenderSession(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::BlendData &b_data,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width,
int height);
~BlenderSession();
@@ -54,44 +55,39 @@ public:
void create_session();
void free_session();
void reset_session(BL::BlendData& b_data,
BL::Depsgraph& b_depsgraph);
void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph);
/* offline render */
void render(BL::Depsgraph& b_depsgraph);
void render(BL::Depsgraph &b_depsgraph);
void bake(BL::Depsgraph& b_depsgrah,
BL::Object& b_object,
const string& pass_type,
void bake(BL::Depsgraph &b_depsgrah,
BL::Object &b_object,
const string &pass_type,
const int custom_flag,
const int object_id,
BL::BakePixel& pixel_array,
BL::BakePixel &pixel_array,
const size_t num_pixels,
const int depth,
float pixels[]);
void write_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void write_render_tile(RenderTile& rtile);
void write_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
/* update functions are used to update the display buffer only after a sample was rendered
* only needed for better visual feedback */
void update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void update_render_tile(RenderTile& rtile, bool highlight);
void update_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void update_render_tile(RenderTile &rtile, bool highlight);
/* interactive updates */
void synchronize(BL::Depsgraph& b_depsgraph);
void synchronize(BL::Depsgraph &b_depsgraph);
/* drawing */
bool draw(int w, int h);
void tag_redraw();
void tag_update();
void get_status(string& status, string& substatus);
void get_kernel_status(string& kernel_status);
void get_progress(float& progress, double& total_time, double& render_time);
void get_status(string &status, string &substatus);
void get_kernel_status(string &kernel_status);
void get_progress(float &progress, double &total_time, double &render_time);
void test_cancel();
void update_status_progress();
void update_bake_progress();
@@ -151,19 +147,17 @@ public:
static bool print_render_stats;
protected:
void stamp_view_layer_metadata(Scene *scene, const string& view_layer_name);
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
void do_write_update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile,
void do_write_update_render_result(BL::RenderResult &b_rr,
BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight);
void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name,
void *builtin_data,
ImageMetaData& metadata);
void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
bool builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,

File diff suppressed because it is too large.


@@ -45,13 +45,13 @@ static const char *cryptomatte_prefix = "Crypto";
/* Constructor */
BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
BL::BlendData& b_data,
BL::Scene& b_scene,
BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
BL::BlendData &b_data,
BL::Scene &b_scene,
Scene *scene,
bool preview,
Progress &progress)
: b_engine(b_engine),
: b_engine(b_engine),
b_data(b_data),
b_scene(b_scene),
shader_map(&scene->shaders),
@@ -69,7 +69,8 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
progress(progress)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : RNA_float_get(&cscene, "dicing_rate");
dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
RNA_float_get(&cscene, "dicing_rate");
max_subdivisions = RNA_int_get(&cscene, "max_subdivisions");
}
@@ -79,37 +80,37 @@ BlenderSync::~BlenderSync()
/* Sync */
void BlenderSync::sync_recalc(BL::Depsgraph& b_depsgraph)
void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph)
{
/* Sync recalc flags from Blender to Cycles. Actual update is done separately,
* so we can do it later on if doing it immediately is not suitable. */
bool has_updated_objects = b_depsgraph.id_type_updated(BL::DriverTarget::id_type_OBJECT);
if(experimental) {
if (experimental) {
/* Mark all meshes as needing to be exported again if dicing changed. */
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
bool dicing_prop_changed = false;
float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate")
: RNA_float_get(&cscene, "dicing_rate");
float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
RNA_float_get(&cscene, "dicing_rate");
if(dicing_rate != updated_dicing_rate) {
if (dicing_rate != updated_dicing_rate) {
dicing_rate = updated_dicing_rate;
dicing_prop_changed = true;
}
int updated_max_subdivisions = RNA_int_get(&cscene, "max_subdivisions");
if(max_subdivisions != updated_max_subdivisions) {
if (max_subdivisions != updated_max_subdivisions) {
max_subdivisions = updated_max_subdivisions;
dicing_prop_changed = true;
}
if(dicing_prop_changed) {
for(const pair<void*, Mesh*>& iter: mesh_map.key_to_scene_data()) {
if (dicing_prop_changed) {
for (const pair<void *, Mesh *> &iter : mesh_map.key_to_scene_data()) {
Mesh *mesh = iter.second;
if(mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
mesh_map.set_recalc(iter.first);
}
}
@@ -118,82 +119,82 @@ void BlenderSync::sync_recalc(BL::Depsgraph& b_depsgraph)
/* Iterate over all IDs in this depsgraph. */
BL::Depsgraph::updates_iterator b_update;
for(b_depsgraph.updates.begin(b_update); b_update != b_depsgraph.updates.end(); ++b_update) {
for (b_depsgraph.updates.begin(b_update); b_update != b_depsgraph.updates.end(); ++b_update) {
BL::ID b_id(b_update->id());
/* Material */
if(b_id.is_a(&RNA_Material)) {
if (b_id.is_a(&RNA_Material)) {
BL::Material b_mat(b_id);
shader_map.set_recalc(b_mat);
}
/* Light */
else if(b_id.is_a(&RNA_Light)) {
else if (b_id.is_a(&RNA_Light)) {
BL::Light b_light(b_id);
shader_map.set_recalc(b_light);
}
/* Object */
else if(b_id.is_a(&RNA_Object)) {
else if (b_id.is_a(&RNA_Object)) {
BL::Object b_ob(b_id);
const bool updated_geometry = b_update->is_updated_geometry();
if(b_update->is_updated_transform()) {
if (b_update->is_updated_transform()) {
object_map.set_recalc(b_ob);
light_map.set_recalc(b_ob);
}
if(object_is_mesh(b_ob)) {
if(updated_geometry ||
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE))
{
BL::ID key = BKE_object_is_modified(b_ob)? b_ob: b_ob.data();
if (object_is_mesh(b_ob)) {
if (updated_geometry ||
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
mesh_map.set_recalc(key);
}
}
else if(object_is_light(b_ob)) {
if(updated_geometry) {
else if (object_is_light(b_ob)) {
if (updated_geometry) {
light_map.set_recalc(b_ob);
}
}
if(updated_geometry) {
if (updated_geometry) {
BL::Object::particle_systems_iterator b_psys;
for(b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); ++b_psys)
for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); ++b_psys)
particle_system_map.set_recalc(b_ob);
}
}
/* Mesh */
else if(b_id.is_a(&RNA_Mesh)) {
else if (b_id.is_a(&RNA_Mesh)) {
BL::Mesh b_mesh(b_id);
mesh_map.set_recalc(b_mesh);
}
/* World */
else if(b_id.is_a(&RNA_World)) {
else if (b_id.is_a(&RNA_World)) {
BL::World b_world(b_id);
if(world_map == b_world.ptr.data) {
if (world_map == b_world.ptr.data) {
world_recalc = true;
}
}
}
/* Updates shader with object dependency if objects changed. */
if(has_updated_objects) {
if(scene->default_background->has_object_dependency) {
if (has_updated_objects) {
if (scene->default_background->has_object_dependency) {
world_recalc = true;
}
foreach(Shader *shader, scene->shaders) {
if(shader->has_object_dependency) {
foreach (Shader *shader, scene->shaders) {
if (shader->has_object_dependency) {
shader->need_sync_object = true;
}
}
}
}
void BlenderSync::sync_data(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::SpaceView3D& b_v3d,
BL::Object& b_override,
int width, int height,
void BlenderSync::sync_data(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width,
int height,
void **python_thread_state)
{
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@@ -207,17 +208,11 @@ void BlenderSync::sync_data(BL::RenderSettings& b_render,
mesh_synced.clear(); /* use for objects and motion sync */
if(scene->need_motion() == Scene::MOTION_PASS ||
scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER)
{
if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) {
sync_objects(b_depsgraph);
}
sync_motion(b_render,
b_depsgraph,
b_override,
width, height,
python_thread_state);
sync_motion(b_render, b_depsgraph, b_override, width, height, python_thread_state);
mesh_synced.clear();
@@ -253,10 +248,9 @@ void BlenderSync::sync_integrator()
integrator->filter_glossy = get_float(cscene, "blur_glossy");
integrator->seed = get_int(cscene, "seed");
if(get_boolean(cscene, "use_animated_seed")) {
integrator->seed = hash_int_2d(b_scene.frame_current(),
get_int(cscene, "seed"));
if(b_scene.frame_subframe() != 0.0f) {
if (get_boolean(cscene, "use_animated_seed")) {
integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed"));
if (b_scene.frame_subframe() != 0.0f) {
/* TODO(sergey): Ideally should be some sort of hash_merge,
* but this is good enough for now.
*/
@@ -266,15 +260,12 @@ void BlenderSync::sync_integrator()
}
integrator->sampling_pattern = (SamplingPattern)get_enum(
cscene,
"sampling_pattern",
SAMPLING_NUM_PATTERNS,
SAMPLING_PATTERN_SOBOL);
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL);
integrator->sample_clamp_direct = get_float(cscene, "sample_clamp_direct");
integrator->sample_clamp_indirect = get_float(cscene, "sample_clamp_indirect");
if(!preview) {
if(integrator->motion_blur != r.use_motion_blur()) {
if (!preview) {
if (integrator->motion_blur != r.use_motion_blur()) {
scene->object_manager->tag_update(scene);
scene->camera->tag_update();
}
@@ -282,10 +273,8 @@ void BlenderSync::sync_integrator()
integrator->motion_blur = r.use_motion_blur();
}
integrator->method = (Integrator::Method)get_enum(cscene,
"progressive",
Integrator::NUM_METHODS,
Integrator::PATH);
integrator->method = (Integrator::Method)get_enum(
cscene, "progressive", Integrator::NUM_METHODS, Integrator::PATH);
integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct");
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
@@ -299,7 +288,7 @@ void BlenderSync::sync_integrator()
int subsurface_samples = get_int(cscene, "subsurface_samples");
int volume_samples = get_int(cscene, "volume_samples");
if(get_boolean(cscene, "use_square_samples")) {
if (get_boolean(cscene, "use_square_samples")) {
integrator->diffuse_samples = diffuse_samples * diffuse_samples;
integrator->glossy_samples = glossy_samples * glossy_samples;
integrator->transmission_samples = transmission_samples * transmission_samples;
@@ -318,8 +307,8 @@ void BlenderSync::sync_integrator()
integrator->volume_samples = volume_samples;
}
if(b_scene.render().use_simplify()) {
if(preview) {
if (b_scene.render().use_simplify()) {
if (preview) {
integrator->ao_bounces = get_int(cscene, "ao_bounces");
}
else {
@@ -330,7 +319,7 @@ void BlenderSync::sync_integrator()
integrator->ao_bounces = 0;
}
if(integrator->modified(previntegrator))
if (integrator->modified(previntegrator))
integrator->tag_update(scene);
}
@@ -344,19 +333,18 @@ void BlenderSync::sync_film()
Film prevfilm = *film;
film->exposure = get_float(cscene, "film_exposure");
film->filter_type = (FilterType)get_enum(cscene,
"pixel_filter_type",
FILTER_NUM_TYPES,
FILTER_BLACKMAN_HARRIS);
film->filter_width = (film->filter_type == FILTER_BOX)? 1.0f: get_float(cscene, "filter_width");
film->filter_type = (FilterType)get_enum(
cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS);
film->filter_width = (film->filter_type == FILTER_BOX) ? 1.0f :
get_float(cscene, "filter_width");
if(b_scene.world()) {
if (b_scene.world()) {
BL::WorldMistSettings b_mist = b_scene.world().mist_settings();
film->mist_start = b_mist.start();
film->mist_depth = b_mist.depth();
switch(b_mist.falloff()) {
switch (b_mist.falloff()) {
case BL::WorldMistSettings::falloff_QUADRATIC:
film->mist_falloff = 2.0f;
break;
@@ -369,13 +357,13 @@ void BlenderSync::sync_film()
}
}
if(film->modified(prevfilm))
if (film->modified(prevfilm))
film->tag_update(scene);
}
/* Render Layer */
void BlenderSync::sync_view_layer(BL::SpaceView3D& /*b_v3d*/, BL::ViewLayer& b_view_layer)
void BlenderSync::sync_view_layer(BL::SpaceView3D & /*b_v3d*/, BL::ViewLayer &b_view_layer)
{
/* render layer */
view_layer.name = b_view_layer.name();
@@ -394,14 +382,13 @@ void BlenderSync::sync_view_layer(BL::SpaceView3D& /*b_v3d*/, BL::ViewLayer& b_v
view_layer.bound_samples = (use_layer_samples == 1);
view_layer.samples = 0;
if(use_layer_samples != 2) {
if (use_layer_samples != 2) {
int samples = b_view_layer.samples();
if(get_boolean(cscene, "use_square_samples"))
if (get_boolean(cscene, "use_square_samples"))
view_layer.samples = samples * samples;
else
view_layer.samples = samples;
}
}
/* Images */
@@ -409,9 +396,8 @@ void BlenderSync::sync_images()
{
/* Sync is a convention for this API, but currently it frees unused buffers. */
const bool is_interface_locked = b_engine.render() &&
b_engine.render().use_lock_interface();
if(is_interface_locked == false && BlenderSession::headless == false) {
const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface();
if (is_interface_locked == false && BlenderSession::headless == false) {
/* If the interface is not locked, it's possible the image is needed for
* the display.
*/
@@ -419,18 +405,14 @@ void BlenderSync::sync_images()
}
/* Free buffers used by images which are not needed for render. */
BL::BlendData::images_iterator b_image;
for(b_data.images.begin(b_image);
b_image != b_data.images.end();
++b_image)
{
for (b_data.images.begin(b_image); b_image != b_data.images.end(); ++b_image) {
/* TODO(sergey): Consider making it a utility function to check
* whether an image is considered builtin.
*/
const bool is_builtin = b_image->packed_file() ||
b_image->source() == BL::Image::source_GENERATED ||
b_image->source() == BL::Image::source_MOVIE ||
b_engine.is_preview();
if(is_builtin == false) {
b_image->source() == BL::Image::source_MOVIE || b_engine.is_preview();
if (is_builtin == false) {
b_image->buffers_free();
}
/* TODO(sergey): Free builtin images not used by any shader. */
@@ -438,10 +420,12 @@ void BlenderSync::sync_images()
}
/* Passes */
PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
{
string name = b_pass.name();
#define MAP_PASS(passname, passtype) if(name == passname) return passtype;
#define MAP_PASS(passname, passtype) \
if (name == passname) \
return passtype;
/* NOTE: Keep in sync with defined names from DNA_scene_types.h */
MAP_PASS("Combined", PASS_COMBINED);
MAP_PASS("Depth", PASS_DEPTH);
@@ -481,7 +465,7 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
if(string_startswith(name, cryptomatte_prefix)) {
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
#undef MAP_PASS
@@ -489,18 +473,21 @@ PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
return PASS_NONE;
}
int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
{
string name = b_pass.name();
if(name == "Noisy Image") return DENOISING_PASS_PREFILTERED_COLOR;
if (name == "Noisy Image")
return DENOISING_PASS_PREFILTERED_COLOR;
if(name.substr(0, 10) != "Denoising ") {
if (name.substr(0, 10) != "Denoising ") {
return -1;
}
name = name.substr(10);
#define MAP_PASS(passname, offset) if(name == passname) return offset;
#define MAP_PASS(passname, offset) \
if (name == passname) \
return offset;
MAP_PASS("Normal", DENOISING_PASS_PREFILTERED_NORMAL);
MAP_PASS("Albedo", DENOISING_PASS_PREFILTERED_ALBEDO);
MAP_PASS("Depth", DENOISING_PASS_PREFILTERED_DEPTH);
@@ -513,8 +500,7 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
return -1;
}
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
BL::ViewLayer& b_view_layer)
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
{
vector<Pass> passes;
Pass::add(PASS_COMBINED, passes);
@@ -522,13 +508,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
/* loop over passes */
BL::RenderLayer::passes_iterator b_pass_iter;
for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
for (b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
BL::RenderPass b_pass(*b_pass_iter);
PassType pass_type = get_pass_type(b_pass);
if(pass_type == PASS_MOTION && scene->integrator->motion_blur)
if (pass_type == PASS_MOTION && scene->integrator->motion_blur)
continue;
if(pass_type != PASS_NONE)
if (pass_type != PASS_NONE)
Pass::add(pass_type, passes);
}
@@ -537,8 +523,10 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
bool write_denoising_passes = get_boolean(crp, "denoising_store_passes");
scene->film->denoising_flags = 0;
if(full_denoising || write_denoising_passes) {
#define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag;
if (full_denoising || write_denoising_passes) {
#define MAP_OPTION(name, flag) \
if (!get_boolean(crp, name)) \
scene->film->denoising_flags |= flag;
MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
@@ -551,7 +539,7 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
}
if(write_denoising_passes) {
if (write_denoising_passes) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str());
@@ -559,37 +547,37 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) {
if (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) {
b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str());
}
}
#ifdef __KERNEL_DEBUG__
if(get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
if (get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_TRAVERSED_NODES, passes);
}
if(get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
if (get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes);
}
if(get_boolean(crp, "pass_debug_bvh_intersections")) {
if (get_boolean(crp, "pass_debug_bvh_intersections")) {
b_engine.add_pass("Debug BVH Intersections", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_BVH_INTERSECTIONS, passes);
}
if(get_boolean(crp, "pass_debug_ray_bounces")) {
if (get_boolean(crp, "pass_debug_ray_bounces")) {
b_engine.add_pass("Debug Ray Bounces", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RAY_BOUNCES, passes);
}
#endif
if(get_boolean(crp, "pass_debug_render_time")) {
if (get_boolean(crp, "pass_debug_render_time")) {
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes);
}
if(get_boolean(crp, "use_pass_volume_direct")) {
if (get_boolean(crp, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes);
}
if(get_boolean(crp, "use_pass_volume_indirect")) {
if (get_boolean(crp, "use_pass_volume_indirect")) {
b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_INDIRECT, passes);
}
@@ -599,77 +587,76 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2;
scene->film->cryptomatte_depth = crypto_depth;
scene->film->cryptomatte_passes = CRYPT_NONE;
if(get_boolean(crp, "use_pass_crypto_object")) {
for(int i = 0; i < crypto_depth; ++i) {
if (get_boolean(crp, "use_pass_crypto_object")) {
for (int i = 0; i < crypto_depth; ++i) {
string passname = cryptomatte_prefix + string_printf("Object%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_OBJECT);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_OBJECT);
}
if(get_boolean(crp, "use_pass_crypto_material")) {
for(int i = 0; i < crypto_depth; ++i) {
if (get_boolean(crp, "use_pass_crypto_material")) {
for (int i = 0; i < crypto_depth; ++i) {
string passname = cryptomatte_prefix + string_printf("Material%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_MATERIAL);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_MATERIAL);
}
if(get_boolean(crp, "use_pass_crypto_asset")) {
for(int i = 0; i < crypto_depth; ++i) {
if (get_boolean(crp, "use_pass_crypto_asset")) {
for (int i = 0; i < crypto_depth; ++i) {
string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ASSET);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ASSET);
}
if(get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ACCURATE);
if (get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ACCURATE);
}
return passes;
}
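The repeated `(CryptomatteType)(passes | CRYPT_...)` casts above are needed because bitwise OR on an unscoped C++ enum yields an `int`, which cannot be assigned back to the enum without an explicit cast. A small self-contained sketch of the pattern with stand-in flag values (not the real Cycles enum):

```cpp
/* Unscoped enums promote to int under bitwise operators, so the result
 * must be cast back before assigning it to an enum-typed field. */
enum CryptoFlags {
  FLAG_NONE = 0,
  FLAG_OBJECT = (1 << 0),
  FLAG_MATERIAL = (1 << 1),
  FLAG_ASSET = (1 << 2),
};

int main()
{
  CryptoFlags passes = FLAG_NONE;
  passes = (CryptoFlags)(passes | FLAG_OBJECT); /* Cast required. */
  passes = (CryptoFlags)(passes | FLAG_MATERIAL);
  return (passes & FLAG_OBJECT) ? 0 : 1;
}
```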
void BlenderSync::free_data_after_sync(BL::Depsgraph& b_depsgraph)
void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
{
/* When viewport display is not needed during render we can force some
* caches to be released from the Blender side in order to reduce peak memory
* footprint during synchronization process.
*/
const bool is_interface_locked = b_engine.render() &&
b_engine.render().use_lock_interface();
const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface();
const bool can_free_caches = BlenderSession::headless || is_interface_locked;
if(!can_free_caches) {
if (!can_free_caches) {
return;
}
/* TODO(sergey): We can actually remove the whole dependency graph,
* but that will need some API support first.
*/
BL::Depsgraph::objects_iterator b_ob;
for(b_depsgraph.objects.begin(b_ob);
b_ob != b_depsgraph.objects.end();
++b_ob)
{
for (b_depsgraph.objects.begin(b_ob); b_ob != b_depsgraph.objects.end(); ++b_ob) {
b_ob->cache_release();
}
}
/* Scene Parameters */
SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
bool background)
SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
{
BL::RenderSettings r = b_scene.render();
SceneParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system");
if(shadingsystem == 0)
if (shadingsystem == 0)
params.shadingsystem = SHADINGSYSTEM_SVM;
else if(shadingsystem == 1)
else if (shadingsystem == 1)
params.shadingsystem = SHADINGSYSTEM_OSL;
if(background || DebugFlags().viewport_static_bvh)
if (background || DebugFlags().viewport_static_bvh)
params.bvh_type = SceneParams::BVH_STATIC;
else
params.bvh_type = SceneParams::BVH_DYNAMIC;
@@ -678,19 +665,19 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps");
if(background && params.shadingsystem != SHADINGSYSTEM_OSL)
if (background && params.shadingsystem != SHADINGSYSTEM_OSL)
params.persistent_data = r.use_persistent_data();
else
params.persistent_data = false;
int texture_limit;
if(background) {
if (background) {
texture_limit = RNA_enum_get(&cscene, "texture_limit_render");
}
else {
texture_limit = RNA_enum_get(&cscene, "texture_limit");
}
if(texture_limit > 0 && b_scene.render().use_simplify()) {
if (texture_limit > 0 && b_scene.render().use_simplify()) {
params.texture_limit = 1 << (texture_limit + 6);
}
else {
@@ -700,7 +687,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
/* TODO(sergey): Once OSL supports per-microarchitecture optimization get
* rid of this.
*/
if(params.shadingsystem == SHADINGSYSTEM_OSL) {
if (params.shadingsystem == SHADINGSYSTEM_OSL) {
params.bvh_layout = BVH_LAYOUT_BVH4;
}
else {
@@ -708,22 +695,23 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
}
#ifdef WITH_EMBREE
params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : params.bvh_layout;
params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
params.bvh_layout;
#endif
return params;
}
/* Session Parameters */
bool BlenderSync::get_session_pause(BL::Scene& b_scene, bool background)
bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
return (background)? false: get_boolean(cscene, "preview_pause");
return (background) ? false : get_boolean(cscene, "preview_pause");
}
SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
BL::Preferences& b_preferences,
BL::Scene& b_scene,
SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background)
{
SessionParams params;
@@ -745,7 +733,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
int preview_samples = get_int(cscene, "preview_samples");
int preview_aa_samples = get_int(cscene, "preview_aa_samples");
if(get_boolean(cscene, "use_square_samples")) {
if (get_boolean(cscene, "use_square_samples")) {
aa_samples = aa_samples * aa_samples;
preview_aa_samples = preview_aa_samples * preview_aa_samples;
@@ -753,23 +741,23 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
preview_samples = preview_samples * preview_samples;
}
if(get_enum(cscene, "progressive") == 0) {
if(background) {
if (get_enum(cscene, "progressive") == 0) {
if (background) {
params.samples = aa_samples;
}
else {
params.samples = preview_aa_samples;
if(params.samples == 0)
if (params.samples == 0)
params.samples = INT_MAX;
}
}
else {
if(background) {
if (background) {
params.samples = samples;
}
else {
params.samples = preview_samples;
if(params.samples == 0)
if (params.samples == 0)
params.samples = INT_MAX;
}
}
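The sample selection above squares the counts when the square-samples option is set, then picks render vs. preview values and treats a preview value of 0 as unlimited. A condensed standalone sketch of that decision with hypothetical parameters (not the actual session code):

```cpp
#include <climits>

/* Hypothetical mirror of the sample selection above: optionally square the
 * counts, choose the render or preview value, and treat 0 in the preview
 * as "render until stopped". */
static int choose_samples(bool background, bool use_aa_samples, bool square_samples,
                          int samples, int aa_samples,
                          int preview_samples, int preview_aa_samples)
{
  if (square_samples) {
    samples *= samples;
    aa_samples *= aa_samples;
    preview_samples *= preview_samples;
    preview_aa_samples *= preview_aa_samples;
  }

  int result = use_aa_samples ? (background ? aa_samples : preview_aa_samples) :
                                (background ? samples : preview_samples);

  if (!background && result == 0) {
    result = INT_MAX;
  }
  return result;
}
```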
@@ -779,7 +767,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
/* tiles */
const bool is_cpu = (params.device.type == DEVICE_CPU);
if(!is_cpu && !background) {
if (!is_cpu && !background) {
/* Currently the GPU can be much slower than the CPU when using tiles;
* this still needs to be investigated, but meanwhile make it possible
* to work smoothly in the viewport
@@ -795,7 +783,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
params.tile_size = make_int2(tile_x, tile_y);
}
if((BlenderSession::headless == false) && background) {
if ((BlenderSession::headless == false) && background) {
params.tile_order = (TileOrder)get_enum(cscene, "tile_order");
}
else {
@@ -817,18 +805,19 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
get_boolean(cscene, "use_progressive_refine")) &&
!b_r.use_save_buffers();
if(params.progressive_refine) {
if (params.progressive_refine) {
BL::Scene::view_layers_iterator b_view_layer;
for(b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); ++b_view_layer) {
for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
++b_view_layer) {
PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
if(get_boolean(crl, "use_denoising")) {
if (get_boolean(crl, "use_denoising")) {
params.progressive_refine = false;
}
}
}
if(background) {
if(params.progressive_refine)
if (background) {
if (params.progressive_refine)
params.progressive = true;
else
params.progressive = false;
@@ -842,23 +831,23 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
/* shading system - scene level needs full refresh */
const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system");
if(shadingsystem == 0)
if (shadingsystem == 0)
params.shadingsystem = SHADINGSYSTEM_SVM;
else if(shadingsystem == 1)
else if (shadingsystem == 1)
params.shadingsystem = SHADINGSYSTEM_OSL;
/* color managagement */
params.display_buffer_linear = b_engine.support_display_space_shader(b_scene);
if(b_engine.is_preview()) {
if (b_engine.is_preview()) {
/* For preview rendering we're using same timeout as
* blender's job update.
*/
params.progressive_update_timeout = 0.1;
}
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() &&
background && BlenderSession::print_render_stats;
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
return params;
}
@@ -49,136 +49,137 @@ class ShaderGraph;
class ShaderNode;
class BlenderSync {
public:
BlenderSync(BL::RenderEngine& b_engine,
BL::BlendData& b_data,
BL::Scene& b_scene,
public:
BlenderSync(BL::RenderEngine &b_engine,
BL::BlendData &b_data,
BL::Scene &b_scene,
Scene *scene,
bool preview,
Progress &progress);
~BlenderSync();
/* sync */
void sync_recalc(BL::Depsgraph& b_depsgraph);
void sync_data(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::SpaceView3D& b_v3d,
BL::Object& b_override,
int width, int height,
void sync_recalc(BL::Depsgraph &b_depsgraph);
void sync_data(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width,
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D& b_v3d, BL::ViewLayer& b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer& b_render_layer,
BL::ViewLayer& b_view_layer);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
void sync_integrator();
void sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
int width, int height,
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width,
int height,
const char *viewname);
void sync_view(BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height);
inline int get_layer_samples() { return view_layer.samples; }
inline int get_layer_bound_samples() { return view_layer.bound_samples; }
void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height);
inline int get_layer_samples()
{
return view_layer.samples;
}
inline int get_layer_bound_samples()
{
return view_layer.bound_samples;
}
/* get parameters */
static SceneParams get_scene_params(BL::Scene& b_scene,
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
static SessionParams get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::Scene &b_scene,
bool background);
static SessionParams get_session_params(BL::RenderEngine& b_engine,
BL::Preferences& b_userpref,
BL::Scene& b_scene,
bool background);
static bool get_session_pause(BL::Scene& b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings& b_render,
BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
static bool get_session_pause(BL::Scene &b_scene, bool background);
static BufferParams get_buffer_params(BL::RenderSettings &b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width, int height);
int width,
int height);
static PassType get_pass_type(BL::RenderPass& b_pass);
static int get_denoising_pass(BL::RenderPass& b_pass);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
private:
private:
/* sync */
void sync_lights(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_objects(BL::Depsgraph& b_depsgraph, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings& b_render,
BL::Depsgraph& b_depsgraph,
BL::Object& b_override,
int width, int height,
void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_objects(BL::Depsgraph &b_depsgraph, float motion_time = 0.0f);
void sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::Object &b_override,
int width,
int height,
void **python_thread_state);
void sync_film();
void sync_view();
void sync_world(BL::Depsgraph& b_depsgraph, bool update_all);
void sync_shaders(BL::Depsgraph& b_depsgraph);
void sync_world(BL::Depsgraph &b_depsgraph, bool update_all);
void sync_shaders(BL::Depsgraph &b_depsgraph);
void sync_curve_settings();
void sync_nodes(Shader *shader, BL::ShaderNodeTree& b_ntree);
Mesh *sync_mesh(BL::Depsgraph& b_depsgrpah,
BL::Object& b_ob,
BL::Object& b_ob_instance,
void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree);
Mesh *sync_mesh(BL::Depsgraph &b_depsgrpah,
BL::Object &b_ob,
BL::Object &b_ob_instance,
bool object_updated,
bool show_self,
bool show_particles);
void sync_curves(Mesh *mesh,
BL::Mesh& b_mesh,
BL::Object& b_ob,
bool motion,
int motion_step = 0);
Object *sync_object(BL::Depsgraph& b_depsgraph,
BL::ViewLayer& b_view_layer,
BL::DepsgraphObjectInstance& b_instance,
void sync_curves(
Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
Object *sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
float motion_time,
bool show_self,
bool show_particles,
BlenderObjectCulling& culling,
BlenderObjectCulling &culling,
bool *use_portal);
void sync_light(BL::Object& b_parent,
void sync_light(BL::Object &b_parent,
int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
BL::Object& b_ob,
BL::Object& b_ob_instance,
BL::Object &b_ob,
BL::Object &b_ob_instance,
int random_id,
Transform& tfm,
Transform &tfm,
bool *use_portal);
void sync_background_light(bool use_portal);
void sync_mesh_motion(BL::Depsgraph& b_depsgraph,
BL::Object& b_ob,
void sync_mesh_motion(BL::Depsgraph &b_depsgraph,
BL::Object &b_ob,
Object *object,
float motion_time);
void sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time);
void sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
/* particles */
bool sync_dupli_particle(BL::Object& b_ob,
BL::DepsgraphObjectInstance& b_instance,
bool sync_dupli_particle(BL::Object &b_ob,
BL::DepsgraphObjectInstance &b_instance,
Object *object);
/* Images. */
void sync_images();
/* Early data free. */
void free_data_after_sync(BL::Depsgraph& b_depsgraph);
void free_data_after_sync(BL::Depsgraph &b_depsgraph);
/* util */
void find_shader(BL::ID& id, vector<Shader*>& used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object& b_ob);
bool object_is_mesh(BL::Object& b_ob);
bool object_is_light(BL::Object& b_ob);
void find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader);
bool BKE_object_is_modified(BL::Object &b_ob);
bool object_is_mesh(BL::Object &b_ob);
bool object_is_light(BL::Object &b_ob);
/* variables */
BL::RenderEngine b_engine;
BL::BlendData b_data;
BL::Scene b_scene;
id_map<void*, Shader> shader_map;
id_map<void *, Shader> shader_map;
id_map<ObjectKey, Object> object_map;
id_map<void*, Mesh> mesh_map;
id_map<void *, Mesh> mesh_map;
id_map<ObjectKey, Light> light_map;
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
set<Mesh*> mesh_synced;
set<Mesh*> mesh_motion_synced;
set<Mesh *> mesh_synced;
set<Mesh *> mesh_motion_synced;
set<float> motion_times;
void *world_map;
bool world_recalc;
@@ -199,7 +200,8 @@ private:
use_hair(true),
samples(0),
bound_samples(false)
{}
{
}
string name;
BL::Material material_override;
@@ -22,33 +22,33 @@ namespace {
/* Point density helpers. */
void density_texture_space_invert(float3& loc,
float3& size)
void density_texture_space_invert(float3 &loc, float3 &size)
{
if(size.x != 0.0f) size.x = 0.5f/size.x;
if(size.y != 0.0f) size.y = 0.5f/size.y;
if(size.z != 0.0f) size.z = 0.5f/size.z;
if (size.x != 0.0f)
size.x = 0.5f / size.x;
if (size.y != 0.0f)
size.y = 0.5f / size.y;
if (size.z != 0.0f)
size.z = 0.5f / size.z;
loc = loc*size - make_float3(0.5f, 0.5f, 0.5f);
loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
}
} /* namespace */
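The inversion above remaps texture space so coordinates end up in a [-0.5, 0.5] range: each non-zero size component becomes 0.5/size, and the location is rescaled by the inverted size. A minimal sketch with a local vector type (the real code uses Cycles' float3):

```cpp
/* Stand-in for Cycles' float3; only what this sketch needs. */
struct Vec3 {
  float x, y, z;
};

/* Mirror of the texture-space inversion above: size -> 0.5/size per axis,
 * then loc is remapped so the texture space spans [-0.5, 0.5]. */
static void texture_space_invert(Vec3 &loc, Vec3 &size)
{
  if (size.x != 0.0f)
    size.x = 0.5f / size.x;
  if (size.y != 0.0f)
    size.y = 0.5f / size.y;
  if (size.z != 0.0f)
    size.z = 0.5f / size.z;

  loc.x = loc.x * size.x - 0.5f;
  loc.y = loc.y * size.y - 0.5f;
  loc.z = loc.z * size.z - 0.5f;
}
```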
void point_density_texture_space(BL::Depsgraph& b_depsgraph,
BL::ShaderNodeTexPointDensity& b_point_density_node,
float3& loc,
float3& size)
void point_density_texture_space(BL::Depsgraph &b_depsgraph,
BL::ShaderNodeTexPointDensity &b_point_density_node,
float3 &loc,
float3 &size)
{
BL::Object b_ob(b_point_density_node.object());
if(!b_ob) {
if (!b_ob) {
loc = make_float3(0.0f, 0.0f, 0.0f);
size = make_float3(0.0f, 0.0f, 0.0f);
return;
}
float3 min, max;
b_point_density_node.calc_point_density_minmax(b_depsgraph,
&min[0],
&max[0]);
b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]);
loc = (min + max) * 0.5f;
size = (max - min) * 0.5f;
density_texture_space_invert(loc, size);
@@ -22,10 +22,10 @@
CCL_NAMESPACE_BEGIN
void point_density_texture_space(BL::Depsgraph& b_depsgraph,
BL::ShaderNodeTexPointDensity& b_point_density_node,
float3& loc,
float3& size);
void point_density_texture_space(BL::Depsgraph &b_depsgraph,
BL::ShaderNodeTexPointDensity &b_point_density_node,
float3 &loc,
float3 &size);
CCL_NAMESPACE_END
@@ -43,9 +43,9 @@ CCL_NAMESPACE_BEGIN
void python_thread_state_save(void **python_thread_state);
void python_thread_state_restore(void **python_thread_state);
static inline BL::Mesh object_to_mesh(BL::BlendData& data,
BL::Object& object,
BL::Depsgraph& depsgraph,
static inline BL::Mesh object_to_mesh(BL::BlendData &data,
BL::Object &object,
BL::Depsgraph &depsgraph,
bool calc_undeformed,
Mesh::SubdivisionType subdivision_type)
{
@@ -66,7 +66,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
#endif
BL::Mesh mesh(PointerRNA_NULL);
if(object.type() == BL::Object::type_MESH) {
if (object.type() == BL::Object::type_MESH) {
/* TODO: calc_undeformed is not used. */
mesh = BL::Mesh(object.data());
@@ -74,8 +74,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE))
{
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
mesh = data.meshes.new_from_object(depsgraph, object, false, false);
}
}
@@ -92,8 +91,8 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
}
#endif
if((bool)mesh && subdivision_type == Mesh::SUBDIVISION_NONE) {
if(mesh.use_auto_smooth()) {
if ((bool)mesh && subdivision_type == Mesh::SUBDIVISION_NONE) {
if (mesh.use_auto_smooth()) {
mesh.split_faces(false);
}
@@ -103,71 +102,65 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
return mesh;
}
static inline void free_object_to_mesh(BL::BlendData& data,
BL::Object& object,
BL::Mesh& mesh)
static inline void free_object_to_mesh(BL::BlendData &data, BL::Object &object, BL::Mesh &mesh)
{
/* Free mesh if we didn't just use the existing one. */
if(object.data().ptr.data != mesh.ptr.data) {
if (object.data().ptr.data != mesh.ptr.data) {
data.meshes.remove(mesh, false, true, false);
}
}
static inline void colorramp_to_array(BL::ColorRamp& ramp,
array<float3>& ramp_color,
array<float>& ramp_alpha,
static inline void colorramp_to_array(BL::ColorRamp &ramp,
array<float3> &ramp_color,
array<float> &ramp_alpha,
int size)
{
ramp_color.resize(size);
ramp_alpha.resize(size);
for(int i = 0; i < size; i++) {
for (int i = 0; i < size; i++) {
float color[4];
ramp.evaluate((float)i/(float)(size-1), color);
ramp.evaluate((float)i / (float)(size - 1), color);
ramp_color[i] = make_float3(color[0], color[1], color[2]);
ramp_alpha[i] = color[3];
}
}
static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap& curve,
float *min_x,
float *max_x)
static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *min_x, float *max_x)
{
*min_x = min(*min_x, curve.points[0].location()[0]);
*max_x = max(*max_x, curve.points[curve.points.length() - 1].location()[0]);
}
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping& cumap,
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
bool rgb_curve,
float *min_x,
float *max_x)
{
/* const int num_curves = cumap.curves.length(); */ /* Gives linking error so far. */
const int num_curves = rgb_curve? 4: 3;
const int num_curves = rgb_curve ? 4 : 3;
*min_x = FLT_MAX;
*max_x = -FLT_MAX;
for(int i = 0; i < num_curves; ++i) {
for (int i = 0; i < num_curves; ++i) {
BL::CurveMap map(cumap.curves[i]);
curvemap_minmax_curve(map, min_x, max_x);
}
}
static inline void curvemapping_to_array(BL::CurveMapping& cumap,
array<float>& data,
int size)
static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &data, int size)
{
cumap.update();
BL::CurveMap curve = cumap.curves[0];
data.resize(size);
for(int i = 0; i < size; i++) {
float t = (float)i/(float)(size-1);
for (int i = 0; i < size; i++) {
float t = (float)i / (float)(size - 1);
data[i] = curve.evaluate(t);
}
}
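curvemapping_to_array samples the curve at `size` evenly spaced parameter values in [0, 1]. A standalone sketch of the same sampling loop, with the curve replaced by an arbitrary callable (the real code evaluates a BL::CurveMap; `size` is assumed to be at least 2):

```cpp
#include <functional>
#include <vector>

/* Sample a 1D curve at `size` evenly spaced points in [0, 1], matching the
 * loop in curvemapping_to_array above. Assumes size >= 2. */
static std::vector<float> sample_curve(const std::function<float(float)> &curve, int size)
{
  std::vector<float> data(size);
  for (int i = 0; i < size; i++) {
    const float t = (float)i / (float)(size - 1);
    data[i] = curve(t);
  }
  return data;
}
```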
static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
array<float3>& data,
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
array<float3> &data,
int size,
bool rgb_curve)
{
@@ -196,52 +189,44 @@ static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
data.resize(size);
if(rgb_curve) {
if (rgb_curve) {
BL::CurveMap mapI = cumap.curves[3];
for(int i = 0; i < size; i++) {
const float t = min_x + (float)i/(float)(size-1) * range_x;
for (int i = 0; i < size; i++) {
const float t = min_x + (float)i / (float)(size - 1) * range_x;
data[i] = make_float3(mapR.evaluate(mapI.evaluate(t)),
mapG.evaluate(mapI.evaluate(t)),
mapB.evaluate(mapI.evaluate(t)));
}
}
else {
for(int i = 0; i < size; i++) {
float t = min_x + (float)i/(float)(size-1) * range_x;
data[i] = make_float3(mapR.evaluate(t),
mapG.evaluate(t),
mapB.evaluate(t));
for (int i = 0; i < size; i++) {
float t = min_x + (float)i / (float)(size - 1) * range_x;
data[i] = make_float3(mapR.evaluate(t), mapG.evaluate(t), mapB.evaluate(t));
}
}
}
static inline bool BKE_object_is_modified(BL::Object& self,
BL::Scene& scene,
bool preview)
static inline bool BKE_object_is_modified(BL::Object &self, BL::Scene &scene, bool preview)
{
return self.is_modified(scene, (preview)? (1<<0): (1<<1))? true: false;
return self.is_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false;
}
static inline bool BKE_object_is_deform_modified(BL::Object& self,
BL::Scene& scene,
bool preview)
static inline bool BKE_object_is_deform_modified(BL::Object &self, BL::Scene &scene, bool preview)
{
return self.is_deform_modified(scene, (preview)? (1<<0): (1<<1))? true: false;
return self.is_deform_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false;
}
static inline int render_resolution_x(BL::RenderSettings& b_render)
static inline int render_resolution_x(BL::RenderSettings &b_render)
{
return b_render.resolution_x()*b_render.resolution_percentage()/100;
return b_render.resolution_x() * b_render.resolution_percentage() / 100;
}
static inline int render_resolution_y(BL::RenderSettings& b_render)
static inline int render_resolution_y(BL::RenderSettings &b_render)
{
return b_render.resolution_y()*b_render.resolution_percentage()/100;
return b_render.resolution_y() * b_render.resolution_percentage() / 100;
}
static inline string image_user_file_path(BL::ImageUser& iuser,
BL::Image& ima,
int cfra)
static inline string image_user_file_path(BL::ImageUser &iuser, BL::Image &ima, int cfra)
{
char filepath[1024];
BKE_image_user_frame_calc(iuser.ptr.data, cfra);
@@ -249,130 +234,127 @@ static inline string image_user_file_path(BL::ImageUser& iuser,
return string(filepath);
}
static inline int image_user_frame_number(BL::ImageUser& iuser, int cfra)
static inline int image_user_frame_number(BL::ImageUser &iuser, int cfra)
{
BKE_image_user_frame_calc(iuser.ptr.data, cfra);
return iuser.frame_current();
}
static inline unsigned char *image_get_pixels_for_frame(BL::Image& image,
int frame)
static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame)
{
return BKE_image_get_pixels_for_frame(image.ptr.data, frame);
}
static inline float *image_get_float_pixels_for_frame(BL::Image& image,
int frame)
static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame)
{
return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame);
}
static inline void render_add_metadata(BL::RenderResult& b_rr, string name, string value)
static inline void render_add_metadata(BL::RenderResult &b_rr, string name, string value)
{
b_rr.stamp_data_add_field(name.c_str(), value.c_str());
}
/* Utilities */
static inline Transform get_transform(const BL::Array<float, 16>& array)
static inline Transform get_transform(const BL::Array<float, 16> &array)
{
ProjectionTransform projection;
/* We assume both types to be just 16 floats, and transpose because Blender
* uses column-major matrix order while we use row-major. */
memcpy((void *)&projection, &array, sizeof(float)*16);
memcpy((void *)&projection, &array, sizeof(float) * 16);
projection = projection_transpose(projection);
/* Drop last row, matrix is assumed to be affine transform. */
return projection_to_transform(projection);
}
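get_transform copies Blender's column-major 4x4 matrix and transposes it into the renderer's row-major layout before dropping the last row. A compact sketch of that transpose on a raw 16-float array (standalone, not the actual ProjectionTransform code):

```cpp
/* Transpose a 4x4 matrix stored as 16 contiguous floats, converting
 * column-major storage (Blender) into row-major storage. */
static void transpose4x4(const float src[16], float dst[16])
{
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      dst[row * 4 + col] = src[col * 4 + row];
    }
  }
}
```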
static inline float2 get_float2(const BL::Array<float, 2>& array)
static inline float2 get_float2(const BL::Array<float, 2> &array)
{
return make_float2(array[0], array[1]);
}
static inline float3 get_float3(const BL::Array<float, 2>& array)
static inline float3 get_float3(const BL::Array<float, 2> &array)
{
return make_float3(array[0], array[1], 0.0f);
}
static inline float3 get_float3(const BL::Array<float, 3>& array)
static inline float3 get_float3(const BL::Array<float, 3> &array)
{
return make_float3(array[0], array[1], array[2]);
}
static inline float3 get_float3(const BL::Array<float, 4>& array)
static inline float3 get_float3(const BL::Array<float, 4> &array)
{
return make_float3(array[0], array[1], array[2]);
}
static inline float4 get_float4(const BL::Array<float, 4>& array)
static inline float4 get_float4(const BL::Array<float, 4> &array)
{
return make_float4(array[0], array[1], array[2], array[3]);
}
static inline int3 get_int3(const BL::Array<int, 3>& array)
static inline int3 get_int3(const BL::Array<int, 3> &array)
{
return make_int3(array[0], array[1], array[2]);
}
static inline int4 get_int4(const BL::Array<int, 4>& array)
static inline int4 get_int4(const BL::Array<int, 4> &array)
{
return make_int4(array[0], array[1], array[2], array[3]);
}
static inline float3 get_float3(PointerRNA& ptr, const char *name)
static inline float3 get_float3(PointerRNA &ptr, const char *name)
{
float3 f;
RNA_float_get_array(&ptr, name, &f.x);
return f;
}
static inline void set_float3(PointerRNA& ptr, const char *name, float3 value)
static inline void set_float3(PointerRNA &ptr, const char *name, float3 value)
{
RNA_float_set_array(&ptr, name, &value.x);
}
static inline float4 get_float4(PointerRNA& ptr, const char *name)
static inline float4 get_float4(PointerRNA &ptr, const char *name)
{
float4 f;
RNA_float_get_array(&ptr, name, &f.x);
return f;
}
static inline void set_float4(PointerRNA& ptr, const char *name, float4 value)
static inline void set_float4(PointerRNA &ptr, const char *name, float4 value)
{
RNA_float_set_array(&ptr, name, &value.x);
}
static inline bool get_boolean(PointerRNA& ptr, const char *name)
static inline bool get_boolean(PointerRNA &ptr, const char *name)
{
return RNA_boolean_get(&ptr, name)? true: false;
return RNA_boolean_get(&ptr, name) ? true : false;
}
static inline void set_boolean(PointerRNA& ptr, const char *name, bool value)
static inline void set_boolean(PointerRNA &ptr, const char *name, bool value)
{
RNA_boolean_set(&ptr, name, (int)value);
}
static inline float get_float(PointerRNA& ptr, const char *name)
static inline float get_float(PointerRNA &ptr, const char *name)
{
return RNA_float_get(&ptr, name);
}
static inline void set_float(PointerRNA& ptr, const char *name, float value)
static inline void set_float(PointerRNA &ptr, const char *name, float value)
{
RNA_float_set(&ptr, name, value);
}
static inline int get_int(PointerRNA& ptr, const char *name)
static inline int get_int(PointerRNA &ptr, const char *name)
{
return RNA_int_get(&ptr, name);
}
static inline void set_int(PointerRNA& ptr, const char *name, int value)
static inline void set_int(PointerRNA &ptr, const char *name, int value)
{
RNA_int_set(&ptr, name, value);
}
@@ -384,20 +366,20 @@ static inline void set_int(PointerRNA& ptr, const char *name, int value)
* from 0 to num_values-1. Be careful to use it with enums where some values are
* deprecated!
*/
static inline int get_enum(PointerRNA& ptr,
static inline int get_enum(PointerRNA &ptr,
const char *name,
int num_values = -1,
int default_value = -1)
{
int value = RNA_enum_get(&ptr, name);
if(num_values != -1 && value >= num_values) {
if (num_values != -1 && value >= num_values) {
assert(default_value != -1);
value = default_value;
}
return value;
}
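get_enum optionally clamps out-of-range values to a default, which matters when a newer Blender build exposes enum entries this code does not know about. A standalone sketch of that guard with plain integers (no RNA involved):

```cpp
#include <cassert>

/* Clamp an enum value read from the UI to a known range, falling back to a
 * default for values added by newer versions, as get_enum above does. */
static int clamp_enum(int value, int num_values, int default_value)
{
  if (num_values != -1 && value >= num_values) {
    assert(default_value != -1);
    value = default_value;
  }
  return value;
}
```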
static inline string get_enum_identifier(PointerRNA& ptr, const char *name)
static inline string get_enum_identifier(PointerRNA &ptr, const char *name)
{
PropertyRNA *prop = RNA_struct_find_property(&ptr, name);
const char *identifier = "";
@@ -408,46 +390,42 @@ static inline string get_enum_identifier(PointerRNA& ptr, const char *name)
return string(identifier);
}
static inline void set_enum(PointerRNA& ptr, const char *name, int value)
static inline void set_enum(PointerRNA &ptr, const char *name, int value)
{
RNA_enum_set(&ptr, name, value);
}
static inline void set_enum(PointerRNA& ptr, const char *name, const string &identifier)
static inline void set_enum(PointerRNA &ptr, const char *name, const string &identifier)
{
RNA_enum_set_identifier(NULL, &ptr, name, identifier.c_str());
}
static inline string get_string(PointerRNA& ptr, const char *name)
static inline string get_string(PointerRNA &ptr, const char *name)
{
char cstrbuf[1024];
char *cstr = RNA_string_get_alloc(&ptr, name, cstrbuf, sizeof(cstrbuf));
string str(cstr);
if(cstr != cstrbuf)
if (cstr != cstrbuf)
MEM_freeN(cstr);
return str;
}
static inline void set_string(PointerRNA& ptr, const char *name, const string &value)
static inline void set_string(PointerRNA &ptr, const char *name, const string &value)
{
RNA_string_set(&ptr, name, value.c_str());
}
/* Relative Paths */
static inline string blender_absolute_path(BL::BlendData& b_data,
BL::ID& b_id,
const string& path)
static inline string blender_absolute_path(BL::BlendData &b_data, BL::ID &b_id, const string &path)
{
if(path.size() >= 2 && path[0] == '/' && path[1] == '/') {
if (path.size() >= 2 && path[0] == '/' && path[1] == '/') {
string dirname;
if(b_id.library()) {
if (b_id.library()) {
BL::ID b_library_id(b_id.library());
dirname = blender_absolute_path(b_data,
b_library_id,
b_id.library().filepath());
dirname = blender_absolute_path(b_data, b_library_id, b_id.library().filepath());
}
else
dirname = b_data.filepath();
@@ -458,15 +436,15 @@ static inline string blender_absolute_path(BL::BlendData& b_data,
return path;
}
static inline string get_text_datablock_content(const PointerRNA& ptr)
static inline string get_text_datablock_content(const PointerRNA &ptr)
{
if(ptr.data == NULL) {
if (ptr.data == NULL) {
return "";
}
string content;
BL::Text::lines_iterator iter;
for(iter.begin(ptr); iter; ++iter) {
for (iter.begin(ptr); iter; ++iter) {
content += iter->body() + "\n";
}
@@ -475,27 +453,28 @@ static inline string get_text_datablock_content(const PointerRNA& ptr)
/* Texture Space */
static inline void mesh_texture_space(BL::Mesh& b_mesh,
float3& loc,
float3& size)
static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &size)
{
loc = get_float3(b_mesh.texspace_location());
size = get_float3(b_mesh.texspace_size());
if(size.x != 0.0f) size.x = 0.5f/size.x;
if(size.y != 0.0f) size.y = 0.5f/size.y;
if(size.z != 0.0f) size.z = 0.5f/size.z;
if (size.x != 0.0f)
size.x = 0.5f / size.x;
if (size.y != 0.0f)
size.y = 0.5f / size.y;
if (size.z != 0.0f)
size.z = 0.5f / size.z;
loc = loc*size - make_float3(0.5f, 0.5f, 0.5f);
loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
}
/* Object motion steps, returns 0 if no motion blur needed. */
static inline uint object_motion_steps(BL::Object& b_parent, BL::Object& b_ob)
static inline uint object_motion_steps(BL::Object &b_parent, BL::Object &b_ob)
{
/* Get motion enabled and steps from object itself. */
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
bool use_motion = get_boolean(cobject, "use_motion_blur");
if(!use_motion) {
if (!use_motion) {
return 0;
}
@@ -503,11 +482,11 @@ static inline uint object_motion_steps(BL::Object& b_parent, BL::Object& b_ob)
/* Also check parent object, so motion blur and steps can be
* controlled by dupligroup duplicator for linked groups. */
if(b_parent.ptr.data != b_ob.ptr.data) {
if (b_parent.ptr.data != b_ob.ptr.data) {
PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles");
use_motion &= get_boolean(parent_cobject, "use_motion_blur");
if(!use_motion) {
if (!use_motion) {
return 0;
}
@@ -521,8 +500,7 @@ static inline uint object_motion_steps(BL::Object& b_parent, BL::Object& b_ob)
}
/* object uses deformation motion blur */
static inline bool object_use_deform_motion(BL::Object& b_parent,
BL::Object& b_ob)
static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_ob)
{
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
bool use_deform_motion = get_boolean(cobject, "use_deform_motion");
@@ -532,22 +510,22 @@ static inline bool object_use_deform_motion(BL::Object& b_parent,
* This way we can control motion blur from the dupligroup
* duplicator much more easily.
*/
if(use_deform_motion && b_parent.ptr.data != b_ob.ptr.data) {
if (use_deform_motion && b_parent.ptr.data != b_ob.ptr.data) {
PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles");
use_deform_motion &= get_boolean(parent_cobject, "use_deform_motion");
}
return use_deform_motion;
}
static inline BL::SmokeDomainSettings object_smoke_domain_find(BL::Object& b_ob)
static inline BL::SmokeDomainSettings object_smoke_domain_find(BL::Object &b_ob)
{
BL::Object::modifiers_iterator b_mod;
for(b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
if(b_mod->is_a(&RNA_SmokeModifier)) {
for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
if (b_mod->is_a(&RNA_SmokeModifier)) {
BL::SmokeModifier b_smd(*b_mod);
if(b_smd.smoke_type() == BL::SmokeModifier::smoke_type_DOMAIN)
if (b_smd.smoke_type() == BL::SmokeModifier::smoke_type_DOMAIN)
return b_smd.domain_settings();
}
}
@@ -559,12 +537,12 @@ static inline BL::DomainFluidSettings object_fluid_domain_find(BL::Object b_ob)
{
BL::Object::modifiers_iterator b_mod;
for(b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
if(b_mod->is_a(&RNA_FluidSimulationModifier)) {
for (b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) {
if (b_mod->is_a(&RNA_FluidSimulationModifier)) {
BL::FluidSimulationModifier b_fmd(*b_mod);
BL::FluidSettings fss = b_fmd.settings();
if(fss.type() == BL::FluidSettings::type_DOMAIN)
if (fss.type() == BL::FluidSettings::type_DOMAIN)
return (BL::DomainFluidSettings)b_fmd.settings();
}
}
@@ -572,18 +550,21 @@ static inline BL::DomainFluidSettings object_fluid_domain_find(BL::Object b_ob)
return BL::DomainFluidSettings(PointerRNA_NULL);
}
static inline Mesh::SubdivisionType object_subdivision_type(BL::Object& b_ob, bool preview, bool experimental)
static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
bool preview,
bool experimental)
{
PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
if(cobj.data && b_ob.modifiers.length() > 0 && experimental) {
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length()-1];
if (cobj.data && b_ob.modifiers.length() > 0 && experimental) {
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1];
bool enabled = preview ? mod.show_viewport() : mod.show_render();
if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_boolean_get(&cobj, "use_adaptive_subdivision")) {
if (enabled && mod.type() == BL::Modifier::type_SUBSURF &&
RNA_boolean_get(&cobj, "use_adaptive_subdivision")) {
BL::SubsurfModifier subsurf(mod);
if(subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) {
if (subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) {
return Mesh::SUBDIVISION_CATMULL_CLARK;
}
else {
@@ -600,22 +581,21 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object& b_ob, bo
* Utility class to keep in sync with blender data.
* Used for objects, meshes, lights and shaders. */
template<typename K, typename T>
class id_map {
public:
id_map(vector<T*> *scene_data_)
template<typename K, typename T> class id_map {
public:
id_map(vector<T *> *scene_data_)
{
scene_data = scene_data_;
}
T *find(const BL::ID& id)
T *find(const BL::ID &id)
{
return find(id.ptr.id.data);
}
T *find(const K& key)
T *find(const K &key)
{
if(b_map.find(key) != b_map.end()) {
if (b_map.find(key) != b_map.end()) {
T *data = b_map[key];
return data;
}
@@ -623,7 +603,7 @@ public:
return NULL;
}
void set_recalc(const BL::ID& id)
void set_recalc(const BL::ID &id)
{
b_recalc.insert(id.ptr.data);
}
@@ -643,17 +623,17 @@ public:
used_set.clear();
}
bool sync(T **r_data, const BL::ID& id)
bool sync(T **r_data, const BL::ID &id)
{
return sync(r_data, id, id, id.ptr.id.data);
}
bool sync(T **r_data, const BL::ID& id, const BL::ID& parent, const K& key)
bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
{
T *data = find(key);
bool recalc;
if(!data) {
if (!data) {
/* add data if it didn't exist yet */
data = new T();
scene_data->push_back(data);
@@ -662,7 +642,7 @@ public:
}
else {
recalc = (b_recalc.find(id.ptr.data) != b_recalc.end());
if(parent.ptr.data)
if (parent.ptr.data)
recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end());
}
@@ -672,7 +652,7 @@ public:
return recalc;
}
bool is_used(const K& key)
bool is_used(const K &key)
{
T *data = find(key);
return (data) ? used_set.find(data) != used_set.end() : false;
@@ -692,14 +672,14 @@ public:
bool post_sync(bool do_delete = true)
{
/* remove unused data */
vector<T*> new_scene_data;
typename vector<T*>::iterator it;
vector<T *> new_scene_data;
typename vector<T *>::iterator it;
bool deleted = false;
for(it = scene_data->begin(); it != scene_data->end(); it++) {
for (it = scene_data->begin(); it != scene_data->end(); it++) {
T *data = *it;
if(do_delete && used_set.find(data) == used_set.end()) {
if (do_delete && used_set.find(data) == used_set.end()) {
delete data;
deleted = true;
}
@@ -710,14 +690,14 @@ public:
*scene_data = new_scene_data;
/* update mapping */
map<K, T*> new_map;
typedef pair<const K, T*> TMapPair;
typename map<K, T*>::iterator jt;
map<K, T *> new_map;
typedef pair<const K, T *> TMapPair;
typename map<K, T *>::iterator jt;
for(jt = b_map.begin(); jt != b_map.end(); jt++) {
TMapPair& pair = *jt;
for (jt = b_map.begin(); jt != b_map.end(); jt++) {
TMapPair &pair = *jt;
if(used_set.find(pair.second) != used_set.end())
if (used_set.find(pair.second) != used_set.end())
new_map[pair.first] = pair.second;
}
@@ -728,16 +708,16 @@ public:
return deleted;
}
const map<K, T*>& key_to_scene_data()
const map<K, T *> &key_to_scene_data()
{
return b_map;
}
protected:
vector<T*> *scene_data;
map<K, T*> b_map;
set<T*> used_set;
set<void*> b_recalc;
protected:
vector<T *> *scene_data;
map<K, T *> b_map;
set<T *> used_set;
set<void *> b_recalc;
};
/* Object Key */
@@ -752,21 +732,21 @@ struct ObjectKey {
ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_)
: parent(parent_), ob(ob_)
{
if(id_)
if (id_)
memcpy(id, id_, sizeof(id));
else
memset(id, 0, sizeof(id));
}
bool operator<(const ObjectKey& k) const
bool operator<(const ObjectKey &k) const
{
if(ob < k.ob) {
if (ob < k.ob) {
return true;
}
else if(ob == k.ob) {
if(parent < k.parent)
else if (ob == k.ob) {
if (parent < k.parent)
return true;
else if(parent == k.parent)
else if (parent == k.parent)
return memcmp(id, k.id, sizeof(id)) < 0;
}
@@ -780,54 +760,58 @@ struct ParticleSystemKey {
void *ob;
int id[OBJECT_PERSISTENT_ID_SIZE];
ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE])
: ob(ob_)
ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
{
if(id_)
if (id_)
memcpy(id, id_, sizeof(id));
else
memset(id, 0, sizeof(id));
}
bool operator<(const ParticleSystemKey& k) const
bool operator<(const ParticleSystemKey &k) const
{
/* first id is particle index, we don't compare that */
if(ob < k.ob)
if (ob < k.ob)
return true;
else if(ob == k.ob)
return memcmp(id+1, k.id+1, sizeof(int)*(OBJECT_PERSISTENT_ID_SIZE-1)) < 0;
else if (ob == k.ob)
return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0;
return false;
}
};
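Both ObjectKey and ParticleSystemKey order map keys lexicographically: compare the owning pointer first, then fall back to memcmp over the persistent-id array (ParticleSystemKey skips the first id entry, the particle index). A condensed standalone sketch of that ordering with a hypothetical key type:

```cpp
#include <cstring>

/* Hypothetical key mirroring the ordering used by ObjectKey/ParticleSystemKey:
 * order by owning pointer first, then by the persistent-id array. */
struct InstanceKey {
  void *ob;
  int id[8];

  bool operator<(const InstanceKey &k) const
  {
    if (ob < k.ob)
      return true;
    if (ob == k.ob)
      return memcmp(id, k.id, sizeof(id)) < 0;
    return false;
  }
};
```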
class EdgeMap {
public:
EdgeMap() {
public:
EdgeMap()
{
}
void clear() {
void clear()
{
edges_.clear();
}
void insert(int v0, int v1) {
void insert(int v0, int v1)
{
get_sorted_verts(v0, v1);
edges_.insert(std::pair<int, int>(v0, v1));
}
bool exists(int v0, int v1) {
bool exists(int v0, int v1)
{
get_sorted_verts(v0, v1);
return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
}
protected:
void get_sorted_verts(int& v0, int& v1) {
if(v0 > v1) {
protected:
void get_sorted_verts(int &v0, int &v1)
{
if (v0 > v1) {
swap(v0, v1);
}
}
set< std::pair<int, int> > edges_;
set<std::pair<int, int>> edges_;
};
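EdgeMap stores undirected edges by sorting each vertex pair before insertion and lookup, so (v0, v1) and (v1, v0) resolve to the same set entry. A self-contained sketch of the same idea plus a usage example:

```cpp
#include <algorithm>
#include <set>
#include <utility>

/* Undirected edge set: canonicalize each pair as (min, max) so that
 * insert(1, 5) and exists(5, 1) refer to the same edge. */
class EdgeSet {
 public:
  void insert(int v0, int v1)
  {
    if (v0 > v1)
      std::swap(v0, v1);
    edges_.insert(std::make_pair(v0, v1));
  }
  bool exists(int v0, int v1) const
  {
    if (v0 > v1)
      std::swap(v0, v1);
    return edges_.find(std::make_pair(v0, v1)) != edges_.end();
  }

 private:
  std::set<std::pair<int, int>> edges_;
};

int main()
{
  EdgeSet edges;
  edges.insert(1, 5);
  return edges.exists(5, 1) ? 0 : 1; /* Same edge either way round. */
}
```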
CCL_NAMESPACE_END
@@ -27,7 +27,7 @@
#include "bvh/bvh_node.h"
#ifdef WITH_EMBREE
#include "bvh/bvh_embree.h"
# include "bvh/bvh_embree.h"
#endif
#include "util/util_foreach.h"
@@ -40,34 +40,38 @@ CCL_NAMESPACE_BEGIN
const char *bvh_layout_name(BVHLayout layout)
{
switch(layout) {
case BVH_LAYOUT_BVH2: return "BVH2";
case BVH_LAYOUT_BVH4: return "BVH4";
case BVH_LAYOUT_BVH8: return "BVH8";
case BVH_LAYOUT_NONE: return "NONE";
case BVH_LAYOUT_EMBREE: return "EMBREE";
case BVH_LAYOUT_ALL: return "ALL";
switch (layout) {
case BVH_LAYOUT_BVH2:
return "BVH2";
case BVH_LAYOUT_BVH4:
return "BVH4";
case BVH_LAYOUT_BVH8:
return "BVH8";
case BVH_LAYOUT_NONE:
return "NONE";
case BVH_LAYOUT_EMBREE:
return "EMBREE";
case BVH_LAYOUT_ALL:
return "ALL";
}
LOG(DFATAL) << "Unsupported BVH layout was passed.";
return "";
}
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts)
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts)
{
const BVHLayoutMask requested_layout_mask = (BVHLayoutMask)requested_layout;
/* Check whether requested layout is supported, if so -- no need to do
* any extra computation.
*/
if(supported_layouts & requested_layout_mask) {
if (supported_layouts & requested_layout_mask) {
return requested_layout;
}
/* Some bit magic to get widest supported BVH layout. */
/* This is a mask of supported BVH layouts which are narrower than the
* requested one.
*/
const BVHLayoutMask allowed_layouts_mask =
(supported_layouts & (requested_layout_mask - 1));
const BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
/* We get widest from allowed ones and convert mask to actual layout. */
const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask);
return (BVHLayout)(1 << widest_allowed_layout_mask);
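best_bvh_layout treats layouts as one-bit masks ordered from narrowest to widest; when the requested layout is unsupported, `requested_mask - 1` masks off everything wider and the highest remaining bit is chosen. A portable sketch of that selection using a plain loop instead of the `__bsr` intrinsic (hypothetical mask values; the caller must guarantee at least one narrower layout is supported):

```cpp
#include <cstdint>

/* Pick the widest supported layout mask that is not wider than the requested
 * one. Layouts are single-bit masks; higher bits mean wider BVH nodes. */
static uint32_t best_layout_mask(uint32_t requested_mask, uint32_t supported_mask)
{
  if (supported_mask & requested_mask) {
    return requested_mask; /* Requested layout is available. */
  }
  /* Keep only layouts narrower than the requested one. */
  uint32_t allowed = supported_mask & (requested_mask - 1);
  /* Highest set bit: portable replacement for the __bsr intrinsic. */
  uint32_t widest = 0;
  while (allowed >>= 1) {
    widest++;
  }
  return 1u << widest;
}
```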
@@ -75,26 +79,25 @@ BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout,
/* Pack Utility */
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i)
: node(n), idx(i)
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i) : node(n), idx(i)
{
}
int BVHStackEntry::encodeIdx() const
{
return (node->is_leaf())? ~idx: idx;
return (node->is_leaf()) ? ~idx : idx;
}
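encodeIdx stores leaf nodes as bitwise-complemented indices, so a single signed integer can address both inner nodes (>= 0) and leaves (< 0). A small sketch of encoding and decoding that convention (standalone, not the BVH packing code itself):

```cpp
/* Inner nodes keep their index; leaves are stored as ~idx, which is always
 * negative, so the sign distinguishes the two during traversal. */
static int encode_node_index(int idx, bool is_leaf)
{
  return is_leaf ? ~idx : idx;
}

static int decode_node_index(int encoded, bool *r_is_leaf)
{
  *r_is_leaf = (encoded < 0);
  return *r_is_leaf ? ~encoded : encoded;
}
```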
/* BVH */
BVH::BVH(const BVHParams& params_, const vector<Object*>& objects_)
: params(params_), objects(objects_)
BVH::BVH(const BVHParams &params_, const vector<Object *> &objects_)
: params(params_), objects(objects_)
{
}
BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
{
switch(params.bvh_layout) {
switch (params.bvh_layout) {
case BVH_LAYOUT_BVH2:
return new BVH2(params, objects);
case BVH_LAYOUT_BVH4:
@@ -115,7 +118,7 @@ BVH *BVH::create(const BVHParams& params, const vector<Object*>& objects)
/* Building */
void BVH::build(Progress& progress, Stats*)
void BVH::build(Progress &progress, Stats *)
{
progress.set_substatus("Building BVH");
@@ -129,8 +132,8 @@ void BVH::build(Progress& progress, Stats*)
progress);
BVHNode *bvh2_root = bvh_build.run();
if(progress.get_cancel()) {
if(bvh2_root != NULL) {
if (progress.get_cancel()) {
if (bvh2_root != NULL) {
bvh2_root->deleteSubtree();
}
return;
@@ -139,12 +142,12 @@ void BVH::build(Progress& progress, Stats*)
/* The BVH builder returns the tree in binary form (with two children per
* inner node). This needs to be adapted for wider BVH implementations. */
BVHNode *root = widen_children_nodes(bvh2_root);
if(root != bvh2_root) {
if (root != bvh2_root) {
bvh2_root->deleteSubtree();
}
if(progress.get_cancel()) {
if(root != NULL) {
if (progress.get_cancel()) {
if (root != NULL) {
root->deleteSubtree();
}
return;
@@ -154,7 +157,7 @@ void BVH::build(Progress& progress, Stats*)
progress.set_substatus("Packing BVH triangles and strands");
pack_primitives();
if(progress.get_cancel()) {
if (progress.get_cancel()) {
root->deleteSubtree();
return;
}
@@ -169,26 +172,27 @@ void BVH::build(Progress& progress, Stats*)
/* Refitting */
void BVH::refit(Progress& progress)
void BVH::refit(Progress &progress)
{
progress.set_substatus("Packing BVH primitives");
pack_primitives();
if(progress.get_cancel()) return;
if (progress.get_cancel())
return;
progress.set_substatus("Refitting BVH nodes");
refit_nodes();
}
void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
void BVH::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility)
{
/* Refit range of primitives. */
for(int prim = start; prim < end; prim++) {
for (int prim = start; prim < end; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
if(pidx == -1) {
if (pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
@@ -196,9 +200,9 @@ void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
/* Primitives. */
const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level)? mesh->curve_offset: 0;
int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
@@ -207,44 +211,43 @@ void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
if(mesh->use_motion_blur) {
if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
if (attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
for (size_t i = 0; i < steps; i++)
curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
else {
/* Triangles. */
int tri_offset = (params.top_level)? mesh->tri_offset: 0;
int tri_offset = (params.top_level) ? mesh->tri_offset : 0;
Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset);
const float3 *vpos = &mesh->verts[0];
triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */
if(mesh->use_motion_blur) {
if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
if (attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++)
triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
for (size_t i = 0; i < steps; i++)
triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
}
}
}
}
visibility |= ob->visibility_for_tracing();
}
}
@@ -273,9 +276,9 @@ void BVH::pack_primitives()
const size_t tidx_size = pack.prim_index.size();
size_t num_prim_triangles = 0;
/* Count number of triangles primitives in BVH. */
for(unsigned int i = 0; i < tidx_size; i++) {
if((pack.prim_index[i] != -1)) {
if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
for (unsigned int i = 0; i < tidx_size; i++) {
if ((pack.prim_index[i] != -1)) {
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
++num_prim_triangles;
}
}
@@ -289,12 +292,12 @@ void BVH::pack_primitives()
pack.prim_visibility.resize(tidx_size);
/* Fill in all the arrays. */
size_t prim_triangle_index = 0;
for(unsigned int i = 0; i < tidx_size; i++) {
if(pack.prim_index[i] != -1) {
for (unsigned int i = 0; i < tidx_size; i++) {
if (pack.prim_index[i] != -1) {
int tob = pack.prim_object[i];
Object *ob = objects[tob];
if((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
pack_triangle(i, (float4*)&pack.prim_tri_verts[3 * prim_triangle_index]);
if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) {
pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]);
pack.prim_tri_index[i] = 3 * prim_triangle_index;
++prim_triangle_index;
}
@@ -302,7 +305,7 @@ void BVH::pack_primitives()
pack.prim_tri_index[i] = -1;
}
pack.prim_visibility[i] = ob->visibility_for_tracing();
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack.prim_visibility[i] |= PATH_RAY_CURVE;
}
}
@@ -327,9 +330,9 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
/* Adjust primitive index to point to the triangle in the global array, for
* meshes with transform applied and already in the top level BVH.
*/
for(size_t i = 0; i < pack.prim_index.size(); i++)
if(pack.prim_index[i] != -1) {
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
for (size_t i = 0; i < pack.prim_index.size(); i++)
if (pack.prim_index[i] != -1) {
if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
@@ -353,14 +356,14 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
size_t pack_leaf_nodes_offset = leaf_nodes_size;
size_t object_offset = 0;
map<Mesh*, int> mesh_map;
map<Mesh *, int> mesh_map;
foreach(Object *ob, objects) {
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh;
if(mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) {
if (mesh->need_build_bvh()) {
if (mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
nodes_size += bvh->pack.nodes.size();
@@ -383,37 +386,37 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
pack.leaf_nodes.resize(leaf_nodes_size);
pack.object_node.resize(objects.size());
if(params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
if (params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0) {
pack.prim_time.resize(prim_index_size);
}
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
int4 *pack_nodes = (pack.nodes.size())? &pack.nodes[0]: NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size())? &pack.leaf_nodes[0]: NULL;
float2 *pack_prim_time = (pack.prim_time.size())? &pack.prim_time[0]: NULL;
int *pack_prim_index = (pack.prim_index.size()) ? &pack.prim_index[0] : NULL;
int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL;
int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL;
float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL;
/* merge */
foreach(Object *ob, objects) {
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
/* We assume that if a mesh doesn't need its own BVH it was already included
* in the top-level BVH and no packing is needed here.
*/
if(!mesh->need_build_bvh()) {
if (!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = 0;
continue;
}
/* If the mesh was already added once, don't add it again, but reuse the
* stored node offset for this object. */
map<Mesh*, int>::iterator it = mesh_map.find(mesh);
map<Mesh *, int>::iterator it = mesh_map.find(mesh);
if(mesh_map.find(mesh) != mesh_map.end()) {
if (mesh_map.find(mesh) != mesh_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
@@ -427,37 +430,37 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
int mesh_curve_offset = mesh->curve_offset;
/* fill in node indexes for instances */
if(bvh->pack.root_index == -1)
pack.object_node[object_offset++] = -noffset_leaf-1;
if (bvh->pack.root_index == -1)
pack.object_node[object_offset++] = -noffset_leaf - 1;
else
pack.object_node[object_offset++] = noffset;
mesh_map[mesh] = pack.object_node[object_offset-1];
mesh_map[mesh] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if(bvh->pack.prim_index.size()) {
if (bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
float2 *bvh_prim_time = bvh->pack.prim_time.size()? &bvh->pack.prim_time[0]: NULL;
float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL;
for(size_t i = 0; i < bvh_prim_index_size; i++) {
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
for (size_t i = 0; i < bvh_prim_index_size; i++) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] =
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
if(bvh_prim_time != NULL) {
if (bvh_prim_time != NULL) {
pack_prim_time[pack_prim_index_offset] = bvh_prim_time[i];
}
pack_prim_index_offset++;
@@ -465,70 +468,63 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
}
/* Merge triangle vertices data. */
if(bvh->pack.prim_tri_verts.size()) {
if (bvh->pack.prim_tri_verts.size()) {
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
&bvh->pack.prim_tri_verts[0],
prim_tri_size*sizeof(float4));
prim_tri_size * sizeof(float4));
pack_prim_tri_verts_offset += prim_tri_size;
}
/* merge nodes */
if(bvh->pack.leaf_nodes.size()) {
if (bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
for(size_t i = 0, j = 0;
i < leaf_nodes_offset_size;
i += BVH_NODE_LEAF_SIZE, j++)
{
for (size_t i = 0, j = 0; i < leaf_nodes_offset_size; i += BVH_NODE_LEAF_SIZE, j++) {
int4 data = leaf_nodes_offset[i];
data.x += prim_offset;
data.y += prim_offset;
pack_leaf_nodes[pack_leaf_nodes_offset] = data;
for(int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
for (int j = 1; j < BVH_NODE_LEAF_SIZE; ++j) {
pack_leaf_nodes[pack_leaf_nodes_offset + j] = leaf_nodes_offset[i + j];
}
pack_leaf_nodes_offset += BVH_NODE_LEAF_SIZE;
}
}
if(bvh->pack.nodes.size()) {
if (bvh->pack.nodes.size()) {
int4 *bvh_nodes = &bvh->pack.nodes[0];
size_t bvh_nodes_size = bvh->pack.nodes.size();
for(size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
size_t nsize, nsize_bbox;
if(bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
if(use_obvh) {
if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
if (use_obvh) {
nsize = BVH_UNALIGNED_ONODE_SIZE;
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE-1;
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
}
else {
nsize = use_qbvh
? BVH_UNALIGNED_QNODE_SIZE
: BVH_UNALIGNED_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE-1 : 0;
nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
}
}
else {
if(use_obvh) {
if (use_obvh) {
nsize = BVH_ONODE_SIZE;
nsize_bbox = BVH_ONODE_SIZE-1;
nsize_bbox = BVH_ONODE_SIZE - 1;
}
else {
nsize = (use_qbvh)? BVH_QNODE_SIZE: BVH_NODE_SIZE;
nsize_bbox = (use_qbvh)? BVH_QNODE_SIZE-1 : 0;
nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
}
}
memcpy(pack_nodes + pack_nodes_offset,
bvh_nodes + i,
nsize_bbox*sizeof(int4));
memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];
int4 data1 = bvh_nodes[i + nsize_bbox-1];
if(use_obvh) {
int4 data1 = bvh_nodes[i + nsize_bbox - 1];
if (use_obvh) {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
data.x += (data.x < 0) ? -noffset_leaf : noffset;
@@ -541,22 +537,22 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
else {
data.z += (data.z < 0) ? -noffset_leaf : noffset;
data.w += (data.w < 0) ? -noffset_leaf : noffset;
if(use_qbvh) {
data.x += (data.x < 0)? -noffset_leaf: noffset;
data.y += (data.y < 0)? -noffset_leaf: noffset;
if (use_qbvh) {
data.x += (data.x < 0) ? -noffset_leaf : noffset;
data.y += (data.y < 0) ? -noffset_leaf : noffset;
}
}
pack_nodes[pack_nodes_offset + nsize_bbox] = data;
if(use_obvh) {
if (use_obvh) {
pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
}
/* Usually this copies nothing, but we'd better
* be prepared for a possible node size extension.
*/
memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox+1],
&bvh_nodes[i + nsize_bbox+1],
sizeof(int4) * (nsize - (nsize_bbox+1)));
memcpy(&pack_nodes[pack_nodes_offset + nsize_bbox + 1],
&bvh_nodes[i + nsize_bbox + 1],
sizeof(int4) * (nsize - (nsize_bbox + 1)));
pack_nodes_offset += nsize;
i += nsize;

@@ -73,32 +73,29 @@ struct PackedBVH {
}
};
enum BVH_TYPE {
bvh2,
bvh4,
bvh8
};
enum BVH_TYPE { bvh2, bvh4, bvh8 };
/* BVH */
class BVH
{
public:
class BVH {
public:
PackedBVH pack;
BVHParams params;
vector<Object*> objects;
vector<Object *> objects;
static BVH *create(const BVHParams& params, const vector<Object*>& objects);
virtual ~BVH() {}
static BVH *create(const BVHParams &params, const vector<Object *> &objects);
virtual ~BVH()
{
}
virtual void build(Progress& progress, Stats *stats=NULL);
void refit(Progress& progress);
virtual void build(Progress &progress, Stats *stats = NULL);
void refit(Progress &progress);
protected:
BVH(const BVHParams& params, const vector<Object*>& objects);
protected:
BVH(const BVHParams &params, const vector<Object *> &objects);
/* Refit range of primitives. */
void refit_primitives(int start, int end, BoundBox& bbox, uint& visibility);
void refit_primitives(int start, int end, BoundBox &bbox, uint &visibility);
/* triangles and strands */
void pack_primitives();
@@ -115,8 +112,7 @@ protected:
};
/* Pack Utility */
struct BVHStackEntry
{
struct BVHStackEntry {
const BVHNode *node;
int idx;

@@ -25,8 +25,7 @@
CCL_NAMESPACE_BEGIN
BVH2::BVH2(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH2::BVH2(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
@@ -35,13 +34,12 @@ BVHNode *BVH2::widen_children_nodes(const BVHNode *root)
return const_cast<BVHNode *>(root);
}
void BVH2::pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf)
void BVH2::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
float4 data[BVH_NODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
@@ -52,48 +50,51 @@ void BVH2::pack_leaf(const BVHStackEntry& e,
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
}
void BVH2::pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1)
{
if(e0.node->is_unaligned || e1.node->is_unaligned) {
if (e0.node->is_unaligned || e1.node->is_unaligned) {
pack_unaligned_inner(e, e0, e1);
} else {
}
else {
pack_aligned_inner(e, e0, e1);
}
}
void BVH2::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1)
{
pack_aligned_node(e.idx,
e0.node->bounds, e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_aligned_node(int idx,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1)
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
int4 data[BVH_NODE_SIZE] = {
make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED,
visibility1 & ~PATH_RAY_NODE_UNALIGNED,
c0, c1),
make_int4(
visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 & ~PATH_RAY_NODE_UNALIGNED, c0, c1),
make_int4(__float_as_int(b0.min.x),
__float_as_int(b1.min.x),
__float_as_int(b0.max.x),
@@ -108,39 +109,41 @@ void BVH2::pack_aligned_node(int idx,
__float_as_int(b1.max.z)),
};
memcpy(&pack.nodes[idx], data, sizeof(int4)*BVH_NODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(int4) * BVH_NODE_SIZE);
}
void BVH2::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1)
{
pack_unaligned_node(e.idx,
e0.node->get_aligned_space(),
e1.node->get_aligned_space(),
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_unaligned_node(int idx,
const Transform& aligned_space0,
const Transform& aligned_space1,
const BoundBox& bounds0,
const BoundBox& bounds1,
int c0, int c1,
uint visibility0, uint visibility1)
const Transform &aligned_space0,
const Transform &aligned_space1,
const BoundBox &bounds0,
const BoundBox &bounds1,
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
float4 data[BVH_UNALIGNED_NODE_SIZE];
Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
aligned_space1);
Transform space0 = BVHUnaligned::compute_node_transform(bounds0, aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1, aligned_space1);
data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(c0),
@@ -153,7 +156,7 @@ void BVH2::pack_unaligned_node(int idx,
data[5] = space1.y;
data[6] = space1.z;
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_NODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_NODE_SIZE);
}
void BVH2::pack_nodes(const BVHNode *root)
@@ -163,9 +166,8 @@ void BVH2::pack_nodes(const BVHNode *root)
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
}
@@ -176,60 +178,58 @@ void BVH2::pack_nodes(const BVHNode *root)
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_NODE_LEAF_SIZE);
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_NODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_NODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_NODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE : BVH_NODE_SIZE;
}
while(stack.size()) {
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
/* inner node */
int idx[2];
for(int i = 0; i < 2; ++i) {
if(e.node->get_child(i)->is_leaf()) {
for (int i = 0; i < 2; ++i) {
if (e.node->get_child(i)->is_leaf()) {
idx[i] = nextLeafNodeIdx++;
}
else {
idx[i] = nextNodeIdx;
nextNodeIdx += e.node->get_child(i)->has_unaligned()
? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
nextNodeIdx += e.node->get_child(i)->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE :
BVH_NODE_SIZE;
}
}
stack.push_back(BVHStackEntry(e.node->get_child(0), idx[0]));
stack.push_back(BVHStackEntry(e.node->get_child(1), idx[1]));
pack_inner(e, stack[stack.size()-2], stack[stack.size()-1]);
pack_inner(e, stack[stack.size() - 2], stack[stack.size() - 1]);
}
}
assert(node_size == nextNodeIdx);
/* root index to start traversal at, to handle case of single leaf node */
pack.root_index = (root->is_leaf())? -1: 0;
pack.root_index = (root->is_leaf()) ? -1 : 0;
}
void BVH2::refit_nodes()
@@ -238,12 +238,12 @@ void BVH2::refit_nodes()
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
}
void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH2::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
if (leaf) {
/* refit leaf node */
assert(idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
const int4 *data = &pack.leaf_nodes[idx];
@@ -258,7 +258,7 @@ void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
leaf_data[0].y = __int_as_float(c1);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(data[0].w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_NODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
}
else {
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
@@ -271,29 +271,21 @@ void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
BoundBox bbox0 = BoundBox::empty, bbox1 = BoundBox::empty;
uint visibility0 = 0, visibility1 = 0;
refit_node((c0 < 0)? -c0-1: c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0)? -c1-1: c1, (c1 < 0), bbox1, visibility1);
refit_node((c0 < 0) ? -c0 - 1 : c0, (c0 < 0), bbox0, visibility0);
refit_node((c1 < 0) ? -c1 - 1 : c1, (c1 < 0), bbox1, visibility1);
if(is_unaligned) {
if (is_unaligned) {
Transform aligned_space = transform_identity();
pack_unaligned_node(idx,
aligned_space, aligned_space,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
pack_unaligned_node(
idx, aligned_space, aligned_space, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
else {
pack_aligned_node(idx,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
pack_aligned_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
bbox.grow(bbox0);
bbox.grow(bbox1);
visibility = visibility0|visibility1;
visibility = visibility0 | visibility1;
}
}

@@ -43,10 +43,10 @@ class Progress;
* Typical BVH with each node having two children.
*/
class BVH2 : public BVH {
protected:
protected:
/* constructor */
friend class BVH;
BVH2(const BVHParams& params, const vector<Object*>& objects);
BVH2(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
@@ -54,35 +54,36 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1);
void pack_aligned_node(int idx,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1);
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
const BVHStackEntry &e1);
void pack_unaligned_node(int idx,
const Transform& aligned_space0,
const Transform& aligned_space1,
const BoundBox& b0,
const BoundBox& b1,
int c0, int c1,
uint visibility0, uint visibility1);
const Transform &aligned_space0,
const Transform &aligned_space1,
const BoundBox &b0,
const BoundBox &b1,
int c0,
int c1,
uint visibility0,
uint visibility1);
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END

@@ -31,8 +31,7 @@ CCL_NAMESPACE_BEGIN
* life easier all over the place.
*/
BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH4::BVH4(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
params.bvh_layout = BVH_LAYOUT_BVH4;
}
@@ -41,7 +40,7 @@ namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{
if(node->is_leaf()) {
if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of one layer deeper, allowing us to have more children in
@@ -51,14 +50,14 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if(child0->is_leaf()) {
if (child0->is_leaf()) {
children[num_children++] = child0;
}
else {
children[num_children++] = child0->get_child(0);
children[num_children++] = child0->get_child(1);
}
if(child1->is_leaf()) {
if (child1->is_leaf()) {
children[num_children++] = child1;
}
else {
@@ -67,7 +66,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
}
/* Merge children in subtrees. */
BVHNode *children4[4];
for(int i = 0; i < num_children; ++i) {
for (int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
@@ -75,7 +74,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely we need to think of how to pass all the
* parameters there. */
if(node->is_unaligned) {
if (node->is_unaligned) {
node4->is_unaligned = true;
node4->aligned_space = new Transform();
*node4->aligned_space = *node->aligned_space;
@@ -87,10 +86,10 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
{
if(root == NULL) {
if (root == NULL) {
return NULL;
}
if(root->is_leaf()) {
if (root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root4 = bvh_node_merge_children_recursively(root);
@@ -99,11 +98,11 @@ BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
return root4;
}
void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
float4 data[BVH_QNODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
@@ -114,30 +113,28 @@ void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
}
void BVH4::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create an unaligned node, or all nodes are aligned
* and we can cut some corners here.
*/
if(params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) {
if (params.use_unaligned_nodes) {
for (int i = 0; i < num; i++) {
if (en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if(has_unaligned) {
if (has_unaligned) {
/* At least one child is unaligned, pack into an unaligned node. */
pack_unaligned_inner(e, en, num);
}
@@ -149,23 +146,16 @@ void BVH4::pack_inner(const BVHStackEntry& e,
}
}
void BVH4::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[4];
int child[4];
for(int i = 0; i < num; ++i) {
for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH4::pack_aligned_node(int idx,
@@ -183,7 +173,7 @@ void BVH4::pack_aligned_node(int idx,
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
@@ -197,7 +187,7 @@ void BVH4::pack_aligned_node(int idx,
data[7][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
for (int i = num; i < 4; i++) {
/* We store a BB which would never be recorded as an intersection,
* so the kernel can safely assume there are always 4 child nodes.
*/
@@ -213,17 +203,15 @@ void BVH4::pack_aligned_node(int idx,
data[7][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_QNODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
}
void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[4];
BoundBox bounds[4];
int child[4];
for(int i = 0; i < num; ++i) {
for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
@@ -254,10 +242,8 @@ void BVH4::pack_unaligned_node(int idx,
data[0].y = time_from;
data[0].z = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
@@ -278,7 +264,7 @@ void BVH4::pack_unaligned_node(int idx,
data[13][i] = __int_as_float(child[i]);
}
for(int i = num; i < 4; i++) {
for (int i = num; i < 4; i++) {
/* We store a BB which would never be recorded as an intersection,
* so the kernel can safely assume there are always 4 child nodes.
*/
@@ -302,7 +288,7 @@ void BVH4::pack_unaligned_node(int idx,
data[13][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_QNODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
}
/* Quad SIMD Nodes */
@@ -315,9 +301,8 @@ void BVH4::pack_nodes(const BVHNode *root)
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
}
@@ -328,34 +313,33 @@ void BVH4::pack_nodes(const BVHNode *root)
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_QNODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
while(stack.size()) {
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
@@ -364,18 +348,16 @@ void BVH4::pack_nodes(const BVHNode *root)
const BVHNode *children[4];
const int num_children = e.node->num_children();
/* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) {
for (int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
assert(children[i] != NULL);
if(children[i]->is_leaf()) {
if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
@@ -386,7 +368,7 @@ void BVH4::pack_nodes(const BVHNode *root)
assert(node_size == nextNodeIdx);
/* Root index to start traversal at, to handle case of single leaf node. */
pack.root_index = (root->is_leaf())? -1: 0;
pack.root_index = (root->is_leaf()) ? -1 : 0;
}
void BVH4::refit_nodes()
@@ -395,12 +377,12 @@ void BVH4::refit_nodes()
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
}
void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
if (leaf) {
/* Refit leaf node. */
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
@@ -422,58 +404,40 @@ void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_QNODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
}
else {
int4 *data = &pack.nodes[idx];
bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
int4 c;
if(is_unaligned) {
if (is_unaligned) {
c = data[13];
}
else {
c = data[7];
}
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
uint child_visibility[4] = {0};
int num_nodes = 0;
for(int i = 0; i < 4; ++i) {
if(c[i] != 0) {
refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
child_bbox[i], child_visibility[i]);
for (int i = 0; i < 4; ++i) {
if (c[i] != 0) {
refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
if(is_unaligned) {
Transform aligned_space[4] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
if (is_unaligned) {
Transform aligned_space[4] = {
transform_identity(), transform_identity(), transform_identity(), transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
}
}

@@ -43,10 +43,10 @@ class Progress;
* Quad BVH, with each node having four children, to use with SIMD instructions.
*/
class BVH4 : public BVH {
protected:
protected:
/* constructor */
friend class BVH;
BVH4(const BVHParams& params, const vector<Object*>& objects);
BVH4(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
@@ -54,12 +54,10 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
@@ -68,9 +66,7 @@ protected:
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
@@ -82,7 +78,7 @@ protected:
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END

@@ -36,8 +36,7 @@
CCL_NAMESPACE_BEGIN
BVH8::BVH8(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH8::BVH8(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
@@ -45,7 +44,7 @@ namespace {
BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
{
if(node->is_leaf()) {
if (node->is_leaf()) {
return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
}
/* Collect nodes of two layers deeper, allowing us to have more children in
@@ -55,20 +54,19 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
const BVHNode *child0 = node->get_child(0);
const BVHNode *child1 = node->get_child(1);
int num_children = 0;
if(child0->is_leaf()) {
if (child0->is_leaf()) {
children[num_children++] = child0;
}
else {
const BVHNode *child00 = child0->get_child(0),
*child01 = child0->get_child(1);
if(child00->is_leaf()) {
const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
if (child00->is_leaf()) {
children[num_children++] = child00;
}
else {
children[num_children++] = child00->get_child(0);
children[num_children++] = child00->get_child(1);
}
if(child01->is_leaf()) {
if (child01->is_leaf()) {
children[num_children++] = child01;
}
else {
@@ -76,20 +74,19 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
children[num_children++] = child01->get_child(1);
}
}
if(child1->is_leaf()) {
if (child1->is_leaf()) {
children[num_children++] = child1;
}
else {
const BVHNode *child10 = child1->get_child(0),
*child11 = child1->get_child(1);
if(child10->is_leaf()) {
const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
if (child10->is_leaf()) {
children[num_children++] = child10;
}
else {
children[num_children++] = child10->get_child(0);
children[num_children++] = child10->get_child(1);
}
if(child11->is_leaf()) {
if (child11->is_leaf()) {
children[num_children++] = child11;
}
else {
@@ -99,7 +96,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
}
/* Merge children in subtrees. */
BVHNode *children4[8];
for(int i = 0; i < num_children; ++i) {
for (int i = 0; i < num_children; ++i) {
children4[i] = bvh_node_merge_children_recursively(children[i]);
}
/* Allocate new node. */
@@ -107,7 +104,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
/* TODO(sergey): Consider doing this from the InnerNode() constructor.
* But in order to do this nicely we need to think of how to pass all the
* parameters there. */
if(node->is_unaligned) {
if (node->is_unaligned) {
node8->is_unaligned = true;
node8->aligned_space = new Transform();
*node8->aligned_space = *node->aligned_space;
@@ -119,10 +116,10 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
{
if(root == NULL) {
if (root == NULL) {
return NULL;
}
if(root->is_leaf()) {
if (root->is_leaf()) {
return const_cast<BVHNode *>(root);
}
BVHNode *root8 = bvh_node_merge_children_recursively(root);
@@ -131,11 +128,11 @@ BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
return root8;
}
void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
float4 data[BVH_ONODE_LEAF_SIZE];
memset(data, 0, sizeof(data));
if(leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
/* object */
data[0].x = __int_as_float(~(leaf->lo));
data[0].y = __int_as_float(0);
@@ -146,30 +143,28 @@ void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
data[0].y = __int_as_float(leaf->hi);
}
data[0].z = __uint_as_float(leaf->visibility);
if(leaf->num_triangles() != 0) {
if (leaf->num_triangles() != 0) {
data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
}
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
}
void BVH8::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create an unaligned node, or all nodes are aligned
* and we can cut some corners here.
*/
if(params.use_unaligned_nodes) {
for(int i = 0; i < num; i++) {
if(en[i].node->is_unaligned) {
if (params.use_unaligned_nodes) {
for (int i = 0; i < num; i++) {
if (en[i].node->is_unaligned) {
has_unaligned = true;
break;
}
}
}
if(has_unaligned) {
if (has_unaligned) {
/* At least one child is unaligned, pack into an unaligned node. */
pack_unaligned_inner(e, en, num);
}
@@ -181,23 +176,16 @@ void BVH8::pack_inner(const BVHStackEntry& e,
}
}
void BVH8::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[8];
int child[8];
for(int i = 0; i < num; ++i) {
for (int i = 0; i < num; ++i) {
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH8::pack_aligned_node(int idx,
@@ -215,7 +203,7 @@ void BVH8::pack_aligned_node(int idx,
data[0].b = time_from;
data[0].c = time_to;
for(int i = 0; i < num; i++) {
for (int i = 0; i < num; i++) {
float3 bb_min = bounds[i].min;
float3 bb_max = bounds[i].max;
@@ -229,7 +217,7 @@ void BVH8::pack_aligned_node(int idx,
data[7][i] = __int_as_float(child[i]);
}
for(int i = num; i < 8; i++) {
for (int i = num; i < 8; i++) {
/* We store a BB which would never be recorded as an intersection,
* so the kernel can safely assume there are always 8 child nodes.
*/
@@ -245,17 +233,15 @@ void BVH8::pack_aligned_node(int idx,
data[7][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_ONODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
}
void BVH8::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[8];
BoundBox bounds[8];
int child[8];
for(int i = 0; i < num; ++i) {
for (int i = 0; i < num; ++i) {
aligned_space[i] = en[i].node->get_aligned_space();
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
@@ -286,10 +272,8 @@ void BVH8::pack_unaligned_node(int idx,
data[0].b = time_from;
data[0].c = time_to;
for(int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
@@ -310,7 +294,7 @@ void BVH8::pack_unaligned_node(int idx,
data[13][i] = __int_as_float(child[i]);
}
for(int i = num; i < 8; i++) {
for (int i = num; i < 8; i++) {
/* We store a BB which would never be recorded as an intersection,
* so the kernel can safely assume there are always 8 child nodes.
*/
@@ -334,7 +318,7 @@ void BVH8::pack_unaligned_node(int idx,
data[13][i] = __int_as_float(0);
}
memcpy(&pack.nodes[idx], data, sizeof(float4)*BVH_UNALIGNED_ONODE_SIZE);
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
}
/* Octo SIMD Nodes */
@@ -347,9 +331,8 @@ void BVH8::pack_nodes(const BVHNode *root)
assert(num_leaf_nodes <= num_nodes);
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if(params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
}
@@ -360,34 +343,33 @@ void BVH8::pack_nodes(const BVHNode *root)
pack.nodes.clear();
pack.leaf_nodes.clear();
/* For top level BVH, first merge existing BVH's so we know the offsets. */
if(params.top_level) {
pack_instances(node_size, num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
if (params.top_level) {
pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
}
else {
pack.nodes.resize(node_size);
pack.leaf_nodes.resize(num_leaf_nodes*BVH_ONODE_LEAF_SIZE);
pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
}
int nextNodeIdx = 0, nextLeafNodeIdx = 0;
vector<BVHStackEntry> stack;
stack.reserve(BVHParams::MAX_DEPTH*2);
if(root->is_leaf()) {
stack.reserve(BVHParams::MAX_DEPTH * 2);
if (root->is_leaf()) {
stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
while(stack.size()) {
while (stack.size()) {
BVHStackEntry e = stack.back();
stack.pop_back();
if(e.node->is_leaf()) {
if (e.node->is_leaf()) {
/* leaf node */
const LeafNode *leaf = reinterpret_cast<const LeafNode*>(e.node);
const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
pack_leaf(e, leaf);
}
else {
@@ -396,17 +378,15 @@ void BVH8::pack_nodes(const BVHNode *root)
const BVHNode *children[8];
int num_children = e.node->num_children();
/* Push entries on the stack. */
for(int i = 0; i < num_children; ++i) {
for (int i = 0; i < num_children; ++i) {
int idx;
children[i] = e.node->get_child(i);
if(children[i]->is_leaf()) {
if (children[i]->is_leaf()) {
idx = nextLeafNodeIdx++;
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
@@ -426,21 +406,21 @@ void BVH8::refit_nodes()
BoundBox bbox = BoundBox::empty;
uint visibility = 0;
refit_node(0, (pack.root_index == -1)? true: false, bbox, visibility);
refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
}
void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
{
if(leaf) {
if (leaf) {
int4 *data = &pack.leaf_nodes[idx];
int4 c = data[0];
/* Refit leaf node. */
for(int prim = c.x; prim < c.y; prim++) {
for (int prim = c.x; prim < c.y; prim++) {
int pidx = pack.prim_index[prim];
int tob = pack.prim_object[prim];
Object *ob = objects[tob];
if(pidx == -1) {
if (pidx == -1) {
/* Object instance. */
bbox.grow(ob->bounds);
}
@@ -448,7 +428,7 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
/* Primitives. */
const Mesh *mesh = ob->mesh;
if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* Curves. */
int str_offset = (params.top_level) ? mesh->curve_offset : 0;
Mesh::Curve curve = mesh->get_curve(pidx - str_offset);
@@ -459,16 +439,16 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
visibility |= PATH_RAY_CURVE;
/* Motion curves. */
if(mesh->use_motion_blur) {
if (mesh->use_motion_blur) {
Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
if (attr) {
size_t mesh_size = mesh->curve_keys.size();
size_t steps = mesh->motion_steps - 1;
float3 *key_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) {
curve.bounds_grow(k, key_steps + i*mesh_size, &mesh->curve_radius[0], bbox);
for (size_t i = 0; i < steps; i++) {
curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox);
}
}
}
@@ -482,16 +462,16 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
triangle.bounds_grow(vpos, bbox);
/* Motion triangles. */
if(mesh->use_motion_blur) {
if (mesh->use_motion_blur) {
Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr) {
if (attr) {
size_t mesh_size = mesh->verts.size();
size_t steps = mesh->motion_steps - 1;
float3 *vert_steps = attr->data_float3();
for(size_t i = 0; i < steps; i++) {
triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
for (size_t i = 0; i < steps; i++) {
triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
}
}
}
@@ -506,54 +486,52 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
leaf_data[0].y = __int_as_float(c.y);
leaf_data[0].z = __uint_as_float(visibility);
leaf_data[0].w = __uint_as_float(c.w);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4)*BVH_ONODE_LEAF_SIZE);
memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
}
else {
float8 *data = (float8*)&pack.nodes[idx];
float8 *data = (float8 *)&pack.nodes[idx];
bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[8] = { BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty };
BoundBox child_bbox[8] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
int child[8];
uint child_visibility[8] = { 0 };
uint child_visibility[8] = {0};
int num_nodes = 0;
for(int i = 0; i < 8; ++i) {
child[i] = __float_as_int(data[(is_unaligned) ? 13: 7][i]);
for (int i = 0; i < 8; ++i) {
child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
if(child[i] != 0) {
refit_node((child[i] < 0)? -child[i]-1: child[i], (child[i] < 0),
child_bbox[i], child_visibility[i]);
if (child[i] != 0) {
refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
(child[i] < 0),
child_bbox[i],
child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
}
}
if(is_unaligned) {
Transform aligned_space[8] = { transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
if (is_unaligned) {
Transform aligned_space[8] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
}
}

@@ -54,10 +54,10 @@ class Progress;
* Octo BVH, with each node having eight children, to use with SIMD instructions.
*/
class BVH8 : public BVH {
protected:
protected:
/* constructor */
friend class BVH;
BVH8(const BVHParams& params, const vector<Object*>& objects);
BVH8(const BVHParams &params, const vector<Object *> &objects);
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
@@ -65,12 +65,10 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e, const BVHStackEntry *en, int num);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
@@ -79,9 +77,7 @@ protected:
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,
@@ -93,7 +89,7 @@ protected:
/* refit */
void refit_nodes() override;
void refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility);
void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
};
CCL_NAMESPACE_END

@@ -29,41 +29,58 @@ CCL_NAMESPACE_BEGIN
/* SSE replacements */
__forceinline void prefetch_L1 (const void* /*ptr*/) { }
__forceinline void prefetch_L2 (const void* /*ptr*/) { }
__forceinline void prefetch_L3 (const void* /*ptr*/) { }
__forceinline void prefetch_NTA(const void* /*ptr*/) { }
__forceinline void prefetch_L1(const void * /*ptr*/)
{
}
__forceinline void prefetch_L2(const void * /*ptr*/)
{
}
__forceinline void prefetch_L3(const void * /*ptr*/)
{
}
__forceinline void prefetch_NTA(const void * /*ptr*/)
{
}
template<size_t src> __forceinline float extract(const int4& b)
{ return b[src]; }
template<size_t dst> __forceinline const float4 insert(const float4& a, const float b)
{ float4 r = a; r[dst] = b; return r; }
template<size_t src> __forceinline float extract(const int4 &b)
{
return b[src];
}
template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
{
float4 r = a;
r[dst] = b;
return r;
}
__forceinline int get_best_dimension(const float4& bestSAH)
__forceinline int get_best_dimension(const float4 &bestSAH)
{
// return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
if(bestSAH.x == minSAH) return 0;
else if(bestSAH.y == minSAH) return 1;
else return 2;
if (bestSAH.x == minSAH)
return 0;
else if (bestSAH.y == minSAH)
return 1;
else
return 2;
}
/* BVH Object Binning */
BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
BVHObjectBinning::BVHObjectBinning(const BVHRange &job,
BVHReference *prims,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
: BVHRange(job),
: BVHRange(job),
splitSAH(FLT_MAX),
dim(0),
pos(0),
unaligned_heuristic_(unaligned_heuristic),
aligned_space_(aligned_space)
{
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
bounds_ = bounds();
cent_bounds_ = cent_bounds();
}
@@ -72,21 +89,18 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
* need in re-calculating this.
*/
bounds_ = unaligned_heuristic->compute_aligned_boundbox(
*this,
prims,
*aligned_space,
&cent_bounds_);
*this, prims, *aligned_space, &cent_bounds_);
}
/* compute number of bins to use and precompute scaling factor for binning */
num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f*size()));
num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
/* initialize binning counter and bounds */
BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */
for(size_t i = 0; i < num_bins; i++) {
for (size_t i = 0; i < num_bins; i++) {
bin_count[i] = make_int4(0);
bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
}
@@ -95,12 +109,12 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
{
ssize_t i;
for(i = 0; i < ssize_t(size()) - 1; i += 2) {
for (i = 0; i < ssize_t(size()) - 1; i += 2) {
prefetch_L2(&prims[start() + i + 8]);
/* map even and odd primitive to bin */
const BVHReference& prim0 = prims[start() + i + 0];
const BVHReference& prim1 = prims[start() + i + 1];
const BVHReference &prim0 = prims[start() + i + 0];
const BVHReference &prim1 = prims[start() + i + 1];
BoundBox bounds0 = get_prim_bounds(prim0);
BoundBox bounds1 = get_prim_bounds(prim1);
@@ -109,27 +123,45 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
int4 bin1 = get_bin(bounds1);
/* increase bounds for bins for even primitive */
int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
int b00 = (int)extract<0>(bin0);
bin_count[b00][0]++;
bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0);
bin_count[b01][1]++;
bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0);
bin_count[b02][2]++;
bin_bounds[b02][2].grow(bounds0);
/* increase bounds of bins for odd primitive */
int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(bounds1);
int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(bounds1);
int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(bounds1);
int b10 = (int)extract<0>(bin1);
bin_count[b10][0]++;
bin_bounds[b10][0].grow(bounds1);
int b11 = (int)extract<1>(bin1);
bin_count[b11][1]++;
bin_bounds[b11][1].grow(bounds1);
int b12 = (int)extract<2>(bin1);
bin_count[b12][2]++;
bin_bounds[b12][2].grow(bounds1);
}
/* for uneven number of primitives */
if(i < ssize_t(size())) {
if (i < ssize_t(size())) {
/* map primitive to bin */
const BVHReference& prim0 = prims[start() + i];
const BVHReference &prim0 = prims[start() + i];
BoundBox bounds0 = get_prim_bounds(prim0);
int4 bin0 = get_bin(bounds0);
/* increase bounds of bins */
int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
int b00 = (int)extract<0>(bin0);
bin_count[b00][0]++;
bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0);
bin_count[b01][1]++;
bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0);
bin_count[b02][2]++;
bin_bounds[b02][2].grow(bounds0);
}
}
@@ -142,13 +174,16 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
BoundBox by = BoundBox::empty;
BoundBox bz = BoundBox::empty;
for(size_t i = num_bins - 1; i > 0; i--) {
for (size_t i = num_bins - 1; i > 0; i--) {
count = count + bin_count[i];
r_count[i] = blocks(count);
bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area();
by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area();
bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area();
bx = merge(bx, bin_bounds[i][0]);
r_area[i][0] = bx.half_area();
by = merge(by, bin_bounds[i][1]);
r_area[i][1] = by.half_area();
bz = merge(bz, bin_bounds[i][2]);
r_area[i][2] = bz.half_area();
r_area[i][3] = r_area[i][2];
}
@@ -163,19 +198,22 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
by = BoundBox::empty;
bz = BoundBox::empty;
for(size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
count = count + bin_count[i-1];
for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
count = count + bin_count[i - 1];
bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area();
by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area();
bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area();
bx = merge(bx, bin_bounds[i - 1][0]);
float Ax = bx.half_area();
by = merge(by, bin_bounds[i - 1][1]);
float Ay = by.half_area();
bz = merge(bz, bin_bounds[i - 1][2]);
float Az = bz.half_area();
float4 lCount = blocks(count);
float4 lArea = make_float4(Ax,Ay,Az,Az);
float4 sah = lArea*lCount + r_area[i]*r_count[i];
float4 lArea = make_float4(Ax, Ay, Az, Az);
float4 sah = lArea * lCount + r_area[i] * r_count[i];
bestSplit = select(sah < bestSAH,ii,bestSplit);
bestSAH = min(sah,bestSAH);
bestSplit = select(sah < bestSAH, ii, bestSplit);
bestSAH = min(sah, bestSAH);
}
int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f);
@@ -188,9 +226,9 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
leafSAH = bounds_.half_area() * blocks(size());
}
void BVHObjectBinning::split(BVHReference* prims,
BVHObjectBinning& left_o,
BVHObjectBinning& right_o) const
void BVHObjectBinning::split(BVHReference *prims,
BVHObjectBinning &left_o,
BVHObjectBinning &right_o) const
{
size_t N = size();
@@ -199,9 +237,9 @@ void BVHObjectBinning::split(BVHReference* prims,
BoundBox lcent_bounds = BoundBox::empty;
BoundBox rcent_bounds = BoundBox::empty;
ssize_t l = 0, r = N-1;
ssize_t l = 0, r = N - 1;
while(l <= r) {
while (l <= r) {
prefetch_L2(&prims[start() + l + 8]);
prefetch_L2(&prims[start() + r - 8]);
@@ -210,7 +248,7 @@ void BVHObjectBinning::split(BVHReference* prims,
float3 unaligned_center = unaligned_bounds.center2();
float3 center = prim.bounds().center2();
if(get_bin(unaligned_center)[dim] < pos) {
if (get_bin(unaligned_center)[dim] < pos) {
lgeom_bounds.grow(prim.bounds());
lcent_bounds.grow(center);
l++;
@@ -218,13 +256,14 @@ void BVHObjectBinning::split(BVHReference* prims,
else {
rgeom_bounds.grow(prim.bounds());
rcent_bounds.grow(center);
swap(prims[start()+l],prims[start()+r]);
swap(prims[start() + l], prims[start() + r]);
r--;
}
}
/* finish */
if(l != 0 && N-1-r != 0) {
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims);
if (l != 0 && N - 1 - r != 0) {
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
return;
}
@@ -236,18 +275,19 @@ void BVHObjectBinning::split(BVHReference* prims,
lcent_bounds = BoundBox::empty;
rcent_bounds = BoundBox::empty;
for(size_t i = 0; i < N/2; i++) {
lgeom_bounds.grow(prims[start()+i].bounds());
lcent_bounds.grow(prims[start()+i].bounds().center2());
for (size_t i = 0; i < N / 2; i++) {
lgeom_bounds.grow(prims[start() + i].bounds());
lcent_bounds.grow(prims[start() + i].bounds().center2());
}
for(size_t i = N/2; i < N; i++) {
rgeom_bounds.grow(prims[start()+i].bounds());
rcent_bounds.grow(prims[start()+i].bounds().center2());
for (size_t i = N / 2; i < N; i++) {
rgeom_bounds.grow(prims[start() + i].bounds());
rcent_bounds.grow(prims[start() + i].bounds().center2());
}
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N/2), prims);
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
}
CCL_NAMESPACE_END


@@ -34,26 +34,28 @@ class BVHBuild;
* location to different sets. The SAH is evaluated by computing the number of
* blocks occupied by the primitives in the partitions. */
class BVHObjectBinning : public BVHRange
{
public:
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX) {}
class BVHObjectBinning : public BVHRange {
public:
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX)
{
}
BVHObjectBinning(const BVHRange& job,
BVHObjectBinning(const BVHRange &job,
BVHReference *prims,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHReference *prims,
BVHObjectBinning& left_o,
BVHObjectBinning& right_o) const;
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const;
__forceinline const BoundBox& unaligned_bounds() { return bounds_; }
__forceinline const BoundBox &unaligned_bounds()
{
return bounds_;
}
float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */
protected:
protected:
int dim; /* best split dimension */
int pos; /* best split position */
size_t num_bins; /* actual number of bins to use */
@@ -70,41 +72,40 @@ protected:
enum { LOG_BLOCK_SIZE = 2 };
/* computes the bin numbers for each dimension for a box. */
__forceinline int4 get_bin(const BoundBox& box) const
__forceinline int4 get_bin(const BoundBox &box) const
{
int4 a = make_int4((box.center2() - cent_bounds_.min)*scale - make_float3(0.5f));
int4 a = make_int4((box.center2() - cent_bounds_.min) * scale - make_float3(0.5f));
int4 mn = make_int4(0);
int4 mx = make_int4((int)num_bins-1);
int4 mx = make_int4((int)num_bins - 1);
return clamp(a, mn, mx);
}
/* computes the bin numbers for each dimension for a point. */
__forceinline int4 get_bin(const float3& c) const
__forceinline int4 get_bin(const float3 &c) const
{
return make_int4((c - cent_bounds_.min)*scale - make_float3(0.5f));
return make_int4((c - cent_bounds_.min) * scale - make_float3(0.5f));
}
/* compute the number of blocks occupied for each dimension. */
__forceinline float4 blocks(const int4& a) const
__forceinline float4 blocks(const int4 &a) const
{
return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
}
/* compute the number of blocks occupied in one dimension. */
__forceinline int blocks(size_t a) const
{
return (int)((a+((1LL << LOG_BLOCK_SIZE)-1)) >> LOG_BLOCK_SIZE);
return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
}
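As a small aside, blocks() is just a ceiling division by the block size (1 << LOG_BLOCK_SIZE, i.e. 4 primitives per block). A tiny standalone check of the same expression, copied into a local lambda purely for illustration:

#include <cassert>

int main()
{
  const int LOG_BLOCK_SIZE = 2; /* matches the enum above: blocks of 4 primitives */
  auto blocks = [&](long long a) {
    return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE);
  };
  assert(blocks(0) == 0);
  assert(blocks(1) == 1); /* 1..4 primitives occupy one block */
  assert(blocks(4) == 1);
  assert(blocks(5) == 2); /* 5 primitives spill into a second block */
  return 0;
}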
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};

File diff suppressed because it is too large.


@@ -41,22 +41,21 @@ class Progress;
/* BVH Builder */
class BVHBuild
{
public:
class BVHBuild {
public:
/* Constructor/Destructor */
BVHBuild(const vector<Object*>& objects,
array<int>& prim_type,
array<int>& prim_index,
array<int>& prim_object,
array<float2>& prim_time,
const BVHParams& params,
Progress& progress);
BVHBuild(const vector<Object *> &objects,
array<int> &prim_type,
array<int> &prim_index,
array<int> &prim_object,
array<float2> &prim_time,
const BVHParams &params,
Progress &progress);
~BVHBuild();
BVHNode *run();
protected:
protected:
friend class BVHMixedSplit;
friend class BVHObjectSplit;
friend class BVHSpatialSplit;
@@ -65,31 +64,27 @@ protected:
friend class BVHObjectBinning;
/* Adding references. */
void add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i);
void add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i);
void add_references(BVHRange& root);
void add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i);
void add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i);
void add_references(BVHRange &root);
/* Building. */
BVHNode *build_node(const BVHRange& range,
BVHNode *build_node(const BVHRange &range,
vector<BVHReference> *references,
int level,
int thread_id);
BVHNode *build_node(const BVHObjectBinning& range, int level);
BVHNode *create_leaf_node(const BVHRange& range,
const vector<BVHReference>& references);
BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
bool range_within_max_leaf_size(const BVHRange& range,
const vector<BVHReference>& references) const;
bool range_within_max_leaf_size(const BVHRange &range,
const vector<BVHReference> &references) const;
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node,
int child,
BVHObjectBinning *range,
int level);
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
BVHRange *range,
@@ -106,15 +101,15 @@ protected:
void rotate(BVHNode *node, int max_depth, int iterations);
/* Objects and primitive references. */
vector<Object*> objects;
vector<Object *> objects;
vector<BVHReference> references;
int num_original_references;
/* Output primitive indexes and objects. */
array<int>& prim_type;
array<int>& prim_index;
array<int>& prim_object;
array<float2>& prim_time;
array<int> &prim_type;
array<int> &prim_index;
array<int> &prim_object;
array<float2> &prim_time;
bool need_prim_time;
@@ -122,7 +117,7 @@ protected:
BVHParams params;
/* Progress reporting. */
Progress& progress;
Progress &progress;
double progress_start_time;
size_t progress_count;
size_t progress_total;


@@ -34,28 +34,28 @@
#ifdef WITH_EMBREE
#include <pmmintrin.h>
#include <xmmintrin.h>
#include <embree3/rtcore_geometry.h>
# include <pmmintrin.h>
# include <xmmintrin.h>
# include <embree3/rtcore_geometry.h>
#include "bvh/bvh_embree.h"
# include "bvh/bvh_embree.h"
/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. */
#include "kernel/bvh/bvh_embree.h"
#include "kernel/kernel_compat_cpu.h"
#include "kernel/split/kernel_split_data_types.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_random.h"
# include "kernel/bvh/bvh_embree.h"
# include "kernel/kernel_compat_cpu.h"
# include "kernel/split/kernel_split_data_types.h"
# include "kernel/kernel_globals.h"
# include "kernel/kernel_random.h"
#include "render/mesh.h"
#include "render/object.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_progress.h"
# include "render/mesh.h"
# include "render/object.h"
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_progress.h"
CCL_NAMESPACE_BEGIN
#define IS_HAIR(x) (x & 1)
# define IS_HAIR(x) (x & 1)
/* This gets called by Embree at every valid ray/object intersection.
* Things like recording subsurface or shadow hits for later evaluation
@@ -67,51 +67,52 @@ static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay*)args->ray;
const RTCHit *hit = (RTCHit*)args->hit;
CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
const RTCRay *ray = (RTCRay *)args->ray;
const RTCHit *hit = (RTCHit *)args->hit;
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
KernelGlobals *kg = ctx->kg;
/* Check if there is backfacing hair to ignore. */
if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
&& !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
&& !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
!(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
!(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
*args->valid = 0;
return;
}
}
}
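For context on the filter-callback mechanism used here, the following is a minimal, hypothetical Embree 3 example of attaching an intersection/occlusion filter to a geometry (it is not the Cycles setup, and my_filter/attach_filter are invented names). Writing 0 into args->valid[0] tells Embree to reject the hit and keep traversing:

#include <embree3/rtcore.h>

/* Reject hits based on some application-specific test.
 * (Toy example: here the test always accepts.) */
static void my_filter(const RTCFilterFunctionNArguments *args)
{
  /* Single-ray queries only, mirroring the assumption made above. */
  if (args->N != 1)
    return;
  const bool reject = false; /* application-specific test would go here */
  if (reject) {
    args->valid[0] = 0; /* 0 = ignore this hit, Embree continues tracing */
  }
}

void attach_filter(RTCGeometry geom)
{
  rtcSetGeometryIntersectFilterFunction(geom, my_filter);
  rtcSetGeometryOccludedFilterFunction(geom, my_filter);
  rtcCommitGeometry(geom);
}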
static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
{
assert(args->N == 1);
const RTCRay *ray = (RTCRay*)args->ray;
RTCHit *hit = (RTCHit*)args->hit;
CCLIntersectContext *ctx = ((IntersectContext*)args->context)->userRayExt;
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
KernelGlobals *kg = ctx->kg;
/* For all ray types: Check if there is backfacing hair to ignore */
if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
&& !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
&& !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
!(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
!(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
*args->valid = 0;
return;
}
}
switch(ctx->type) {
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL: {
/* Append the intersection to the end of the array. */
if(ctx->num_hits < ctx->max_hits) {
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for(size_t i = 0; i < ctx->max_hits; ++i) {
if(current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim &&
current_isect.t == ctx->isect_s[i].t) {
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
*args->valid = 0;
break;
@@ -122,7 +123,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
*isect = current_isect;
int prim = kernel_tex_fetch(__prim_index, isect->prim);
int shader = 0;
if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
shader = kernel_tex_fetch(__tri_shader, prim);
}
else {
@@ -131,7 +132,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
}
int flag = kernel_tex_fetch(__shaders, shader & SHADER_MASK).flags;
/* If no transparent shadows, all light is blocked. */
if(flag & (SD_HAS_TRANSPARENT_SHADOW)) {
if (flag & (SD_HAS_TRANSPARENT_SHADOW)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
@@ -145,20 +146,20 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
}
case CCLIntersectContext::RAY_SSS: {
/* No intersection information requested, just return a hit. */
if(ctx->max_hits == 0) {
if (ctx->max_hits == 0) {
break;
}
/* Ignore curves. */
if(hit->geomID & 1) {
if (hit->geomID & 1) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
/* See triangle_intersect_subsurface() for the native equivalent. */
for(int i = min(ctx->max_hits, ctx->ss_isect->num_hits) - 1; i >= 0; --i) {
if(ctx->ss_isect->hits[i].t == ray->tfar) {
for (int i = min(ctx->max_hits, ctx->ss_isect->num_hits) - 1; i >= 0; --i) {
if (ctx->ss_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
@@ -168,7 +169,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
++ctx->ss_isect->num_hits;
int hit_idx;
if(ctx->ss_isect->num_hits <= ctx->max_hits) {
if (ctx->ss_isect->num_hits <= ctx->max_hits) {
hit_idx = ctx->ss_isect->num_hits - 1;
}
else {
@@ -176,14 +177,15 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
* hits, randomly replace element or skip it */
hit_idx = lcg_step_uint(ctx->lcg_state) % ctx->ss_isect->num_hits;
if(hit_idx >= ctx->max_hits) {
if (hit_idx >= ctx->max_hits) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
break;
}
}
/* record intersection */
kernel_embree_convert_local_hit(kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
kernel_embree_convert_local_hit(
kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
ctx->ss_isect->Ng[hit_idx].x = hit->Ng_x;
ctx->ss_isect->Ng[hit_idx].y = hit->Ng_y;
ctx->ss_isect->Ng[hit_idx].z = hit->Ng_z;
@@ -194,13 +196,12 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
}
case CCLIntersectContext::RAY_VOLUME_ALL: {
/* Append the intersection to the end of the array. */
if(ctx->num_hits < ctx->max_hits) {
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for(size_t i = 0; i < ctx->max_hits; ++i) {
if(current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim &&
current_isect.t == ctx->isect_s[i].t) {
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
*args->valid = 0;
break;
@@ -211,9 +212,10 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
*isect = current_isect;
/* Only primitives from volume object. */
uint tri_object = (isect->object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, isect->prim) : isect->object;
kernel_tex_fetch(__prim_object, isect->prim) :
isect->object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
}
/* This tells Embree to continue tracing. */
@@ -230,11 +232,11 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
static size_t unaccounted_mem = 0;
static bool rtc_memory_monitor_func(void* userPtr, const ssize_t bytes, const bool)
static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool)
{
Stats *stats = (Stats*)userPtr;
if(stats) {
if(bytes > 0) {
Stats *stats = (Stats *)userPtr;
if (stats) {
if (bytes > 0) {
stats->mem_alloc(bytes);
}
else {
@@ -243,7 +245,7 @@ static bool rtc_memory_monitor_func(void* userPtr, const ssize_t bytes, const bo
}
else {
/* A stats pointer may not yet be available. Keep track of the memory usage for later. */
if(bytes >= 0) {
if (bytes >= 0) {
atomic_add_and_fetch_z(&unaccounted_mem, bytes);
}
else {
@@ -253,18 +255,18 @@ static bool rtc_memory_monitor_func(void* userPtr, const ssize_t bytes, const bo
return true;
}
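As a hedged aside, a callback like the one above is registered on the Embree device; a minimal standalone sketch of that wiring (with an invented atomic counter instead of the Stats object used here) could look like this:

#include <embree3/rtcore.h>

#include <atomic>

/* Toy accounting target; stands in for the Stats object used above. */
static std::atomic<long long> g_embree_bytes{0};

static bool my_memory_monitor(void * /*user_ptr*/, const ssize_t bytes, const bool /*post*/)
{
  g_embree_bytes += bytes; /* bytes is negative on frees */
  return true;             /* returning false would abort the build */
}

RTCDevice make_device_with_monitor()
{
  RTCDevice device = rtcNewDevice("verbose=0");
  rtcSetDeviceMemoryMonitorFunction(device, my_memory_monitor, nullptr);
  return device;
}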
static void rtc_error_func(void*, enum RTCError, const char* str)
static void rtc_error_func(void *, enum RTCError, const char *str)
{
VLOG(1) << str;
}
static double progress_start_time = 0.0f;
static bool rtc_progress_func(void* user_ptr, const double n)
static bool rtc_progress_func(void *user_ptr, const double n)
{
Progress *progress = (Progress*)user_ptr;
Progress *progress = (Progress *)user_ptr;
if(time_dt() - progress_start_time < 0.25) {
if (time_dt() - progress_start_time < 0.25) {
return true;
}
@@ -281,46 +283,55 @@ RTCDevice BVHEmbree::rtc_shared_device = NULL;
int BVHEmbree::rtc_shared_users = 0;
thread_mutex BVHEmbree::rtc_shared_mutex;
BVHEmbree::BVHEmbree(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_), scene(NULL), mem_used(0), top_level(NULL), stats(NULL),
curve_subdivisions(params.curve_subdivisions), build_quality(RTC_BUILD_QUALITY_REFIT),
BVHEmbree::BVHEmbree(const BVHParams &params_, const vector<Object *> &objects_)
: BVH(params_, objects_),
scene(NULL),
mem_used(0),
top_level(NULL),
stats(NULL),
curve_subdivisions(params.curve_subdivisions),
build_quality(RTC_BUILD_QUALITY_REFIT),
use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
use_ribbons(params.curve_flags & CURVE_KN_RIBBONS), dynamic_scene(true)
use_ribbons(params.curve_flags & CURVE_KN_RIBBONS),
dynamic_scene(true)
{
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
thread_scoped_lock lock(rtc_shared_mutex);
if(rtc_shared_users == 0) {
if (rtc_shared_users == 0) {
rtc_shared_device = rtcNewDevice("verbose=0");
/* Check here if Embree was built with the correct flags. */
ssize_t ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED);
if(ret != 1) {
ssize_t ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."\
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."
"Ray visiblity will not work.";
}
ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
if(ret != 1) {
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag."\
VLOG(1)
<< "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag."
"Renders may not look as expected.";
}
ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED);
if(ret != 1) {
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "\
VLOG(1)
<< "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "
"Line primitives will not be rendered.";
}
ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED);
if(ret != 1) {
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED flag. "\
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED "
"flag. "
"Triangle primitives will not be rendered.";
}
ret = rtcGetDeviceProperty (rtc_shared_device,RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED);
if(ret != 0) {
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED);
if (ret != 0) {
assert(0);
VLOG(1) << "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "\
VLOG(1) << "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "
"Renders may not look as expected.";
}
}
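As a compact, hypothetical illustration of the capability checks performed above (not the exact Cycles flags or logging), querying Embree 3 device properties looks like this:

#include <embree3/rtcore.h>

#include <cstdio>

int main()
{
  RTCDevice device = rtcNewDevice("verbose=0");

  /* Each property query returns an ssize_t; 1 means the feature was compiled in. */
  if (rtcGetDeviceProperty(device, RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED) != 1) {
    fprintf(stderr, "Embree was built without ray masks; visibility flags will be ignored.\n");
  }
  if (rtcGetDeviceProperty(device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED) != 1) {
    fprintf(stderr, "Embree was built without filter functions.\n");
  }

  rtcReleaseDevice(device);
  return 0;
}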
@@ -333,37 +344,37 @@ BVHEmbree::BVHEmbree(const BVHParams& params_, const vector<Object*>& objects_)
BVHEmbree::~BVHEmbree()
{
if(!params.top_level) {
if (!params.top_level) {
destroy(scene);
}
}
void BVHEmbree::destroy(RTCScene scene)
{
if(scene) {
if (scene) {
rtcReleaseScene(scene);
scene = NULL;
}
thread_scoped_lock lock(rtc_shared_mutex);
--rtc_shared_users;
if(rtc_shared_users == 0) {
rtcReleaseDevice (rtc_shared_device);
if (rtc_shared_users == 0) {
rtcReleaseDevice(rtc_shared_device);
rtc_shared_device = NULL;
}
}
void BVHEmbree::delete_rtcScene()
{
if(scene) {
if (scene) {
/* When this BVH is used as an instance in a top level BVH, don't delete it now.
* Let the top_level BVH know that it should delete it later. */
if(top_level) {
if (top_level) {
top_level->add_delayed_delete_scene(scene);
}
else {
rtcReleaseScene(scene);
if(delayed_delete_scenes.size()) {
foreach(RTCScene s, delayed_delete_scenes) {
if (delayed_delete_scenes.size()) {
foreach (RTCScene s, delayed_delete_scenes) {
rtcReleaseScene(s);
}
}
@@ -373,7 +384,7 @@ void BVHEmbree::delete_rtcScene()
}
}
void BVHEmbree::build(Progress& progress, Stats *stats_)
void BVHEmbree::build(Progress &progress, Stats *stats_)
{
assert(rtc_shared_device);
stats = stats_;
@@ -381,7 +392,7 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
progress.set_substatus("Building BVH");
if(scene) {
if (scene) {
rtcReleaseScene(scene);
scene = NULL;
}
@@ -393,19 +404,20 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST;
rtcSetSceneFlags(scene, scene_flags);
build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
(params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
(params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH :
RTC_BUILD_QUALITY_MEDIUM);
rtcSetSceneBuildQuality(scene, build_quality);
/* Count triangles and curves first, reserve arrays once. */
size_t prim_count = 0;
foreach(Object *ob, objects) {
foreach (Object *ob, objects) {
if (params.top_level) {
if (!ob->is_traceable()) {
continue;
}
if (!ob->mesh->is_instanced()) {
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
prim_count += ob->mesh->num_triangles();
}
if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
@@ -439,13 +451,13 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
pack.object_node.clear();
foreach(Object *ob, objects) {
if(params.top_level) {
if(!ob->is_traceable()) {
foreach (Object *ob, objects) {
if (params.top_level) {
if (!ob->is_traceable()) {
++i;
continue;
}
if(!ob->mesh->is_instanced()) {
if (!ob->mesh->is_instanced()) {
add_object(ob, i);
}
else {
@@ -456,10 +468,11 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
add_object(ob, i);
}
++i;
if(progress.get_cancel()) return;
if (progress.get_cancel())
return;
}
if(progress.get_cancel()) {
if (progress.get_cancel()) {
delete_rtcScene();
stats = NULL;
return;
@@ -470,7 +483,7 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
pack_primitives();
if(progress.get_cancel()) {
if (progress.get_cancel()) {
delete_rtcScene();
stats = NULL;
return;
@@ -491,23 +504,23 @@ BVHNode *BVHEmbree::widen_children_nodes(const BVHNode * /*root*/)
void BVHEmbree::add_object(Object *ob, int i)
{
Mesh *mesh = ob->mesh;
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && mesh->num_triangles() > 0) {
add_triangles(ob, i);
}
if(params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
if (params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) {
add_curves(ob, i);
}
}
void BVHEmbree::add_instance(Object *ob, int i)
{
if(!ob || !ob->mesh) {
if (!ob || !ob->mesh) {
assert(0);
return;
}
BVHEmbree *instance_bvh = (BVHEmbree*)(ob->mesh->bvh);
BVHEmbree *instance_bvh = (BVHEmbree *)(ob->mesh->bvh);
if(instance_bvh->top_level != this) {
if (instance_bvh->top_level != this) {
instance_bvh->top_level = this;
}
@@ -516,13 +529,14 @@ void BVHEmbree::add_instance(Object *ob, int i)
rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
if(ob->use_motion()) {
for(size_t step = 0; step < num_motion_steps; ++step) {
rtcSetGeometryTransform(geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->motion[step]);
if (ob->use_motion()) {
for (size_t step = 0; step < num_motion_steps; ++step) {
rtcSetGeometryTransform(
geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->motion[step]);
}
}
else {
rtcSetGeometryTransform(geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->tfm);
rtcSetGeometryTransform(geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->tfm);
}
pack.prim_index.push_back_slow(-1);
@@ -530,11 +544,11 @@ void BVHEmbree::add_instance(Object *ob, int i)
pack.prim_type.push_back_slow(PRIMITIVE_NONE);
pack.prim_tri_index.push_back_slow(-1);
rtcSetGeometryUserData(geom_id, (void*) instance_bvh->scene);
rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene);
rtcSetGeometryMask(geom_id, ob->visibility);
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i*2);
rtcAttachGeometryByID(scene, geom_id, i * 2);
rtcReleaseGeometry(geom_id);
}
@@ -544,11 +558,11 @@ void BVHEmbree::add_triangles(Object *ob, int i)
Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
if(mesh->has_motion_blur()) {
if (mesh->has_motion_blur()) {
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mP) {
if (attr_mP) {
num_motion_steps = mesh->motion_steps;
if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
assert(0);
num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
}
@@ -560,18 +574,19 @@ void BVHEmbree::add_triangles(Object *ob, int i)
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
unsigned *rtc_indices = (unsigned*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
RTC_FORMAT_UINT3, sizeof (int) * 3, num_triangles);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(rtc_indices);
if(!rtc_indices) {
VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() << ".\n";
if (!rtc_indices) {
VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str()
<< ".\n";
return;
}
for(size_t j = 0; j < num_triangles; ++j) {
for (size_t j = 0; j < num_triangles; ++j) {
Mesh::Triangle t = mesh->get_triangle(j);
rtc_indices[j*3] = t.v[0];
rtc_indices[j*3+1] = t.v[1];
rtc_indices[j*3+2] = t.v[2];
rtc_indices[j * 3] = t.v[0];
rtc_indices[j * 3 + 1] = t.v[1];
rtc_indices[j * 3 + 2] = t.v[2];
}
update_tri_vertex_buffer(geom_id, mesh);
@@ -585,34 +600,34 @@ void BVHEmbree::add_triangles(Object *ob, int i)
pack.prim_tri_index.resize(prim_index_size + num_triangles);
int prim_type = (num_motion_steps > 1 ? PRIMITIVE_MOTION_TRIANGLE : PRIMITIVE_TRIANGLE);
for(size_t j = 0; j < num_triangles; ++j) {
for (size_t j = 0; j < num_triangles; ++j) {
pack.prim_object[prim_object_size + j] = i;
pack.prim_type[prim_type_size + j] = prim_type;
pack.prim_index[prim_index_size + j] = j;
pack.prim_tri_index[prim_index_size + j] = j;
}
rtcSetGeometryUserData(geom_id, (void*) prim_offset);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
rtcSetGeometryMask(geom_id, ob->visibility);
rtcCommitGeometry(geom_id);
rtcAttachGeometryByID(scene, geom_id, i*2);
rtcAttachGeometryByID(scene, geom_id, i * 2);
rtcReleaseGeometry(geom_id);
}
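For readers new to the Embree 3 geometry API used in add_triangles(), here is a minimal, self-contained sketch of creating a one-triangle scene with rtcSetNewGeometryBuffer(). It is a generic illustration only; the Cycles path above additionally over-allocates the vertex buffer, handles motion steps, and attaches Cycles-specific per-primitive data:

#include <embree3/rtcore.h>

RTCScene make_single_triangle_scene(RTCDevice device)
{
  RTCScene scene = rtcNewScene(device);
  RTCGeometry geom = rtcNewGeometry(device, RTC_GEOMETRY_TYPE_TRIANGLE);

  /* Embree allocates and owns these buffers; the application just fills them. */
  float *verts = (float *)rtcSetNewGeometryBuffer(
      geom, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, 3 * sizeof(float), 3);
  unsigned *indices = (unsigned *)rtcSetNewGeometryBuffer(
      geom, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, 3 * sizeof(unsigned), 1);

  const float v[9] = {0, 0, 0, 1, 0, 0, 0, 1, 0};
  for (int i = 0; i < 9; ++i)
    verts[i] = v[i];
  indices[0] = 0;
  indices[1] = 1;
  indices[2] = 2;

  rtcCommitGeometry(geom);
  rtcAttachGeometry(scene, geom); /* or rtcAttachGeometryByID() to choose the ID, as above */
  rtcReleaseGeometry(geom);
  rtcCommitScene(scene);
  return scene;
}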
void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
{
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
int t_mid = 0;
if(mesh->has_motion_blur()) {
if (mesh->has_motion_blur()) {
attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mP) {
if (attr_mP) {
num_motion_steps = mesh->motion_steps;
t_mid = (num_motion_steps - 1) / 2;
if(num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) {
assert(0);
num_motion_steps = RTC_MAX_TIME_STEP_COUNT;
}
@@ -620,9 +635,9 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
}
const size_t num_verts = mesh->verts.size();
for(int t = 0; t < num_motion_steps; ++t) {
for (int t = 0; t < num_motion_steps; ++t) {
const float3 *verts;
if(t == t_mid) {
if (t == t_mid) {
verts = &mesh->verts[0];
}
else {
@@ -630,11 +645,11 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
verts = &attr_mP->data_float3()[t_ * num_verts];
}
float *rtc_verts = (float*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
float *rtc_verts = (float *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
assert(rtc_verts);
if(rtc_verts) {
for(size_t j = 0; j < num_verts; ++j) {
if (rtc_verts) {
for (size_t j = 0; j < num_verts; ++j) {
rtc_verts[0] = verts[j].x;
rtc_verts[1] = verts[j].y;
rtc_verts[2] = verts[j].z;
@@ -644,20 +659,20 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
}
}
void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh)
{
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
if(mesh->has_motion_blur()) {
if (mesh->has_motion_blur()) {
attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mP) {
if (attr_mP) {
num_motion_steps = mesh->motion_steps;
}
}
const size_t num_curves = mesh->num_curves();
size_t num_keys = 0;
for(size_t j = 0; j < num_curves; ++j) {
for (size_t j = 0; j < num_curves; ++j) {
const Mesh::Curve c = mesh->get_curve(j);
num_keys += c.num_keys;
}
@@ -665,9 +680,9 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh
/* Copy the CV data to Embree */
const int t_mid = (num_motion_steps - 1) / 2;
const float *curve_radius = &mesh->curve_radius[0];
for(int t = 0; t < num_motion_steps; ++t) {
for (int t = 0; t < num_motion_steps; ++t) {
const float3 *verts;
if(t == t_mid || attr_mP == NULL) {
if (t == t_mid || attr_mP == NULL) {
verts = &mesh->curve_keys[0];
}
else {
@@ -675,19 +690,19 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh
verts = &attr_mP->data_float3()[t_ * num_keys];
}
float4 *rtc_verts = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
float4 *rtc_verts = (float4 *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
float4 *rtc_tangents = NULL;
if(use_curves) {
rtc_tangents = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_TANGENT, t,
RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
if (use_curves) {
rtc_tangents = (float4 *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_TANGENT, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
assert(rtc_tangents);
}
assert(rtc_verts);
if(rtc_verts) {
if(use_curves && rtc_tangents) {
if (rtc_verts) {
if (use_curves && rtc_tangents) {
const size_t num_curves = mesh->num_curves();
for(size_t j = 0; j < num_curves; ++j) {
for (size_t j = 0; j < num_curves; ++j) {
Mesh::Curve c = mesh->get_curve(j);
int fk = c.first_key;
rtc_verts[0] = float3_to_float4(verts[fk]);
@@ -696,7 +711,7 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh
rtc_tangents[0].w = curve_radius[fk + 1] - curve_radius[fk];
++fk;
int k = 1;
for(;k < c.num_segments(); ++k, ++fk) {
for (; k < c.num_segments(); ++k, ++fk) {
rtc_verts[k] = float3_to_float4(verts[fk]);
rtc_verts[k].w = curve_radius[fk];
rtc_tangents[k] = float3_to_float4((verts[fk + 1] - verts[fk - 1]) * 0.5f);
@@ -711,7 +726,7 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh
}
}
else {
for(size_t j = 0; j < num_keys; ++j) {
for (size_t j = 0; j < num_keys; ++j) {
rtc_verts[j] = float3_to_float4(verts[j]);
rtc_verts[j].w = curve_radius[j];
}
@@ -726,16 +741,16 @@ void BVHEmbree::add_curves(Object *ob, int i)
const Mesh *mesh = ob->mesh;
const Attribute *attr_mP = NULL;
size_t num_motion_steps = 1;
if(mesh->has_motion_blur()) {
if (mesh->has_motion_blur()) {
attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if(attr_mP) {
if (attr_mP) {
num_motion_steps = mesh->motion_steps;
}
}
const size_t num_curves = mesh->num_curves();
size_t num_segments = 0;
for(size_t j = 0; j < num_curves; ++j) {
for (size_t j = 0; j < num_curves; ++j) {
Mesh::Curve c = mesh->get_curve(j);
assert(c.num_segments() > 0);
num_segments += c.num_segments();
@@ -751,23 +766,24 @@ void BVHEmbree::add_curves(Object *ob, int i)
size_t prim_tri_index_size = pack.prim_index.size();
pack.prim_tri_index.resize(prim_tri_index_size + num_segments);
enum RTCGeometryType type = (!use_curves) ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
enum RTCGeometryType type = (!use_curves) ?
RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
(use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
unsigned *rtc_indices = (unsigned*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
RTC_FORMAT_UINT, sizeof (int), num_segments);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments);
size_t rtc_index = 0;
for(size_t j = 0; j < num_curves; ++j) {
for (size_t j = 0; j < num_curves; ++j) {
Mesh::Curve c = mesh->get_curve(j);
for(size_t k = 0; k < c.num_segments(); ++k) {
for (size_t k = 0; k < c.num_segments(); ++k) {
rtc_indices[rtc_index] = c.first_key + k;
/* Cycles specific data. */
pack.prim_object[prim_object_size + rtc_index] = i;
pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(num_motion_steps > 1 ?
PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(
num_motion_steps > 1 ? PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
pack.prim_index[prim_index_size + rtc_index] = j;
pack.prim_tri_index[prim_tri_index_size + rtc_index] = rtc_index;
@@ -780,7 +796,7 @@ void BVHEmbree::add_curves(Object *ob, int i)
update_curve_vertex_buffer(geom_id, mesh);
rtcSetGeometryUserData(geom_id, (void*) prim_offset);
rtcSetGeometryUserData(geom_id, (void *)prim_offset);
rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func);
rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func);
rtcSetGeometryMask(geom_id, ob->visibility);
@@ -793,13 +809,13 @@ void BVHEmbree::add_curves(Object *ob, int i)
void BVHEmbree::pack_nodes(const BVHNode *)
{
/* Quite a bit of this code is for compatibility with Cycles' native BVH. */
if(!params.top_level) {
if (!params.top_level) {
return;
}
for(size_t i = 0; i < pack.prim_index.size(); ++i) {
if(pack.prim_index[i] != -1) {
if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
for (size_t i = 0; i < pack.prim_index.size(); ++i) {
if (pack.prim_index[i] != -1) {
if (pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
@@ -816,14 +832,14 @@ void BVHEmbree::pack_nodes(const BVHNode *)
size_t pack_prim_tri_verts_offset = prim_tri_verts_size;
size_t object_offset = 0;
map<Mesh*, int> mesh_map;
map<Mesh *, int> mesh_map;
foreach(Object *ob, objects) {
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
BVH *bvh = mesh->bvh;
if(mesh->need_build_bvh()) {
if(mesh_map.find(mesh) == mesh_map.end()) {
if (mesh->need_build_bvh()) {
if (mesh_map.find(mesh) == mesh_map.end()) {
prim_index_size += bvh->pack.prim_index.size();
prim_tri_verts_size += bvh->pack.prim_tri_verts.size();
mesh_map[mesh] = 1;
@@ -841,35 +857,35 @@ void BVHEmbree::pack_nodes(const BVHNode *)
pack.prim_tri_index.resize(prim_index_size);
pack.object_node.resize(objects.size());
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size())? &pack.prim_tri_verts[0]: NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size())? &pack.prim_tri_index[0]: NULL;
int *pack_prim_index = (pack.prim_index.size()) ? &pack.prim_index[0] : NULL;
int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL;
int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL;
float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL;
uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL;
/* merge */
foreach(Object *ob, objects) {
foreach (Object *ob, objects) {
Mesh *mesh = ob->mesh;
/* We assume that if a mesh doesn't need its own BVH it was already included
* in a top-level BVH and no packing is needed here.
*/
if(!mesh->need_build_bvh()) {
if (!mesh->need_build_bvh()) {
pack.object_node[object_offset++] = prim_offset;
continue;
}
/* if mesh already added once, don't add it again, but use the stored
* node offset for this object */
map<Mesh*, int>::iterator it = mesh_map.find(mesh);
map<Mesh *, int>::iterator it = mesh_map.find(mesh);
if(mesh_map.find(mesh) != mesh_map.end()) {
if (mesh_map.find(mesh) != mesh_map.end()) {
int noffset = it->second;
pack.object_node[object_offset++] = noffset;
continue;
}
BVHEmbree *bvh = (BVHEmbree*)mesh->bvh;
BVHEmbree *bvh = (BVHEmbree *)mesh->bvh;
rtc_memory_monitor_func(stats, unaccounted_mem, true);
unaccounted_mem = 0;
@@ -880,24 +896,24 @@ void BVHEmbree::pack_nodes(const BVHNode *)
/* fill in node indexes for instances */
pack.object_node[object_offset++] = prim_offset;
mesh_map[mesh] = pack.object_node[object_offset-1];
mesh_map[mesh] = pack.object_node[object_offset - 1];
/* merge primitive, object and triangle indexes */
if(bvh->pack.prim_index.size()) {
if (bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0];
for(size_t i = 0; i < bvh_prim_index_size; ++i) {
if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
for (size_t i = 0; i < bvh_prim_index_size; ++i) {
if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
pack_prim_tri_index[pack_prim_index_offset] = -1;
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] =
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
@@ -908,11 +924,11 @@ void BVHEmbree::pack_nodes(const BVHNode *)
}
/* Merge triangle vertices data. */
if(bvh->pack.prim_tri_verts.size()) {
if (bvh->pack.prim_tri_verts.size()) {
const size_t prim_tri_size = bvh->pack.prim_tri_verts.size();
memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset,
&bvh->pack.prim_tri_verts[0],
prim_tri_size*sizeof(float4));
prim_tri_size * sizeof(float4));
pack_prim_tri_verts_offset += prim_tri_size;
}
@@ -924,16 +940,16 @@ void BVHEmbree::refit_nodes()
{
/* Update all vertex buffers, then tell Embree to rebuild/refit the BVHs. */
unsigned geom_id = 0;
foreach(Object *ob, objects) {
if(!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) {
if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
foreach (Object *ob, objects) {
if (!params.top_level || (ob->is_traceable() && !ob->mesh->is_instanced())) {
if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE && ob->mesh->num_triangles() > 0) {
update_tri_vertex_buffer(rtcGetGeometry(scene, geom_id), ob->mesh);
rtcCommitGeometry(rtcGetGeometry(scene,geom_id));
rtcCommitGeometry(rtcGetGeometry(scene, geom_id));
}
if(params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) {
update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id+1), ob->mesh);
rtcCommitGeometry(rtcGetGeometry(scene,geom_id+1));
if (params.primitive_mask & PRIMITIVE_ALL_CURVE && ob->mesh->num_curves() > 0) {
update_curve_vertex_buffer(rtcGetGeometry(scene, geom_id + 1), ob->mesh);
rtcCommitGeometry(rtcGetGeometry(scene, geom_id + 1));
}
}
geom_id += 2;


@@ -19,24 +19,23 @@
#ifdef WITH_EMBREE
#include <embree3/rtcore.h>
#include <embree3/rtcore_scene.h>
# include <embree3/rtcore.h>
# include <embree3/rtcore_scene.h>
#include "bvh/bvh.h"
#include "bvh/bvh_params.h"
# include "bvh/bvh.h"
# include "bvh/bvh_params.h"
#include "util/util_thread.h"
#include "util/util_types.h"
#include "util/util_vector.h"
# include "util/util_thread.h"
# include "util/util_types.h"
# include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Mesh;
class BVHEmbree : public BVH
{
public:
virtual void build(Progress& progress, Stats *stats) override;
class BVHEmbree : public BVH {
public:
virtual void build(Progress &progress, Stats *stats) override;
virtual ~BVHEmbree();
RTCScene scene;
static void destroy(RTCScene);
@@ -44,11 +43,11 @@ public:
/* Building process. */
virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
protected:
protected:
friend class BVH;
BVHEmbree(const BVHParams& params, const vector<Object*>& objects);
BVHEmbree(const BVHParams &params, const vector<Object *> &objects);
virtual void pack_nodes(const BVHNode*) override;
virtual void pack_nodes(const BVHNode *) override;
virtual void refit_nodes() override;
void add_object(Object *ob, int i);
@@ -58,12 +57,16 @@ protected:
ssize_t mem_used;
void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); }
void add_delayed_delete_scene(RTCScene scene)
{
delayed_delete_scenes.push_back(scene);
}
BVHEmbree *top_level;
private:
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh);
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
void update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);
static RTCDevice rtc_shared_device;
static int rtc_shared_users;


@@ -30,8 +30,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
{
int cnt = 0;
switch(stat)
{
switch (stat) {
case BVH_STAT_NODE_COUNT:
cnt = 1;
break;
@@ -42,37 +41,37 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
cnt = is_leaf() ? 0 : 1;
break;
case BVH_STAT_TRIANGLE_COUNT:
cnt = is_leaf() ? reinterpret_cast<const LeafNode*>(this)->num_triangles() : 0;
cnt = is_leaf() ? reinterpret_cast<const LeafNode *>(this)->num_triangles() : 0;
break;
case BVH_STAT_CHILDNODE_COUNT:
cnt = num_children();
break;
case BVH_STAT_ALIGNED_COUNT:
if(!is_unaligned) {
if (!is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_UNALIGNED_COUNT:
if(is_unaligned) {
if (is_unaligned) {
cnt = 1;
}
break;
case BVH_STAT_ALIGNED_INNER_COUNT:
if(!is_leaf()) {
if (!is_leaf()) {
bool has_unaligned = false;
for(int j = 0; j < num_children(); j++) {
for (int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned? 0: 1;
cnt += has_unaligned ? 0 : 1;
}
break;
case BVH_STAT_UNALIGNED_INNER_COUNT:
if(!is_leaf()) {
if (!is_leaf()) {
bool has_unaligned = false;
for(int j = 0; j < num_children(); j++) {
for (int j = 0; j < num_children(); j++) {
has_unaligned |= get_child(j)->is_unaligned;
}
cnt += has_unaligned? 1: 0;
cnt += has_unaligned ? 1 : 0;
}
break;
case BVH_STAT_ALIGNED_LEAF_COUNT:
@@ -82,11 +81,11 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
cnt = (is_leaf() && is_unaligned) ? 1 : 0;
break;
case BVH_STAT_DEPTH:
if(is_leaf()) {
if (is_leaf()) {
cnt = 1;
}
else {
for(int i = 0; i < num_children(); i++) {
for (int i = 0; i < num_children(); i++) {
cnt = max(cnt, get_child(i)->getSubtreeSize(stat));
}
cnt += 1;
@@ -96,8 +95,8 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
assert(0); /* unknown mode */
}
if(!is_leaf())
for(int i = 0; i < num_children(); i++)
if (!is_leaf())
for (int i = 0; i < num_children(); i++)
cnt += get_child(i)->getSubtreeSize(stat);
return cnt;
@@ -105,20 +104,21 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
void BVHNode::deleteSubtree()
{
for(int i = 0; i < num_children(); i++)
if(get_child(i))
for (int i = 0; i < num_children(); i++)
if (get_child(i))
get_child(i)->deleteSubtree();
delete this;
}
float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) const
float BVHNode::computeSubtreeSAHCost(const BVHParams &p, float probability) const
{
float SAH = probability * p.cost(num_children(), num_triangles());
for(int i = 0; i < num_children(); i++) {
for (int i = 0; i < num_children(); i++) {
BVHNode *child = get_child(i);
SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area());
SAH += child->computeSubtreeSAHCost(
p, probability * child->bounds.safe_area() / bounds.safe_area());
}
return SAH;
@@ -126,12 +126,12 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons
uint BVHNode::update_visibility()
{
if(!is_leaf() && visibility == 0) {
InnerNode *inner = (InnerNode*)this;
if (!is_leaf() && visibility == 0) {
InnerNode *inner = (InnerNode *)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
visibility = child0->update_visibility()|child1->update_visibility();
visibility = child0->update_visibility() | child1->update_visibility();
}
return visibility;
@@ -139,8 +139,8 @@ uint BVHNode::update_visibility()
void BVHNode::update_time()
{
if(!is_leaf()) {
InnerNode *inner = (InnerNode*)this;
if (!is_leaf()) {
InnerNode *inner = (InnerNode *)this;
BVHNode *child0 = inner->children[0];
BVHNode *child1 = inner->children[1];
child0->update_time();
@@ -159,11 +159,9 @@ struct DumpTraversalContext {
int id;
};
void dump_subtree(DumpTraversalContext *context,
const BVHNode *node,
const BVHNode *parent = NULL)
void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL)
{
if(node->is_leaf()) {
if (node->is_leaf()) {
fprintf(context->stream,
" node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n",
node,
@@ -175,11 +173,11 @@ void dump_subtree(DumpTraversalContext *context,
node,
context->id);
}
if(parent != NULL) {
if (parent != NULL) {
fprintf(context->stream, " node_%p -> node_%p;\n", parent, node);
}
context->id += 1;
for(int i = 0; i < node->num_children(); ++i) {
for (int i = 0; i < node->num_children(); ++i) {
dump_subtree(context, node->get_child(i), node);
}
}
@@ -190,7 +188,7 @@ void BVHNode::dump_graph(const char *filename)
{
DumpTraversalContext context;
context.stream = fopen(filename, "w");
if(context.stream == NULL) {
if (context.stream == NULL) {
return;
}
context.id = 0;
@@ -204,20 +202,20 @@ void BVHNode::dump_graph(const char *filename)
void InnerNode::print(int depth) const
{
for(int i = 0; i < depth; i++)
for (int i = 0; i < depth; i++)
printf(" ");
printf("inner node %p\n", (void*)this);
printf("inner node %p\n", (void *)this);
if(children[0])
children[0]->print(depth+1);
if(children[1])
children[1]->print(depth+1);
if (children[0])
children[0]->print(depth + 1);
if (children[1])
children[1]->print(depth + 1);
}
void LeafNode::print(int depth) const
{
for(int i = 0; i < depth; i++)
for (int i = 0; i < depth; i++)
printf(" ");
printf("leaf node %d to %d\n", lo, hi);


@@ -40,9 +40,8 @@ enum BVH_STAT {
class BVHParams;
class BVHNode
{
public:
class BVHNode {
public:
virtual ~BVHNode()
{
delete aligned_space;
@@ -51,13 +50,16 @@ public:
virtual bool is_leaf() const = 0;
virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const { return 0; }
virtual int num_triangles() const
{
return 0;
}
virtual void print(int depth = 0) const = 0;
inline void set_aligned_space(const Transform& aligned_space)
inline void set_aligned_space(const Transform &aligned_space)
{
is_unaligned = true;
if(this->aligned_space == NULL) {
if (this->aligned_space == NULL) {
this->aligned_space = new Transform(aligned_space);
}
else {
@@ -67,7 +69,7 @@ public:
inline Transform get_aligned_space() const
{
if(aligned_space == NULL) {
if (aligned_space == NULL) {
return transform_identity();
}
return *aligned_space;
@@ -75,11 +77,11 @@ public:
inline bool has_unaligned() const
{
if(is_leaf()) {
if (is_leaf()) {
return false;
}
for(int i = 0; i < num_children(); ++i) {
if(get_child(i)->is_unaligned) {
for (int i = 0; i < num_children(); ++i) {
if (get_child(i)->is_unaligned) {
return true;
}
}
@@ -87,8 +89,8 @@ public:
}
// Subtree functions
int getSubtreeSize(BVH_STAT stat=BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams& p, float probability = 1.0f) const;
int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const;
float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) const;
void deleteSubtree();
uint update_visibility();
@@ -110,8 +112,8 @@ public:
float time_from, time_to;
protected:
explicit BVHNode(const BoundBox& bounds)
protected:
explicit BVHNode(const BoundBox &bounds)
: bounds(bounds),
visibility(0),
is_unaligned(false),
@@ -121,7 +123,7 @@ protected:
{
}
explicit BVHNode(const BVHNode& other)
explicit BVHNode(const BVHNode &other)
: bounds(other.bounds),
visibility(other.visibility),
is_unaligned(other.is_unaligned),
@@ -129,7 +131,7 @@ protected:
time_from(other.time_from),
time_to(other.time_to)
{
if(other.aligned_space != NULL) {
if (other.aligned_space != NULL) {
assert(other.is_unaligned);
aligned_space = new Transform();
*aligned_space = *other.aligned_space;
@@ -140,22 +142,18 @@ protected:
}
};
class InnerNode : public BVHNode
{
public:
class InnerNode : public BVHNode {
public:
static constexpr int kNumMaxChildren = 8;
InnerNode(const BoundBox& bounds,
BVHNode* child0,
BVHNode* child1)
: BVHNode(bounds),
num_children_(2)
InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1)
: BVHNode(bounds), num_children_(2)
{
children[0] = child0;
children[1] = child1;
reset_unused_children();
if(child0 && child1) {
if (child0 && child1) {
visibility = child0->visibility | child1->visibility;
}
else {
@@ -164,16 +162,13 @@ public:
}
}
InnerNode(const BoundBox& bounds,
BVHNode** children,
const int num_children)
: BVHNode(bounds),
num_children_(num_children)
InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children)
: BVHNode(bounds), num_children_(num_children)
{
visibility = 0;
time_from = FLT_MAX;
time_to = -FLT_MAX;
for(int i = 0; i < num_children; ++i) {
for (int i = 0; i < num_children; ++i) {
assert(children[i] != NULL);
visibility |= children[i]->visibility;
this->children[i] = children[i];
@@ -186,17 +181,21 @@ public:
/* NOTE: This constructor is only used by the binary BVH builder, and it is
* supposed to be configured to have 2 children, which will be filled in a bit
* later. But it is important to have the unused children reset to NULL. */
explicit InnerNode(const BoundBox& bounds)
: BVHNode(bounds),
num_children_(0)
explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
{
reset_unused_children();
visibility = 0;
num_children_ = 2;
}
bool is_leaf() const { return false; }
int num_children() const { return num_children_; }
bool is_leaf() const
{
return false;
}
int num_children() const
{
return num_children_;
}
BVHNode *get_child(int i) const
{
assert(i >= 0 && i < num_children_);
@@ -207,38 +206,44 @@ public:
int num_children_;
BVHNode *children[kNumMaxChildren];
protected:
protected:
void reset_unused_children()
{
for(int i = num_children_; i < kNumMaxChildren; ++i) {
for (int i = num_children_; i < kNumMaxChildren; ++i) {
children[i] = NULL;
}
}
};
class LeafNode : public BVHNode
{
public:
LeafNode(const BoundBox& bounds, uint visibility, int lo, int hi)
: BVHNode(bounds),
lo(lo),
hi(hi)
class LeafNode : public BVHNode {
public:
LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi)
: BVHNode(bounds), lo(lo), hi(hi)
{
this->bounds = bounds;
this->visibility = visibility;
}
LeafNode(const LeafNode& other)
: BVHNode(other),
lo(other.lo),
hi(other.hi)
LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi)
{
}
bool is_leaf() const { return true; }
int num_children() const { return 0; }
BVHNode *get_child(int) const { return NULL; }
int num_triangles() const { return hi - lo; }
bool is_leaf() const
{
return true;
}
int num_children() const
{
return 0;
}
BVHNode *get_child(int) const
{
return NULL;
}
int num_triangles() const
{
return hi - lo;
}
void print(int depth) const;
int lo;


@@ -43,10 +43,8 @@ const char *bvh_layout_name(BVHLayout layout);
/* BVH Parameters */
class BVHParams
{
public:
class BVHParams {
public:
/* spatial split area threshold */
bool use_spatial_split;
float spatial_split_alpha;
@@ -98,11 +96,7 @@ public:
int curve_subdivisions;
/* fixed parameters */
enum {
MAX_DEPTH = 64,
MAX_SPATIAL_DEPTH = 48,
NUM_SPATIAL_BINS = 32
};
enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 };
BVHParams()
{
@@ -139,24 +133,31 @@ public:
/* SAH costs */
__forceinline float cost(int num_nodes, int num_primitives) const
{ return node_cost(num_nodes) + primitive_cost(num_primitives); }
{
return node_cost(num_nodes) + primitive_cost(num_primitives);
}
__forceinline float primitive_cost(int n) const
{ return n*sah_primitive_cost; }
{
return n * sah_primitive_cost;
}
__forceinline float node_cost(int n) const
{ return n*sah_node_cost; }
{
return n * sah_node_cost;
}
__forceinline bool small_enough_for_leaf(int size, int level)
{ return (size <= min_leaf_size || level >= MAX_DEPTH); }
{
return (size <= min_leaf_size || level >= MAX_DEPTH);
}
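As a rough worked example of these helpers (with made-up cost constants, not Blender's defaults): the builder effectively compares the cost of keeping primitives in one leaf against the cost of adding inner nodes plus the children's primitive costs, each weighted by relative surface area. Ignoring the area weighting for brevity:

#include <cstdio>

int main()
{
  /* Hypothetical SAH constants, only for illustration. */
  const float sah_node_cost = 1.0f;
  const float sah_primitive_cost = 1.0f;

  auto node_cost = [&](int n) { return n * sah_node_cost; };
  auto primitive_cost = [&](int n) { return n * sah_primitive_cost; };

  /* 8 primitives: keep as one leaf, or split into two children of 4?
   * In the real builder each term is additionally weighted by relative surface area. */
  float leaf = primitive_cost(8);                        /* 8.0 */
  float split = node_cost(2) + 2.0f * primitive_cost(4); /* 2.0 + 8.0 = 10.0 */
  printf("leaf SAH %.1f vs split SAH %.1f -> %s\n",
         leaf,
         split,
         (leaf <= split) ? "make a leaf" : "split");
  return 0;
}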
/* Gets best matching BVH.
*
* If the requested layout is supported by the device, it will be used.
* Otherwise, the widest supported layout below it will be used.
*/
static BVHLayout best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts);
static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts);
};
/* BVH Reference
@@ -164,36 +165,53 @@ public:
* Reference to a primitive. Primitive index and object are sneakily packed
* into BoundBox to reduce memory usage and align nicely */
class BVHReference
{
public:
__forceinline BVHReference() {}
class BVHReference {
public:
__forceinline BVHReference()
{
}
__forceinline BVHReference(const BoundBox& bounds_,
__forceinline BVHReference(const BoundBox &bounds_,
int prim_index_,
int prim_object_,
int prim_type,
float time_from = 0.0f,
float time_to = 1.0f)
: rbounds(bounds_),
time_from_(time_from),
time_to_(time_to)
: rbounds(bounds_), time_from_(time_from), time_to_(time_to)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
type = prim_type;
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
__forceinline int prim_type() const { return type; }
__forceinline float time_from() const { return time_from_; }
__forceinline float time_to() const { return time_to_; }
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline int prim_index() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int prim_object() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int prim_type() const
{
return type;
}
__forceinline float time_from() const
{
return time_from_;
}
__forceinline float time_to() const
{
return time_to_;
}
BVHReference& operator=(const BVHReference &arg) {
if(&arg != this) {
BVHReference &operator=(const BVHReference &arg)
{
if (&arg != this) {
/* TODO(sergey): Check if it is still faster to memcpy() with
* modern compilers.
*/
@@ -202,8 +220,7 @@ public:
return *this;
}
protected:
protected:
BoundBox rbounds;
uint type;
float time_from_, time_to_;
@@ -215,46 +232,61 @@ protected:
* the reference array of a subset of primitives. Again uses trickery to pack
* integers into BoundBox for alignment purposes. */
class BVHRange
{
public:
class BVHRange {
public:
__forceinline BVHRange()
{
rbounds.min.w = __int_as_float(0);
rbounds.max.w = __int_as_float(0);
}
__forceinline BVHRange(const BoundBox& bounds_, int start_, int size_)
: rbounds(bounds_)
__forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline BVHRange(const BoundBox& bounds_, const BoundBox& cbounds_, int start_, int size_)
__forceinline BVHRange(const BoundBox &bounds_, const BoundBox &cbounds_, int start_, int size_)
: rbounds(bounds_), cbounds(cbounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
}
__forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); }
__forceinline void set_start(int start_)
{
rbounds.min.w = __int_as_float(start_);
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline const BoundBox& cent_bounds() const { return cbounds; }
__forceinline int start() const { return __float_as_int(rbounds.min.w); }
__forceinline int size() const { return __float_as_int(rbounds.max.w); }
__forceinline int end() const { return start() + size(); }
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline const BoundBox &cent_bounds() const
{
return cbounds;
}
__forceinline int start() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int size() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int end() const
{
return start() + size();
}
protected:
protected:
BoundBox rbounds;
BoundBox cbounds;
};
/* BVH Spatial Bin */
struct BVHSpatialBin
{
struct BVHSpatialBin {
BoundBox bounds;
int enter;
int exit;

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
static const int BVH_SORT_THRESHOLD = 4096;
struct BVHReferenceCompare {
public:
public:
int dim;
const BVHUnaligned *unaligned_heuristic;
const Transform *aligned_space;
@@ -35,45 +35,48 @@ public:
BVHReferenceCompare(int dim,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
: dim(dim),
unaligned_heuristic(unaligned_heuristic),
aligned_space(aligned_space)
: dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space)
{
}
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
return (aligned_space != NULL)
? unaligned_heuristic->compute_aligned_prim_boundbox(
prim, *aligned_space)
: prim.bounds();
return (aligned_space != NULL) ?
unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) :
prim.bounds();
}
/* Compare two references.
*
 * Return value is similar to the return value of strcmp().
*/
__forceinline int compare(const BVHReference& ra,
const BVHReference& rb) const
__forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
{
BoundBox ra_bounds = get_prim_bounds(ra),
rb_bounds = get_prim_bounds(rb);
BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb);
float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
if(ca < cb) return -1;
else if(ca > cb) return 1;
else if(ra.prim_object() < rb.prim_object()) return -1;
else if(ra.prim_object() > rb.prim_object()) return 1;
else if(ra.prim_index() < rb.prim_index()) return -1;
else if(ra.prim_index() > rb.prim_index()) return 1;
else if(ra.prim_type() < rb.prim_type()) return -1;
else if(ra.prim_type() > rb.prim_type()) return 1;
if (ca < cb)
return -1;
else if (ca > cb)
return 1;
else if (ra.prim_object() < rb.prim_object())
return -1;
else if (ra.prim_object() > rb.prim_object())
return 1;
else if (ra.prim_index() < rb.prim_index())
return -1;
else if (ra.prim_index() > rb.prim_index())
return 1;
else if (ra.prim_type() < rb.prim_type())
return -1;
else if (ra.prim_type() > rb.prim_type())
return 1;
return 0;
}
bool operator()(const BVHReference& ra, const BVHReference& rb)
bool operator()(const BVHReference &ra, const BVHReference &rb)
{
return (compare(ra, rb) < 0);
}
@@ -83,22 +86,17 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
const int job_start,
const int job_end,
const BVHReferenceCompare& compare);
const BVHReferenceCompare &compare);
class BVHSortTask : public Task {
public:
public:
BVHSortTask(TaskPool *task_pool,
BVHReference *data,
const int job_start,
const int job_end,
const BVHReferenceCompare& compare)
const BVHReferenceCompare &compare)
{
run = function_bind(bvh_reference_sort_threaded,
task_pool,
data,
job_start,
job_end,
compare);
run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
}
};
@@ -107,17 +105,17 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
BVHReference *data,
const int job_start,
const int job_end,
const BVHReferenceCompare& compare)
const BVHReferenceCompare &compare)
{
int start = job_start, end = job_end;
bool have_work = (start < end);
while(have_work) {
while (have_work) {
const int count = job_end - job_start;
if(count < BVH_SORT_THRESHOLD) {
if (count < BVH_SORT_THRESHOLD) {
/* The number of references is low enough that it is faster to finish the job
 * in one thread rather than to spawn more threads.
*/
sort(data+job_start, data+job_end+1, compare);
sort(data + job_start, data + job_end + 1, compare);
break;
}
/* Single QSort step.
@@ -125,30 +123,30 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
*/
int left = start, right = end;
int center = (left + right) >> 1;
if(compare.compare(data[left], data[center]) > 0) {
if (compare.compare(data[left], data[center]) > 0) {
swap(data[left], data[center]);
}
if(compare.compare(data[left], data[right]) > 0) {
if (compare.compare(data[left], data[right]) > 0) {
swap(data[left], data[right]);
}
if(compare.compare(data[center], data[right]) > 0) {
if (compare.compare(data[center], data[right]) > 0) {
swap(data[center], data[right]);
}
swap(data[center], data[right - 1]);
BVHReference median = data[right - 1];
do {
while(compare.compare(data[left], median) < 0) {
while (compare.compare(data[left], median) < 0) {
++left;
}
while(compare.compare(data[right], median) > 0) {
while (compare.compare(data[right], median) > 0) {
--right;
}
if(left <= right) {
if (left <= right) {
swap(data[left], data[right]);
++left;
--right;
}
} while(left <= right);
} while (left <= right);
/* We only create one new task here to reduce downside effects of
* latency in TaskScheduler.
 * So generally the current thread keeps working on the left part of the
@@ -158,19 +156,16 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
* right side.
*/
have_work = false;
if(left < end) {
if(start < right) {
task_pool->push(new BVHSortTask(task_pool,
data,
left, end,
compare), true);
if (left < end) {
if (start < right) {
task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
}
else {
start = left;
have_work = true;
}
}
if(start < right) {
if (start < right) {
end = right;
have_work = true;
}
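The same partition-and-spawn strategy can be condensed into a standalone sketch with std::async standing in for TaskPool, sorting plain integers; the deliberately tiny threshold and the element type are simplifications for illustration only.

#include <algorithm>
#include <cstdio>
#include <future>
#include <vector>

/* Sort small ranges directly; otherwise partition around a middle pivot,
 * hand the right part to another thread and keep the left part here. */
static void parallel_sort(std::vector<int> &data, int start, int end /* inclusive */)
{
  const int kThreshold = 4; /* tiny for the demo; the real code uses BVH_SORT_THRESHOLD = 4096 */
  if (start >= end) {
    return;
  }
  if (end - start + 1 <= kThreshold) {
    std::sort(data.begin() + start, data.begin() + end + 1);
    return;
  }
  const int pivot = data[(start + end) / 2];
  int left = start, right = end;
  while (left <= right) {
    while (data[left] < pivot) {
      ++left;
    }
    while (data[right] > pivot) {
      --right;
    }
    if (left <= right) {
      std::swap(data[left++], data[right--]);
    }
  }
  /* Off-load the right part, keep working on the left part in this thread. */
  std::future<void> task = std::async(std::launch::async, parallel_sort, std::ref(data), left, end);
  parallel_sort(data, start, right);
  task.wait();
}

int main()
{
  std::vector<int> data = {9, 3, 7, 1, 8, 2, 6, 4, 5, 0};
  parallel_sort(data, 0, (int)data.size() - 1);
  for (int v : data) {
    std::printf("%d ", v);
  }
  std::printf("\n");
  return 0;
}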
@@ -186,12 +181,12 @@ void bvh_reference_sort(int start,
{
const int count = end - start;
BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space);
if(count < BVH_SORT_THRESHOLD) {
if (count < BVH_SORT_THRESHOLD) {
/* It is important not to use any mutex if the array is small enough,
 * otherwise we end up in a situation where we go to sleep far
 * too often.
*/
sort(data+start, data+end, compare);
sort(data + start, data + end, compare);
}
else {
TaskPool task_pool;

@@ -31,12 +31,12 @@ CCL_NAMESPACE_BEGIN
BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
: sah(FLT_MAX),
: sah(FLT_MAX),
dim(0),
num_left(0),
left_bounds(BoundBox::empty),
@@ -51,7 +51,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
storage_->right_bounds.resize(range.size());
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
/* Sort references. */
bvh_reference_sort(range.start(),
range.end(),
@@ -62,7 +62,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
/* sweep right to left and determine bounds. */
BoundBox right_bounds = BoundBox::empty;
for(int i = range.size() - 1; i > 0; i--) {
for (int i = range.size() - 1; i > 0; i--) {
BoundBox prim_bounds = get_prim_bounds(ref_ptr[i]);
right_bounds.grow(prim_bounds);
storage_->right_bounds[i - 1] = right_bounds;
@@ -71,16 +71,15 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
/* sweep left to right and select lowest SAH. */
BoundBox left_bounds = BoundBox::empty;
for(int i = 1; i < range.size(); i++) {
for (int i = 1; i < range.size(); i++) {
BoundBox prim_bounds = get_prim_bounds(ref_ptr[i - 1]);
left_bounds.grow(prim_bounds);
right_bounds = storage_->right_bounds[i - 1];
float sah = nodeSAH +
left_bounds.safe_area() * builder->params.primitive_cost(i) +
float sah = nodeSAH + left_bounds.safe_area() * builder->params.primitive_cost(i) +
right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i);
if(sah < min_sah) {
if (sah < min_sah) {
min_sah = sah;
this->sah = sah;
@@ -93,9 +92,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
}
}
void BVHObjectSplit::split(BVHRange& left,
BVHRange& right,
const BVHRange& range)
void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range)
{
assert(references_->size() > 0);
/* sort references according to split */
@@ -108,18 +105,18 @@ void BVHObjectSplit::split(BVHRange& left,
BoundBox effective_left_bounds, effective_right_bounds;
const int num_right = range.size() - this->num_left;
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
effective_left_bounds = left_bounds;
effective_right_bounds = right_bounds;
}
else {
effective_left_bounds = BoundBox::empty;
effective_right_bounds = BoundBox::empty;
for(int i = 0; i < this->num_left; ++i) {
for (int i = 0; i < this->num_left; ++i) {
BoundBox prim_boundbox = references_->at(range.start() + i).bounds();
effective_left_bounds.grow(prim_boundbox);
}
for(int i = 0; i < num_right; ++i) {
for (int i = 0; i < num_right; ++i) {
BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds();
effective_right_bounds.grow(prim_boundbox);
}
@@ -132,14 +129,14 @@ void BVHObjectSplit::split(BVHRange& left,
/* Spatial Split */
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange& range,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
: sah(FLT_MAX),
: sah(FLT_MAX),
dim(0),
pos(0.0f),
storage_(storage),
@@ -149,23 +146,21 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
{
/* initialize bins. */
BoundBox range_bounds;
if(aligned_space == NULL) {
if (aligned_space == NULL) {
range_bounds = range.bounds();
}
else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox(
range,
&references->at(0),
*aligned_space);
range, &references->at(0), *aligned_space);
}
float3 origin = range_bounds.min;
float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS);
float3 invBinSize = 1.0f / binSize;
for(int dim = 0; dim < 3; dim++) {
for(int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) {
BVHSpatialBin& bin = storage_->bins[dim][i];
for (int dim = 0; dim < 3; dim++) {
for (int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) {
BVHSpatialBin &bin = storage_->bins[dim][i];
bin.bounds = BoundBox::empty;
bin.enter = 0;
@@ -174,8 +169,8 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
}
/* chop references into bins. */
for(unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
const BVHReference& ref = references_->at(refIdx);
for (unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) {
const BVHReference &ref = references_->at(refIdx);
BoundBox prim_bounds = get_prim_bounds(ref);
float3 firstBinf = (prim_bounds.min - origin) * invBinSize;
float3 lastBinf = (prim_bounds.max - origin) * invBinSize;
@@ -185,16 +180,15 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1);
lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
for(int dim = 0; dim < 3; dim++) {
BVHReference currRef(get_prim_bounds(ref),
ref.prim_index(),
ref.prim_object(),
ref.prim_type());
for (int dim = 0; dim < 3; dim++) {
BVHReference currRef(
get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type());
for(int i = firstBin[dim]; i < lastBin[dim]; i++) {
for (int i = firstBin[dim]; i < lastBin[dim]; i++) {
BVHReference leftRef, rightRef;
split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
split_reference(
builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
storage_->bins[dim][i].bounds.grow(leftRef.bounds());
currRef = rightRef;
}
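Reduced to one axis, the bin-index arithmetic above looks like the following sketch; the bin count matches NUM_SPATIAL_BINS, while the range and primitive extents are made-up numbers.

#include <algorithm>
#include <cstdio>

int main()
{
  const int num_bins = 32;       /* BVHParams::NUM_SPATIAL_BINS */
  const float origin = -4.0f;    /* range_bounds.min on this axis (assumed) */
  const float range_size = 8.0f; /* range_bounds.max - origin (assumed) */
  const float inv_bin_size = num_bins / range_size;

  const float prim_min = -1.0f, prim_max = 2.5f; /* primitive bounds (assumed) */

  const int first_bin = std::clamp((int)((prim_min - origin) * inv_bin_size), 0, num_bins - 1);
  const int last_bin = std::clamp(
      (int)((prim_max - origin) * inv_bin_size), first_bin, num_bins - 1);

  std::printf("primitive touches bins %d..%d\n", first_bin, last_bin);
  return 0;
}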
@@ -207,10 +201,10 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
/* select best split plane. */
storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS);
for(int dim = 0; dim < 3; dim++) {
for (int dim = 0; dim < 3; dim++) {
/* sweep right to left and determine bounds. */
BoundBox right_bounds = BoundBox::empty;
for(int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) {
for (int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) {
right_bounds.grow(storage_->bins[dim][i].bounds);
storage_->right_bounds[i - 1] = right_bounds;
}
@@ -220,16 +214,16 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
int leftNum = 0;
int rightNum = range.size();
for(int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) {
for (int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) {
left_bounds.grow(storage_->bins[dim][i - 1].bounds);
leftNum += storage_->bins[dim][i - 1].enter;
rightNum -= storage_->bins[dim][i - 1].exit;
float sah = nodeSAH +
left_bounds.safe_area() * builder.params.primitive_cost(leftNum) +
storage_->right_bounds[i - 1].safe_area() * builder.params.primitive_cost(rightNum);
float sah = nodeSAH + left_bounds.safe_area() * builder.params.primitive_cost(leftNum) +
storage_->right_bounds[i - 1].safe_area() *
builder.params.primitive_cost(rightNum);
if(sah < this->sah) {
if (sah < this->sah) {
this->sah = sah;
this->dim = dim;
this->pos = origin[dim] + binSize[dim] * (float)i;
@@ -239,9 +233,9 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
}
void BVHSpatialSplit::split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range)
BVHRange &left,
BVHRange &right,
const BVHRange &range)
{
/* Categorize references and compute bounds.
*
@@ -249,7 +243,7 @@ void BVHSpatialSplit::split(BVHBuild *builder,
* Uncategorized/split: [left_end, right_start[
* Right-hand side: [right_start, refs.size()[ */
vector<BVHReference>& refs = *references_;
vector<BVHReference> &refs = *references_;
int left_start = range.start();
int left_end = left_start;
int right_start = range.end();
@@ -257,14 +251,14 @@ void BVHSpatialSplit::split(BVHBuild *builder,
BoundBox left_bounds = BoundBox::empty;
BoundBox right_bounds = BoundBox::empty;
for(int i = left_end; i < right_start; i++) {
for (int i = left_end; i < right_start; i++) {
BoundBox prim_bounds = get_prim_bounds(refs[i]);
if(prim_bounds.max[this->dim] <= this->pos) {
if (prim_bounds.max[this->dim] <= this->pos) {
/* entirely on the left-hand side */
left_bounds.grow(prim_bounds);
swap(refs[i], refs[left_end++]);
}
else if(prim_bounds.min[this->dim] >= this->pos) {
else if (prim_bounds.min[this->dim] >= this->pos) {
/* entirely on the right-hand side */
right_bounds.grow(prim_bounds);
swap(refs[i--], refs[--right_start]);
@@ -276,10 +270,10 @@ void BVHSpatialSplit::split(BVHBuild *builder,
* Duplication happens into a temporary pre-allocated vector in order to
 * reduce the number of memmove() calls happening in vector.insert().
*/
vector<BVHReference>& new_refs = storage_->new_references;
vector<BVHReference> &new_refs = storage_->new_references;
new_refs.clear();
new_refs.reserve(right_start - left_end);
while(left_end < right_start) {
while (left_end < right_start) {
/* split reference. */
BVHReference curr_ref(get_prim_bounds(refs[left_end]),
refs[left_end].prim_index(),
@@ -309,12 +303,12 @@ void BVHSpatialSplit::split(BVHBuild *builder,
float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc;
float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH);
if(minSAH == unsplitLeftSAH) {
if (minSAH == unsplitLeftSAH) {
/* unsplit to left */
left_bounds = lub;
left_end++;
}
else if(minSAH == unsplitRightSAH) {
else if (minSAH == unsplitRightSAH) {
/* unsplit to right */
right_bounds = rub;
swap(refs[left_end], refs[--right_start]);
@@ -329,18 +323,16 @@ void BVHSpatialSplit::split(BVHBuild *builder,
}
}
/* Insert duplicated references into actual array in one go. */
if(new_refs.size() != 0) {
refs.insert(refs.begin() + (right_end - new_refs.size()),
new_refs.begin(),
new_refs.end());
if (new_refs.size() != 0) {
refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end());
}
if(aligned_space_ != NULL) {
if (aligned_space_ != NULL) {
left_bounds = right_bounds = BoundBox::empty;
for(int i = left_start; i < left_end - left_start; ++i) {
for (int i = left_start; i < left_end - left_start; ++i) {
BoundBox prim_boundbox = references_->at(i).bounds();
left_bounds.grow(prim_boundbox);
}
for(int i = right_start; i < right_end - right_start; ++i) {
for (int i = right_start; i < right_end - right_start; ++i) {
BoundBox prim_boundbox = references_->at(i).bounds();
right_bounds.grow(prim_boundbox);
}
@@ -354,15 +346,15 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
int prim_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
Mesh::Triangle t = mesh->get_triangle(prim_index);
const float3 *verts = &mesh->verts[0];
float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]];
v1 = get_unaligned_point(v1);
for(int i = 0; i < 3; i++) {
for (int i = 0; i < 3; i++) {
float3 v0 = v1;
int vindex = t.v[i];
v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex];
@@ -371,14 +363,14 @@ void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh,
float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */
if(v0p <= pos)
if (v0p <= pos)
left_bounds.grow(v0);
if(v0p >= pos)
if (v0p >= pos)
right_bounds.grow(v0);
/* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
@@ -392,8 +384,8 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
int segment_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
/* Curve split: NOTE - Currently ignores curve width and needs to be fixed. */
Mesh::Curve curve = mesh->get_curve(prim_index);
@@ -402,7 +394,7 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
float3 v0 = mesh->curve_keys[k0];
float3 v1 = mesh->curve_keys[k1];
if(tfm != NULL) {
if (tfm != NULL) {
v0 = transform_point(tfm, v0);
v1 = transform_point(tfm, v1);
}
@@ -413,48 +405,42 @@ void BVHSpatialSplit::split_curve_primitive(const Mesh *mesh,
float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */
if(v0p <= pos)
if (v0p <= pos)
left_bounds.grow(v0);
if(v0p >= pos)
if (v0p >= pos)
right_bounds.grow(v0);
if(v1p <= pos)
if (v1p <= pos)
left_bounds.grow(v1);
if(v1p >= pos)
if (v1p >= pos)
right_bounds.grow(v1);
/* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
}
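The clipping step shared by both primitive splitters can be pictured on a single axis; the plane position and the endpoint coordinates in this sketch are arbitrary values chosen for illustration.

#include <algorithm>
#include <cstdio>

int main()
{
  const float pos = 1.0f;              /* split plane position on this axis (assumed) */
  const float v0p = -2.0f, v1p = 4.0f; /* edge endpoint coordinates on this axis (assumed) */

  if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
    /* The intersection lies a fraction t along the edge; the real code grows
     * both child bounding boxes by the interpolated point lerp(v0, v1, t). */
    const float t = std::clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f);
    std::printf("edge crosses the split plane at t = %.3f\n", t);
  }
  return 0;
}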
void BVHSpatialSplit::split_triangle_reference(const BVHReference& ref,
void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_triangle_primitive(mesh,
NULL,
ref.prim_index(),
dim,
pos,
left_bounds,
right_bounds);
split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
}
void BVHSpatialSplit::split_curve_reference(const BVHReference& ref,
void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_curve_primitive(mesh,
NULL,
@@ -466,44 +452,26 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference& ref,
right_bounds);
}
void BVHSpatialSplit::split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
Mesh *mesh = object->mesh;
for(int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh,
&object->tfm,
tri_idx,
dim,
pos,
left_bounds,
right_bounds);
for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
}
for(int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
Mesh::Curve curve = mesh->get_curve(curve_idx);
for(int segment_idx = 0;
segment_idx < curve.num_keys - 1;
++segment_idx)
{
split_curve_primitive(mesh,
&object->tfm,
curve_idx,
segment_idx,
dim,
pos,
left_bounds,
right_bounds);
for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
split_curve_primitive(
mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
}
}
}
void BVHSpatialSplit::split_reference(const BVHBuild& builder,
BVHReference& left,
BVHReference& right,
const BVHReference& ref,
void BVHSpatialSplit::split_reference(const BVHBuild &builder,
BVHReference &left,
BVHReference &right,
const BVHReference &ref,
int dim,
float pos)
{
@@ -515,28 +483,14 @@ void BVHSpatialSplit::split_reference(const BVHBuild& builder,
const Object *ob = builder.objects[ref.prim_object()];
const Mesh *mesh = ob->mesh;
if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
split_curve_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob,
dim,
pos,
left_bounds,
right_bounds);
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
}
/* intersect with original bounds. */

@@ -28,81 +28,73 @@ struct Transform;
/* Object Split */
class BVHObjectSplit
{
public:
class BVHObjectSplit {
public:
float sah;
int dim;
int num_left;
BoundBox left_bounds;
BoundBox right_bounds;
BVHObjectSplit() {}
BVHObjectSplit()
{
}
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHRange &left, BVHRange &right, const BVHRange &range);
protected:
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
const Transform *aligned_space_;
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};
/* Spatial Split */
class BVHSpatialSplit
{
public:
class BVHSpatialSplit {
public:
float sah;
int dim;
float pos;
BVHSpatialSplit() : sah(FLT_MAX),
dim(0),
pos(0.0f),
storage_(NULL),
references_(NULL) {}
BVHSpatialSplit(const BVHBuild& builder,
BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL)
{
}
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange& range,
const BVHRange &range,
vector<BVHReference> *references,
float nodeSAH,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range);
void split_reference(const BVHBuild& builder,
BVHReference& left,
BVHReference& right,
const BVHReference& ref,
void split_reference(const BVHBuild &builder,
BVHReference &left,
BVHReference &right,
const BVHReference &ref,
int dim,
float pos);
protected:
protected:
BVHSpatialStorage *storage_;
vector<BVHReference> *references_;
const BVHUnaligned *unaligned_heuristic_;
@@ -119,54 +111,50 @@ protected:
int prim_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_primitive(const Mesh *mesh,
const Transform *tfm,
int prim_index,
int segment_index,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
BoundBox &left_bounds,
BoundBox &right_bounds);
/* Lower-level functions which calculate boundaries of left and right nodes
* needed for spatial split.
*
* Operates with BVHReference, internally uses lower level API functions.
*/
void split_triangle_reference(const BVHReference& ref,
void split_triangle_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_curve_reference(const BVHReference& ref,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_curve_reference(const BVHReference &ref,
const Mesh *mesh,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
__forceinline BoundBox get_prim_bounds(const BVHReference& prim) const
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
__forceinline float3 get_unaligned_point(const float3& point) const
__forceinline float3 get_unaligned_point(const float3 &point) const
{
if(aligned_space_ == NULL) {
if (aligned_space_ == NULL) {
return point;
}
else {
@@ -177,9 +165,8 @@ protected:
/* Mixed Object-Spatial Split */
class BVHMixedSplit
{
public:
class BVHMixedSplit {
public:
BVHObjectSplit object;
BVHSpatialSplit spatial;
@@ -191,24 +178,24 @@ public:
BoundBox bounds;
BVHMixedSplit() {}
BVHMixedSplit()
{
}
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange& range,
const BVHRange &range,
vector<BVHReference> *references,
int level,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL)
{
if(aligned_space == NULL) {
if (aligned_space == NULL) {
bounds = range.bounds();
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
range,
&references->at(0),
*aligned_space);
range, &references->at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
@@ -216,43 +203,32 @@ public:
leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
object = BVHObjectSplit(builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
object = BVHObjectSplit(
builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
if(builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds;
overlap.intersect(object.right_bounds);
if(overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(*builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
if (overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(
*builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
}
}
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH &&
builder->range_within_max_leaf_size(range, *references));
no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
}
__forceinline void split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range)
BVHRange &left,
BVHRange &right,
const BVHRange &range)
{
if(builder->params.use_spatial_split && minSAH == spatial.sah)
if (builder->params.use_spatial_split && minSAH == spatial.sah)
spatial.split(builder, left, right, range);
if(!left.size() || !right.size())
if (!left.size() || !right.size())
object.split(left, right, range);
}
};

@@ -27,63 +27,57 @@
CCL_NAMESPACE_BEGIN
BVHUnaligned::BVHUnaligned(const vector<Object*>& objects)
: objects_(objects)
BVHUnaligned::BVHUnaligned(const vector<Object *> &objects) : objects_(objects)
{
}
Transform BVHUnaligned::compute_aligned_space(
const BVHObjectBinning& range,
Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const
{
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
Transform aligned_space;
/* Use the first primitive which defines a correct direction to define
* the orientation space.
*/
if(compute_aligned_space(ref, &aligned_space)) {
if (compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
}
Transform BVHUnaligned::compute_aligned_space(
const BVHRange& range,
Transform BVHUnaligned::compute_aligned_space(const BVHRange &range,
const BVHReference *references) const
{
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
Transform aligned_space;
/* Use the first primitive which defines a correct direction to define
* the orientation space.
*/
if(compute_aligned_space(ref, &aligned_space)) {
if (compute_aligned_space(ref, &aligned_space)) {
return aligned_space;
}
}
return transform_identity();
}
bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const
bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const
{
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if(type & PRIMITIVE_CURVE) {
if (type & PRIMITIVE_CURVE) {
const int curve_index = ref.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve& curve = mesh->get_curve(curve_index);
const Mesh::Curve &curve = mesh->get_curve(curve_index);
const int key = curve.first_key + segment;
const float3 v1 = mesh->curve_keys[key],
v2 = mesh->curve_keys[key + 1];
const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if(length > 1e-6f) {
if (length > 1e-6f) {
*aligned_space = make_transform_frame(axis);
return true;
}
@@ -92,24 +86,20 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
return false;
}
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
const BVHReference& prim,
const Transform& aligned_space) const
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const
{
BoundBox bounds = BoundBox::empty;
const Object *object = objects_[prim.prim_object()];
const int packed_type = prim.prim_type();
const int type = (packed_type & PRIMITIVE_ALL);
if(type & PRIMITIVE_CURVE) {
if (type & PRIMITIVE_CURVE) {
const int curve_index = prim.prim_index();
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve& curve = mesh->get_curve(curve_index);
curve.bounds_grow(segment,
&mesh->curve_keys[0],
&mesh->curve_radius[0],
aligned_space,
bounds);
const Mesh::Curve &curve = mesh->get_curve(curve_index);
curve.bounds_grow(
segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
@@ -117,60 +107,57 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHObjectBinning& range,
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform& aligned_space,
const Transform &aligned_space,
BoundBox *cent_bounds) const
{
BoundBox bounds = BoundBox::empty;
if(cent_bounds != NULL) {
if (cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if(cent_bounds != NULL) {
if (cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHRange& range,
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform& aligned_space,
const Transform &aligned_space,
BoundBox *cent_bounds) const
{
BoundBox bounds = BoundBox::empty;
if(cent_bounds != NULL) {
if (cent_bounds != NULL) {
*cent_bounds = BoundBox::empty;
}
for(int i = range.start(); i < range.end(); ++i) {
const BVHReference& ref = references[i];
for (int i = range.start(); i < range.end(); ++i) {
const BVHReference &ref = references[i];
BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space);
bounds.grow(ref_bounds);
if(cent_bounds != NULL) {
if (cent_bounds != NULL) {
cent_bounds->grow(ref_bounds.center2());
}
}
return bounds;
}
Transform BVHUnaligned::compute_node_transform(
const BoundBox& bounds,
const Transform& aligned_space)
Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds,
const Transform &aligned_space)
{
Transform space = aligned_space;
space.x.w -= bounds.min.x;
space.y.w -= bounds.min.y;
space.z.w -= bounds.min.z;
float3 dim = bounds.max - bounds.min;
return transform_scale(1.0f / max(1e-18f, dim.x),
1.0f / max(1e-18f, dim.y),
1.0f / max(1e-18f, dim.z)) * space;
return transform_scale(
1.0f / max(1e-18f, dim.x), 1.0f / max(1e-18f, dim.y), 1.0f / max(1e-18f, dim.z)) *
space;
}
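On a single axis the node transform amounts to a translate-and-scale into the unit range; a tiny sketch with assumed bounds follows.

#include <cstdio>

int main()
{
  const float bounds_min = 2.0f, bounds_max = 10.0f; /* assumed node bounds on one axis */
  const float dim = bounds_max - bounds_min;
  const float scale = 1.0f / (dim > 1e-18f ? dim : 1e-18f); /* same guard as max(1e-18f, dim) */

  const float p = 6.0f; /* a point inside the node */
  const float packed = (p - bounds_min) * scale;
  std::printf("packed coordinate: %.3f\n", packed); /* 0.500 */
  return 0;
}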
CCL_NAMESPACE_END

@@ -30,49 +30,42 @@ class Object;
/* Helper class to perform calculations needed for unaligned nodes. */
class BVHUnaligned {
public:
BVHUnaligned(const vector<Object*>& objects);
public:
BVHUnaligned(const vector<Object *> &objects);
/* Calculate alignment for the oriented node for a given range. */
Transform compute_aligned_space(
const BVHObjectBinning& range,
const BVHReference *references) const;
Transform compute_aligned_space(
const BVHRange& range,
Transform compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const;
Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const;
/* Calculate alignment for the oriented node for a given reference.
*
* Return true when space was calculated successfully.
*/
bool compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const;
bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const;
/* Calculate primitive's bounding box in given space. */
BoundBox compute_aligned_prim_boundbox(
const BVHReference& prim,
const Transform& aligned_space) const;
BoundBox compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const;
/* Calculate bounding box in given space. */
BoundBox compute_aligned_boundbox(
const BVHObjectBinning& range,
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform& aligned_space,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
BoundBox compute_aligned_boundbox(
const BVHRange& range,
BoundBox compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform& aligned_space,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
/* Calculate affine transform for node packing.
* Bounds will be in the range of 0..1.
*/
static Transform compute_node_transform(const BoundBox& bounds,
const Transform& aligned_space);
protected:
static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space);
protected:
/* List of objects BVH is being created for. */
const vector<Object*>& objects_;
const vector<Object *> &objects_;
};
CCL_NAMESPACE_END

@@ -44,40 +44,33 @@ uint Device::devices_initialized_mask = 0;
/* Device Requested Features */
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features)
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
os << "Experimental features: "
<< (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Use Hair: "
<< string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: "
<< string_from_bool(requested_features.use_object_motion) << std::endl;
os << "Use Camera Motion: "
<< string_from_bool(requested_features.use_camera_motion) << std::endl;
os << "Use Baking: "
<< string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: "
<< string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: "
<< string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: "
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
os << "Use Transparent Shadows: "
<< string_from_bool(requested_features.use_transparent) << std::endl;
os << "Use Principled BSDF: "
<< string_from_bool(requested_features.use_principled) << std::endl;
os << "Use Denoising: "
<< string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: "
<< string_from_bool(requested_features.use_true_displacement) << std::endl;
os << "Use Background Light: "
<< string_from_bool(requested_features.use_background_light) << std::endl;
os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< std::endl;
os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< std::endl;
os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
<< std::endl;
os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
<< std::endl;
os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
<< std::endl;
os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
<< std::endl;
os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< std::endl;
os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< std::endl;
return os;
}
@@ -85,11 +78,11 @@ std::ostream& operator <<(std::ostream &os,
Device::~Device()
{
if(!background) {
if(vertex_buffer != 0) {
if (!background) {
if (vertex_buffer != 0) {
glDeleteBuffers(1, &vertex_buffer);
}
if(fallback_shader_program != 0) {
if (fallback_shader_program != 0) {
glDeleteProgram(fallback_shader_program);
}
}
@@ -97,33 +90,33 @@ Device::~Device()
/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
static void shader_print_errors(const char *task, const char *log, const char *code)
{
@@ -134,8 +127,8 @@ static void shader_print_errors(const char *task, const char *log, const char *c
string partial;
int line = 1;
while(getline(stream, partial, '\n')) {
if(line < 10) {
while (getline(stream, partial, '\n')) {
if (line < 10) {
LOG(ERROR) << " " << line << " " << partial;
}
else {
@@ -156,14 +149,12 @@ static int bind_fallback_shader(void)
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {
{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
};
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
program = glCreateProgram();
for(int i = 0; i < 2; i++) {
for (int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source;
@@ -174,7 +165,7 @@ static int bind_fallback_shader(void)
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if(!status) {
if (!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
@@ -190,7 +181,7 @@ static int bind_fallback_shader(void)
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &status);
if(!status) {
if (!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
@@ -202,27 +193,27 @@ static int bind_fallback_shader(void)
bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
if(fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
return false;
}
if(fallback_status == FALLBACK_SHADER_STATUS_NONE) {
if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
fallback_shader_program = bind_fallback_shader();
fallback_status = FALLBACK_SHADER_STATUS_ERROR;
if(fallback_shader_program == 0) {
if (fallback_shader_program == 0) {
return false;
}
glUseProgram(fallback_shader_program);
image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
if(image_texture_location < 0) {
if (image_texture_location < 0) {
LOG(ERROR) << "Shader doesn't containt the 'image_texture' uniform.";
return false;
}
fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
if(fullscreen_location < 0) {
if (fullscreen_location < 0) {
LOG(ERROR) << "Shader doesn't containt the 'fullscreen' uniform.";
return false;
}
@@ -237,11 +228,18 @@ bool Device::bind_fallback_display_space_shader(const float width, const float h
return true;
}
void Device::draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
void Device::draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
@@ -253,13 +251,13 @@ void Device::draw_pixels(
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
if(rgba.data_type == TYPE_HALF) {
GLhalf *data_pointer = (GLhalf*)rgba.host_pointer;
if (rgba.data_type == TYPE_HALF) {
GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
}
else {
uint8_t *data_pointer = (uint8_t*)rgba.host_pointer;
uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
}
@@ -267,14 +265,14 @@ void Device::draw_pixels(
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
if(transparent) {
if (transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
GLint shader_program;
if(use_fallback_shader) {
if(!bind_fallback_display_space_shader(dw, dh)) {
if (use_fallback_shader) {
if (!bind_fallback_display_space_shader(dw, dh)) {
return;
}
shader_program = fallback_shader_program;
@@ -284,7 +282,7 @@ void Device::draw_pixels(
glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
}
if(!vertex_buffer) {
if (!vertex_buffer) {
glGenBuffers(1, &vertex_buffer);
}
@@ -294,7 +292,7 @@ void Device::draw_pixels(
float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
if(vpointer) {
if (vpointer) {
/* texture coordinate - vertex pair */
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
@@ -316,7 +314,7 @@ void Device::draw_pixels(
vpointer[14] = dx;
vpointer[15] = (float)height + dy;
if(vertex_buffer) {
if (vertex_buffer) {
glUnmapBuffer(GL_ARRAY_BUFFER);
}
}
@@ -333,16 +331,22 @@ void Device::draw_pixels(
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2));
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
if(vertex_buffer) {
if (vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
if(use_fallback_shader) {
if (use_fallback_shader) {
glUseProgram(0);
}
else {
@@ -353,22 +357,22 @@ void Device::draw_pixels(
glBindTexture(GL_TEXTURE_2D, 0);
glDeleteTextures(1, &texid);
if(transparent) {
if (transparent) {
glDisable(GL_BLEND);
}
}
Device *Device::create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
Device *device;
switch(info.type) {
switch (info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, stats, profiler, background);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if(device_cuda_init())
if (device_cuda_init())
device = device_cuda_create(info, stats, profiler, background);
else
device = NULL;
@@ -386,7 +390,7 @@ Device *Device::create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool
#endif
#ifdef WITH_OPENCL
case DEVICE_OPENCL:
if(device_opencl_init())
if (device_opencl_init())
device = device_opencl_create(info, stats, profiler, background);
else
device = NULL;
@@ -401,15 +405,15 @@ Device *Device::create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool
DeviceType Device::type_from_string(const char *name)
{
if(strcmp(name, "CPU") == 0)
if (strcmp(name, "CPU") == 0)
return DEVICE_CPU;
else if(strcmp(name, "CUDA") == 0)
else if (strcmp(name, "CUDA") == 0)
return DEVICE_CUDA;
else if(strcmp(name, "OPENCL") == 0)
else if (strcmp(name, "OPENCL") == 0)
return DEVICE_OPENCL;
else if(strcmp(name, "NETWORK") == 0)
else if (strcmp(name, "NETWORK") == 0)
return DEVICE_NETWORK;
else if(strcmp(name, "MULTI") == 0)
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
return DEVICE_NONE;
@@ -417,15 +421,15 @@ DeviceType Device::type_from_string(const char *name)
string Device::string_from_type(DeviceType type)
{
if(type == DEVICE_CPU)
if (type == DEVICE_CPU)
return "CPU";
else if(type == DEVICE_CUDA)
else if (type == DEVICE_CUDA)
return "CUDA";
else if(type == DEVICE_OPENCL)
else if (type == DEVICE_OPENCL)
return "OPENCL";
else if(type == DEVICE_NETWORK)
else if (type == DEVICE_NETWORK)
return "NETWORK";
else if(type == DEVICE_MULTI)
else if (type == DEVICE_MULTI)
return "MULTI";
return "";
@@ -456,50 +460,50 @@ vector<DeviceInfo> Device::available_devices(uint mask)
vector<DeviceInfo> devices;
#ifdef WITH_OPENCL
if(mask & DEVICE_MASK_OPENCL) {
if(!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
if(device_opencl_init()) {
if (mask & DEVICE_MASK_OPENCL) {
if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
if (device_opencl_init()) {
device_opencl_info(opencl_devices);
}
devices_initialized_mask |= DEVICE_MASK_OPENCL;
}
foreach(DeviceInfo& info, opencl_devices) {
foreach (DeviceInfo &info, opencl_devices) {
devices.push_back(info);
}
}
#endif
#ifdef WITH_CUDA
if(mask & DEVICE_MASK_CUDA) {
if(!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
if(device_cuda_init()) {
if (mask & DEVICE_MASK_CUDA) {
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
if (device_cuda_init()) {
device_cuda_info(cuda_devices);
}
devices_initialized_mask |= DEVICE_MASK_CUDA;
}
foreach(DeviceInfo& info, cuda_devices) {
foreach (DeviceInfo &info, cuda_devices) {
devices.push_back(info);
}
}
#endif
if(mask & DEVICE_MASK_CPU) {
if(!(devices_initialized_mask & DEVICE_MASK_CPU)) {
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
devices_initialized_mask |= DEVICE_MASK_CPU;
}
foreach(DeviceInfo& info, cpu_devices) {
foreach (DeviceInfo &info, cpu_devices) {
devices.push_back(info);
}
}
#ifdef WITH_NETWORK
if(mask & DEVICE_MASK_NETWORK) {
if(!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
if (mask & DEVICE_MASK_NETWORK) {
if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
device_network_info(network_devices);
devices_initialized_mask |= DEVICE_MASK_NETWORK;
}
foreach(DeviceInfo& info, network_devices) {
foreach (DeviceInfo &info, network_devices) {
devices.push_back(info);
}
}
@@ -513,14 +517,14 @@ string Device::device_capabilities(uint mask)
thread_scoped_lock lock(device_mutex);
string capabilities = "";
if(mask & DEVICE_MASK_CPU) {
if (mask & DEVICE_MASK_CPU) {
capabilities += "\nCPU device capabilities: ";
capabilities += device_cpu_capabilities() + "\n";
}
#ifdef WITH_OPENCL
if(mask & DEVICE_MASK_OPENCL) {
if(device_opencl_init()) {
if (mask & DEVICE_MASK_OPENCL) {
if (device_opencl_init()) {
capabilities += "\nOpenCL device capabilities:\n";
capabilities += device_opencl_capabilities();
}
@@ -528,8 +532,8 @@ string Device::device_capabilities(uint mask)
#endif
#ifdef WITH_CUDA
if(mask & DEVICE_MASK_CUDA) {
if(device_cuda_init()) {
if (mask & DEVICE_MASK_CUDA) {
if (device_cuda_init()) {
capabilities += "\nCUDA device capabilities:\n";
capabilities += device_cuda_capabilities();
}
@@ -539,11 +543,13 @@ string Device::device_capabilities(uint mask)
return capabilities;
}
DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background)
{
assert(subdevices.size() > 0);
if(subdevices.size() == 1) {
if (subdevices.size() == 1) {
/* No multi device needed. */
return subdevices.front();
}
@@ -559,18 +565,17 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
info.has_osl = true;
info.has_profiling = true;
foreach(const DeviceInfo &device, subdevices) {
foreach (const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
if(device.type == DEVICE_CPU && subdevices.size() > 1) {
if(background) {
int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
if (device.type == DEVICE_CPU && subdevices.size() > 1) {
if (background) {
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
VLOG(1) << "CPU render threads reduced from "
<< orig_cpu_threads << " to " << cpu_threads
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
<< ", to dedicate to GPU.";
if(cpu_threads >= 1) {
if (cpu_threads >= 1) {
DeviceInfo cpu_device = device;
cpu_device.cpu_threads = cpu_threads;
info.multi_devices.push_back(cpu_device);
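The thread-count reduction above is simple arithmetic: one CPU render thread is reserved per additional sub-device. A standalone sketch with assumed counts:

#include <algorithm>
#include <cstdio>

int main()
{
  const int system_threads = 16; /* would come from system_cpu_thread_count() */
  const int num_subdevices = 3;  /* e.g. 2 GPUs + 1 CPU (assumed) */

  const int cpu_threads = std::max(system_threads - (num_subdevices - 1), 0);
  std::printf("CPU render threads reduced from %d to %d\n", system_threads, cpu_threads);
  return 0;
}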

@@ -67,7 +67,7 @@ enum DeviceKernelStatus {
#define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
class DeviceInfo {
public:
public:
DeviceType type;
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
@@ -95,15 +95,17 @@ public:
has_profiling = false;
}
bool operator==(const DeviceInfo &info) {
bool operator==(const DeviceInfo &info)
{
/* Multiple Devices with the same ID would be very bad. */
assert(id != info.id || (type == info.type && num == info.num && description == info.description));
assert(id != info.id ||
(type == info.type && num == info.num && description == info.description));
return id == info.id;
}
};
class DeviceRequestedFeatures {
public:
public:
/* Use experimental feature set. */
bool experimental;
@@ -184,7 +186,7 @@ public:
use_background_light = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
bool modified(const DeviceRequestedFeatures &requested_features)
{
return !(experimental == requested_features.experimental &&
max_nodes_group == requested_features.max_nodes_group &&
@@ -212,58 +214,55 @@ public:
string get_build_options() const
{
string build_options = "";
if(experimental) {
if (experimental) {
build_options += "-D__KERNEL_EXPERIMENTAL__ ";
}
build_options += "-D__NODES_MAX_GROUP__=" +
string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" +
string_printf("%d", nodes_features);
if(!use_hair) {
build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
if (!use_hair) {
build_options += " -D__NO_HAIR__";
}
if(!use_object_motion) {
if (!use_object_motion) {
build_options += " -D__NO_OBJECT_MOTION__";
}
if(!use_camera_motion) {
if (!use_camera_motion) {
build_options += " -D__NO_CAMERA_MOTION__";
}
if(!use_baking) {
if (!use_baking) {
build_options += " -D__NO_BAKING__";
}
if(!use_volume) {
if (!use_volume) {
build_options += " -D__NO_VOLUME__";
}
if(!use_subsurface) {
if (!use_subsurface) {
build_options += " -D__NO_SUBSURFACE__";
}
if(!use_integrator_branched) {
if (!use_integrator_branched) {
build_options += " -D__NO_BRANCHED_PATH__";
}
if(!use_patch_evaluation) {
if (!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
if(!use_transparent && !use_volume) {
if (!use_transparent && !use_volume) {
build_options += " -D__NO_TRANSPARENT__";
}
if(!use_shadow_tricks) {
if (!use_shadow_tricks) {
build_options += " -D__NO_SHADOW_TRICKS__";
}
if(!use_principled) {
if (!use_principled) {
build_options += " -D__NO_PRINCIPLED__";
}
if(!use_denoising) {
if (!use_denoising) {
build_options += " -D__NO_DENOISING__";
}
if(!use_shader_raytrace) {
if (!use_shader_raytrace) {
build_options += " -D__NO_SHADER_RAYTRACE__";
}
return build_options;
}
};
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features);
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
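A rough usage sketch of the build-option string assembled above (illustrative only; the field values set here are assumptions, not taken from this commit):

/* Hypothetical usage -- not part of the commit. */
DeviceRequestedFeatures features;
features.experimental = false;
features.max_nodes_group = 2; /* assumed value */
features.nodes_features = 3;  /* assumed value */
features.use_hair = false;
/* Other flags left as initialized elsewhere. */
string opts = features.get_build_options();
/* With only use_hair disabled this yields something like:
 * "-D__NODES_MAX_GROUP__=2 -D__NODES_FEATURES__=3 -D__NO_HAIR__" */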
/* Device */
@@ -274,17 +273,24 @@ struct DeviceDrawParams {
class Device {
friend class device_sub_ptr;
protected:
protected:
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
Device(DeviceInfo& info_, Stats &stats_, Profiler &profiler_, bool background) : background(background),
Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
: background(background),
vertex_buffer(0),
fallback_status(FALLBACK_SHADER_STATUS_NONE), fallback_shader_program(0),
info(info_), stats(stats_), profiler(profiler_) {}
fallback_status(FALLBACK_SHADER_STATUS_NONE),
fallback_shader_program(0),
info(info_),
stats(stats_),
profiler(profiler_)
{
}
bool background;
string error_msg;
@@ -296,30 +302,39 @@ protected:
bool bind_fallback_display_space_shader(const float width, const float height);
virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/)
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
return (device_ptr) 0;
return (device_ptr)0;
}
virtual void mem_free_sub_ptr(device_ptr /*ptr*/) {};
virtual void mem_free_sub_ptr(device_ptr /*ptr*/){};
public:
public:
virtual ~Device();
/* info */
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
virtual void set_error(const string& error)
virtual const string &error_message()
{
if(!have_error()) {
return error_msg;
}
bool have_error()
{
return !error_message().empty();
}
virtual void set_error(const string &error)
{
if (!have_error()) {
error_msg = error;
}
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
virtual bool show_samples() const { return false; }
virtual bool show_samples() const
{
return false;
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */
@@ -327,44 +342,62 @@ public:
Profiler &profiler;
/* memory alignment */
virtual int mem_sub_ptr_alignment() { return MIN_ALIGNMENT_CPU_DATA_TYPES; }
virtual int mem_sub_ptr_alignment()
{
return MIN_ALIGNMENT_CPU_DATA_TYPES;
}
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* open shading language, only for CPU device */
virtual void *osl_memory() { return NULL; }
virtual void *osl_memory()
{
return NULL;
}
/* load/compile kernels, must be called before adding tasks */
virtual bool load_kernels(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Wait for device to become available to upload data and receive tasks
* This method is used by the OpenCL device to load the
* optimized kernels or when not (yet) available load the
* generic kernels (only during foreground rendering) */
virtual bool wait_for_availability(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{ return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
{
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
/* tasks */
virtual int get_split_task_count(DeviceTask& task) = 0;
virtual void task_add(DeviceTask& task) = 0;
virtual int get_split_task_count(DeviceTask &task) = 0;
virtual void task_add(DeviceTask &task) = 0;
virtual void task_wait() = 0;
virtual void task_cancel() = 0;
/* opengl drawing */
virtual void draw_pixels(device_memory& mem, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params);
virtual void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params);
#ifdef WITH_NETWORK
/* networking */
@@ -372,20 +405,32 @@ public:
#endif
/* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
virtual int device_number(Device * /*sub_device*/) { return 0; }
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
{
}
virtual int device_number(Device * /*sub_device*/)
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
/* static */
static Device *create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background = true);
static Device *create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background = true);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
static vector<DeviceType> available_types();
static vector<DeviceInfo> available_devices(uint device_type_mask = DEVICE_MASK_ALL);
static string device_capabilities(uint device_type_mask = DEVICE_MASK_ALL);
static DeviceInfo get_multi_device(const vector<DeviceInfo>& subdevices,
static DeviceInfo get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background);
@@ -394,20 +439,19 @@ public:
static void free_memory();
protected:
protected:
/* Memory allocation, only accessed through device_memory. */
friend class MultiDevice;
friend class DeviceServer;
friend class device_memory;
virtual void mem_alloc(device_memory& mem) = 0;
virtual void mem_copy_to(device_memory& mem) = 0;
virtual void mem_copy_from(device_memory& mem,
int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory& mem) = 0;
virtual void mem_free(device_memory& mem) = 0;
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
private:
private:
/* Indicates whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update;
static thread_mutex device_mutex;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -21,7 +21,7 @@
CCL_NAMESPACE_BEGIN
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL),
storage(device),
buffer(device),
@@ -29,7 +29,7 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
{
radius = task.denoising.radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
if(task.denoising.relative_pca) {
if (task.denoising.relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
}
else {
@@ -47,13 +47,13 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
tile_info = (TileInfo*) tile_info_mem.alloc(sizeof(TileInfo)/sizeof(int));
tile_info->from_render = task.denoising_from_render? 1 : 0;
tile_info = (TileInfo *)tile_info_mem.alloc(sizeof(TileInfo) / sizeof(int));
tile_info->from_render = task.denoising_from_render ? 1 : 0;
tile_info->frames[0] = 0;
tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
for(int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i-1];
for (int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i - 1];
}
write_passes = task.denoising_write_passes;
@@ -73,7 +73,7 @@ DenoisingTask::~DenoisingTask()
void DenoisingTask::set_render_buffer(RenderTile *rtiles)
{
for(int i = 0; i < 9; i++) {
for (int i = 0; i < 9; i++) {
tile_info->offsets[i] = rtiles[i].offset;
tile_info->strides[i] = rtiles[i].stride;
tile_info->buffers[i] = rtiles[i].buffer;
@@ -91,8 +91,9 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
target_buffer.stride = rtiles[9].stride;
target_buffer.ptr = rtiles[9].buffer;
if(write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset = rtiles[9].buffers->params.get_denoising_prefiltered_offset();
if (write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset =
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
@@ -106,10 +107,11 @@ void DenoisingTask::setup_denoising_buffer()
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity? 15 : 14;
buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y;
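A worked example of the rect and buffer arithmetic above, with made-up numbers and assuming rect_from_shape(x, y, w, h) produces (x, y, x + w, y + h):

/* Illustrative numbers only -- not from the commit.
 * filter_area = (x=16, y=16, z=64, w=64), radius = 8:
 *   rect_from_shape -> (16, 16, 80, 80)
 *   rect_expand by 8 -> (8, 8, 88, 88)  (assuming no clipping by the neighbor tiles)
 *   buffer.width  = 88 - 8 = 80
 *   buffer.stride = align_up(80, 4) = 80
 *   buffer.h      = 88 - 8 = 80
 */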
@@ -123,11 +125,11 @@ void DenoisingTask::setup_denoising_buffer()
/* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers;
if(buffer.gpu_temporary_mem) {
if (buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6);
int num_shifts = (2*max_radius + 1) * (2*max_radius + 1);
num_layers = 2*num_shifts + 1;
int num_shifts = (2 * max_radius + 1) * (2 * max_radius + 1);
num_layers = 2 * num_shifts + 1;
}
else {
num_layers = 3;
@@ -138,14 +140,14 @@ void DenoisingTask::setup_denoising_buffer()
void DenoisingTask::prefilter_shadowing()
{
device_ptr null_ptr = (device_ptr) 0;
device_ptr null_ptr = (device_ptr)0;
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered_a(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b(buffer.mem, 1 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var(buffer.mem, 2 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var(buffer.mem, 3 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
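For orientation, the sub-pointer offsets above carve pass-sized windows out of the single denoising buffer; a sketch with a made-up pass stride:

/* Illustrative layout only (the pass_stride value is made up):
 * with buffer.pass_stride = 1000, the slices used above are
 *   unfiltered_a   -> offsets [   0, 1000)
 *   unfiltered_b   -> offsets [1000, 2000)
 *   sample_var     -> offsets [2000, 3000)
 *   sample_var_var -> offsets [3000, 4000)
 *   buffer_var     -> offsets [5000, 6000)
 *   filtered_var   -> offsets [6000, 7000)
 */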
@@ -155,8 +157,7 @@ void DenoisingTask::prefilter_shadowing()
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var,
filtered_b = *sample_var;
device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
@@ -166,30 +167,34 @@ void DenoisingTask::prefilter_shadowing()
/* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a,
final_b = *unfiltered_b;
device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr shadow_pass(buffer.mem, 4 * buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
}
void DenoisingTask::prefilter_features()
{
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr unfiltered(buffer.mem, 8 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance(buffer.mem, 9 * buffer.pass_stride, buffer.pass_stride);
int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 };
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
for(int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
int mean_from[] = {0, 1, 2, 12, 6, 7, 8};
int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
for (int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(
buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance, 1.0f / render_buffer.samples);
functions.get_feature(mean_from[pass],
variance_from[pass],
*unfiltered,
*variance,
1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
@@ -200,43 +205,52 @@ void DenoisingTask::prefilter_color()
{
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
int mean_to[] = { 8, 9, 10};
int mean_to[] = {8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
device_only_memory<float> temporary_color(device, "denoising temporary color");
temporary_color.alloc_to_device(3*buffer.pass_stride, false);
temporary_color.alloc_to_device(3 * buffer.pass_stride, false);
for(int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass, 1.0f / render_buffer.samples);
for (int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(
buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass],
variance_from[pass],
*color_pass,
*color_var_pass,
1.0f / render_buffer.samples);
}
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
functions.detect_outliers(temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(
buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
functions.detect_outliers(
temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
if(buffer.use_intensity) {
device_sub_ptr intensity_pass(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride);
nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2*4.0f, true);
if (buffer.use_intensity) {
device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2 * 4.0f, true);
functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass);
}
}
void DenoisingTask::load_buffer()
{
device_ptr null_ptr = (device_ptr) 0;
device_ptr null_ptr = (device_ptr)0;
int original_offset = render_buffer.offset;
int num_passes = buffer.use_intensity? 15 : 14;
for(int i = 0; i < tile_info->num_frames; i++) {
for(int pass = 0; pass < num_passes; pass++) {
device_sub_ptr to_pass(buffer.mem, i*buffer.frame_stride + pass*buffer.pass_stride, buffer.pass_stride);
int num_passes = buffer.use_intensity ? 15 : 14;
for (int i = 0; i < tile_info->num_frames; i++) {
for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr to_pass(
buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
bool is_variance = (pass >= 11) && (pass <= 13);
functions.get_feature(pass, -1, *to_pass, null_ptr, is_variance? (1.0f / render_buffer.samples) : 1.0f);
functions.get_feature(
pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
}
render_buffer.offset += render_buffer.frame_stride;
}
@@ -250,9 +264,9 @@ void DenoisingTask::write_buffer()
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
int num_passes = buffer.use_intensity? 15 : 14;
for(int pass = 0; pass < num_passes; pass++) {
device_sub_ptr from_pass(buffer.mem, pass*buffer.pass_stride, buffer.pass_stride);
int num_passes = buffer.use_intensity ? 15 : 14;
for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr from_pass(buffer.mem, pass * buffer.pass_stride, buffer.pass_stride);
int out_offset = pass + target_buffer.denoising_output_offset;
functions.write_feature(out_offset, *from_pass, target_buffer.ptr);
}
@@ -263,35 +277,36 @@ void DenoisingTask::construct_transform()
storage.w = filter_area.z;
storage.h = filter_area.w;
storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false);
storage.rank.alloc_to_device(storage.w*storage.h, false);
storage.transform.alloc_to_device(storage.w * storage.h * TRANSFORM_SIZE, false);
storage.rank.alloc_to_device(storage.w * storage.h, false);
functions.construct_transform();
}
void DenoisingTask::reconstruct()
{
storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
storage.XtWX.alloc_to_device(storage.w * storage.h * XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w * storage.h * XTWY_SIZE, false);
storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device();
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x;
reconstruction_state.filter_window = rect_from_shape(
filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
reconstruction_state.source_w = rect.z-rect.x;
reconstruction_state.source_h = rect.w-rect.y;
reconstruction_state.source_w = rect.z - rect.x;
reconstruction_state.source_h = rect.w - rect.y;
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
for(int f = 0; f < tile_info->num_frames; f++) {
device_sub_ptr color_ptr(buffer.mem, 8 * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11 * buffer.pass_stride, 3 * buffer.pass_stride);
for (int f = 0; f < tile_info->num_frames; f++) {
device_ptr scale_ptr = 0;
device_sub_ptr *scale_sub_ptr = NULL;
if(tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14*buffer.pass_stride, buffer.pass_stride);
if (tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
scale_ptr = **scale_sub_ptr;
}
@@ -310,7 +325,7 @@ void DenoisingTask::run_denoising(RenderTile *tile)
setup_denoising_buffer();
if(tile_info->from_render) {
if (tile_info->from_render) {
prefilter_shadowing();
prefilter_features();
prefilter_color();
@@ -319,12 +334,12 @@ void DenoisingTask::run_denoising(RenderTile *tile)
load_buffer();
}
if(do_filter) {
if (do_filter) {
construct_transform();
reconstruct();
}
if(write_passes) {
if (write_passes) {
write_buffer();
}


@@ -28,7 +28,7 @@
CCL_NAMESPACE_BEGIN
class DenoisingTask {
public:
public:
/* Parameters of the denoising algorithm. */
int radius;
float nlm_k_2;
@@ -64,16 +64,16 @@ public:
bool do_filter;
struct DeviceFunctions {
function<bool(device_ptr image_ptr, /* Contains the values that are smoothed. */
function<bool(
device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */
device_ptr out_ptr /* The filtered output is written into this image. */
)> non_local_means;
function<bool(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
int frame
)> accumulate;
)>
non_local_means;
function<bool(
device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
@@ -82,29 +82,26 @@ public:
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect
)> combine_halves;
int4 rect)>
combine_halves;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr
)> divide_shadow;
device_ptr buffer_variance_ptr)>
divide_shadow;
function<bool(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale
)> get_feature;
float scale)>
get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr
)> detect_outliers;
function<bool(int out_offset,
device_ptr frop_ptr,
device_ptr buffer_ptr
)> write_feature;
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions;
@@ -128,7 +125,13 @@ public:
float k_2; /* Squared value of the k parameter of the filter. */
bool is_color;
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_) { r = r_; f = f_; a = a_, k_2 = k_2_; is_color = is_color_; }
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
{
r = r_;
f = f_;
a = a_;
k_2 = k_2_;
is_color = is_color_;
}
} nlm_state;
struct Storage {
@@ -144,7 +147,8 @@ public:
rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY")
{}
{
}
} storage;
DenoisingTask(Device *device, const DeviceTask &task);
@@ -167,12 +171,12 @@ public:
bool gpu_temporary_mem;
DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"),
temporary_mem(device, "denoising temporary mem")
{}
: mem(device, "denoising pixel buffer"), temporary_mem(device, "denoising temporary mem")
{
}
} buffer;
protected:
protected:
Device *device;
void set_render_buffer(RenderTile *rtiles);


@@ -21,19 +21,22 @@ CCL_NAMESPACE_BEGIN
class Device;
Device *device_cpu_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string>& parameters);
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address);
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
void device_cpu_info(vector<DeviceInfo>& devices);
void device_opencl_info(vector<DeviceInfo>& devices);
void device_cuda_info(vector<DeviceInfo>& devices);
void device_network_info(vector<DeviceInfo>& devices);
void device_cpu_info(vector<DeviceInfo> &devices);
void device_opencl_info(vector<DeviceInfo> &devices);
void device_cuda_info(vector<DeviceInfo> &devices);
void device_network_info(vector<DeviceInfo> &devices);
string device_cpu_capabilities();
string device_opencl_capabilities();


@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
/* Device Memory */
device_memory::device_memory(Device *device, const char *name, MemoryType type)
: data_type(device_type_traits<uchar>::data_type),
: data_type(device_type_traits<uchar>::data_type),
data_elements(device_type_traits<uchar>::num_elements),
data_size(0),
device_size(0),
@@ -46,13 +46,13 @@ device_memory::~device_memory()
void *device_memory::host_alloc(size_t size)
{
if(!size) {
if (!size) {
return 0;
}
void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
if(ptr) {
if (ptr) {
util_guarded_mem_alloc(size);
}
else {
@@ -64,9 +64,9 @@ void *device_memory::host_alloc(size_t size)
void device_memory::host_free()
{
if(host_pointer) {
if (host_pointer) {
util_guarded_mem_free(memory_size());
util_aligned_free((void*)host_pointer);
util_aligned_free((void *)host_pointer);
host_pointer = 0;
}
}
@@ -79,14 +79,14 @@ void device_memory::device_alloc()
void device_memory::device_free()
{
if(device_pointer) {
if (device_pointer) {
device->mem_free(*this);
}
}
void device_memory::device_copy_to()
{
if(host_pointer) {
if (host_pointer) {
device->mem_copy_to(*this);
}
}
@@ -99,7 +99,7 @@ void device_memory::device_copy_from(int y, int w, int h, int elem)
void device_memory::device_zero()
{
if(data_size) {
if (data_size) {
device->mem_zero(*this);
}
}
@@ -126,8 +126,7 @@ void device_memory::restore_device()
/* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
: device(mem.device)
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
{
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}


@@ -31,13 +31,7 @@ CCL_NAMESPACE_BEGIN
class Device;
enum MemoryType {
MEM_READ_ONLY,
MEM_READ_WRITE,
MEM_DEVICE_ONLY,
MEM_TEXTURE,
MEM_PIXELS
};
enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
/* Supported Data Types */
@@ -54,16 +48,25 @@ enum DataType {
static inline size_t datatype_size(DataType datatype)
{
switch(datatype) {
case TYPE_UNKNOWN: return 1;
case TYPE_UCHAR: return sizeof(uchar);
case TYPE_FLOAT: return sizeof(float);
case TYPE_UINT: return sizeof(uint);
case TYPE_UINT16: return sizeof(uint16_t);
case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
case TYPE_UINT64: return sizeof(uint64_t);
default: return 0;
switch (datatype) {
case TYPE_UNKNOWN:
return 1;
case TYPE_UCHAR:
return sizeof(uchar);
case TYPE_FLOAT:
return sizeof(float);
case TYPE_UINT:
return sizeof(uint);
case TYPE_UINT16:
return sizeof(uint16_t);
case TYPE_INT:
return sizeof(int);
case TYPE_HALF:
return sizeof(half);
case TYPE_UINT64:
return sizeof(uint64_t);
default:
return 0;
}
}
@@ -184,12 +187,15 @@ template<> struct device_type_traits<uint64_t> {
* Base class for all device memory. This should not be allocated directly,
* instead the appropriate subclass can be used. */
class device_memory
{
public:
size_t memory_size() { return data_size*data_elements*datatype_size(data_type); }
size_t memory_elements_size(int elements) {
return elements*data_elements*datatype_size(data_type);
class device_memory {
public:
size_t memory_size()
{
return data_size * data_elements * datatype_size(data_type);
}
size_t memory_elements_size(int elements)
{
return elements * data_elements * datatype_size(data_type);
}
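A worked example of the size computation above (illustrative; assumes the float4 specialization of device_type_traits reports TYPE_FLOAT with 4 elements):

/* Hypothetical numbers -- not from the commit:
 * a device_vector<float4> holding 1000 elements has
 *   data_size = 1000, data_elements = 4, datatype_size(TYPE_FLOAT) = sizeof(float) = 4,
 * so memory_size() = 1000 * 4 * 4 = 16000 bytes. */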
/* Data information. */
@@ -216,15 +222,15 @@ public:
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
void restore_device();
protected:
protected:
friend class CUDADevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
/* No copying allowed. */
device_memory(const device_memory&);
device_memory& operator = (const device_memory&);
device_memory(const device_memory &);
device_memory &operator=(const device_memory &);
/* Host allocation on the device. All host_pointer memory should be
* allocated with these functions, for devices that support using
@@ -249,10 +255,8 @@ protected:
* Working memory only needed by the device, with no corresponding allocation
* on the host. Only used internally in the device implementations. */
template<typename T>
class device_only_memory : public device_memory
{
public:
template<typename T> class device_only_memory : public device_memory {
public:
device_only_memory(Device *device, const char *name)
: device_memory(device, name, MEM_DEVICE_ONLY)
{
@@ -270,14 +274,14 @@ public:
size_t new_size = num;
bool reallocate;
if(shrink_to_fit) {
if (shrink_to_fit) {
reallocate = (data_size != new_size);
}
else {
reallocate = (data_size < new_size);
}
if(reallocate) {
if (reallocate) {
device_free();
data_size = new_size;
device_alloc();
@@ -307,9 +311,8 @@ public:
* automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */
template<typename T> class device_vector : public device_memory
{
public:
template<typename T> class device_vector : public device_memory {
public:
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
{
@@ -329,10 +332,10 @@ public:
{
size_t new_size = size(width, height, depth);
if(new_size != data_size) {
if (new_size != data_size) {
device_free();
host_free();
host_pointer = host_alloc(sizeof(T)*new_size);
host_pointer = host_alloc(sizeof(T) * new_size);
assert(device_pointer == 0);
}
@@ -350,12 +353,12 @@ public:
{
size_t new_size = size(width, height, depth);
if(new_size != data_size) {
void *new_ptr = host_alloc(sizeof(T)*new_size);
if (new_size != data_size) {
void *new_ptr = host_alloc(sizeof(T) * new_size);
if(new_size && data_size) {
size_t min_size = ((new_size < data_size)? new_size: data_size);
memcpy((T*)new_ptr, (T*)host_pointer, sizeof(T)*min_size);
if (new_size && data_size) {
size_t min_size = ((new_size < data_size) ? new_size : data_size);
memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size);
}
device_free();
@@ -373,7 +376,7 @@ public:
}
/* Take over data from an existing array. */
void steal_data(array<T>& from)
void steal_data(array<T> &from)
{
device_free();
host_free();
@@ -405,12 +408,12 @@ public:
return data_size;
}
T* data()
T *data()
{
return (T*)host_pointer;
return (T *)host_pointer;
}
T& operator[](size_t i)
T &operator[](size_t i)
{
assert(i < data_size);
return data()[i];
@@ -431,10 +434,10 @@ public:
device_zero();
}
protected:
protected:
size_t size(size_t width, size_t height, size_t depth)
{
return width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
}
};
@@ -443,11 +446,9 @@ protected:
* Device memory to efficiently draw as pixels to the screen in interactive
* rendering. Only copying pixels from the device is supported, not copying to. */
template<typename T> class device_pixels : public device_vector<T>
{
public:
device_pixels(Device *device, const char *name)
: device_vector<T>(device, name, MEM_PIXELS)
template<typename T> class device_pixels : public device_vector<T> {
public:
device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
{
}
@@ -455,7 +456,7 @@ public:
{
device_vector<T>::alloc(width, height, depth);
if(!device_memory::device_pointer) {
if (!device_memory::device_pointer) {
device_memory::device_alloc();
}
}
@@ -476,10 +477,9 @@ public:
* Note: some devices require offset and size of the sub_ptr to be properly
* aligned to device->mem_sub_ptr_alignment(). */
class device_sub_ptr
{
public:
device_sub_ptr(device_memory& mem, int offset, int size);
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, int offset, int size);
~device_sub_ptr();
device_ptr operator*() const
@@ -487,9 +487,9 @@ public:
return ptr;
}
protected:
protected:
/* No copying. */
device_sub_ptr& operator = (const device_sub_ptr&);
device_sub_ptr &operator=(const device_sub_ptr &);
Device *device;
device_ptr ptr;


@@ -31,12 +31,12 @@
CCL_NAMESPACE_BEGIN
class MultiDevice : public Device
{
public:
class MultiDevice : public Device {
public:
struct SubDevice {
explicit SubDevice(Device *device_)
: device(device_) {}
explicit SubDevice(Device *device_) : device(device_)
{
}
Device *device;
map<device_ptr, device_ptr> ptr_map;
@@ -45,15 +45,15 @@ public:
list<SubDevice> devices;
device_ptr unique_key;
MultiDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background_)
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1)
{
foreach(DeviceInfo& subinfo, info.multi_devices) {
foreach (DeviceInfo &subinfo, info.multi_devices) {
Device *device = Device::create(subinfo, sub_stats_, profiler, background);
/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
if(subinfo.type == DEVICE_CPU) {
if (subinfo.type == DEVICE_CPU) {
devices.push_back(SubDevice(device));
}
else {
@@ -68,9 +68,9 @@ public:
vector<string> servers = discovery.get_server_list();
foreach(string& server, servers) {
foreach (string &server, servers) {
Device *device = device_network_create(info, stats, profiler, server.c_str());
if(device)
if (device)
devices.push_back(SubDevice(device));
}
#endif
@@ -78,15 +78,15 @@ public:
~MultiDevice()
{
foreach(SubDevice& sub, devices)
foreach (SubDevice &sub, devices)
delete sub.device;
}
const string& error_message()
const string &error_message()
{
foreach(SubDevice& sub, devices) {
if(sub.device->error_message() != "") {
if(error_msg == "")
foreach (SubDevice &sub, devices) {
if (sub.device->error_message() != "") {
if (error_msg == "")
error_msg = sub.device->error_message();
break;
}
@@ -97,33 +97,34 @@ public:
virtual bool show_samples() const
{
if(devices.size() > 1) {
if (devices.size() > 1) {
return false;
}
return devices.front().device->show_samples();
}
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
foreach(const SubDevice& sub_device, devices) {
foreach (const SubDevice &sub_device, devices) {
bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
}
return bvh_layout_mask;
}
bool load_kernels(const DeviceRequestedFeatures& requested_features)
bool load_kernels(const DeviceRequestedFeatures &requested_features)
{
foreach(SubDevice& sub, devices)
if(!sub.device->load_kernels(requested_features))
foreach (SubDevice &sub, devices)
if (!sub.device->load_kernels(requested_features))
return false;
return true;
}
bool wait_for_availability(const DeviceRequestedFeatures& requested_features)
bool wait_for_availability(const DeviceRequestedFeatures &requested_features)
{
foreach(SubDevice& sub, devices)
if(!sub.device->wait_for_availability(requested_features))
foreach (SubDevice &sub, devices)
if (!sub.device->wait_for_availability(requested_features))
return false;
return true;
@@ -133,7 +134,7 @@ public:
{
DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
foreach(SubDevice& sub, devices) {
foreach (SubDevice &sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) {
case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL:
@@ -152,11 +153,11 @@ public:
return result;
}
void mem_alloc(device_memory& mem)
void mem_alloc(device_memory &mem)
{
device_ptr key = unique_key++;
foreach(SubDevice& sub, devices) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = 0;
mem.device_size = 0;
@@ -170,15 +171,15 @@ public:
stats.mem_alloc(mem.device_size);
}
void mem_copy_to(device_memory& mem)
void mem_copy_to(device_memory &mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_copy_to(mem);
@@ -190,14 +191,14 @@ public:
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
{
device_ptr key = mem.device_pointer;
int i = 0, sub_h = h/devices.size();
int i = 0, sub_h = h / devices.size();
foreach(SubDevice& sub, devices) {
int sy = y + i*sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
@@ -210,15 +211,15 @@ public:
mem.device_pointer = key;
}
void mem_zero(device_memory& mem)
void mem_zero(device_memory &mem)
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key)? existing_key: unique_key++;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key)? sub.ptr_map[existing_key]: 0;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_zero(mem);
@@ -230,12 +231,12 @@ public:
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_free(device_memory& mem)
void mem_free(device_memory &mem)
{
device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size;
foreach(SubDevice& sub, devices) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
@@ -252,40 +253,49 @@ public:
void const_copy_to(const char *name, void *host, size_t size)
{
foreach(SubDevice& sub, devices)
foreach (SubDevice &sub, devices)
sub.device->const_copy_to(name, host, size);
}
void draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
void draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h/devices.size();
int sub_height = height/devices.size();
int i = 0, sub_h = h / devices.size();
int sub_height = height / devices.size();
foreach(SubDevice& sub, devices) {
int sy = y + i*sub_h;
int sh = (i == (int)devices.size() - 1)? h - sub_h*i: sub_h;
int sheight = (i == (int)devices.size() - 1)? height - sub_height*i: sub_height;
int sdy = dy + i*sub_height;
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
int sheight = (i == (int)devices.size() - 1) ? height - sub_height * i : sub_height;
int sdy = dy + i * sub_height;
/* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
sub.device->draw_pixels(
rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
rgba.device_pointer = key;
}
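A standalone sketch of the row-splitting arithmetic used above (the tile height and device count are made-up numbers, not taken from this commit):

#include <cstdio>

int main()
{
  int h = 101;         /* hypothetical tile height */
  int num_devices = 4; /* hypothetical devices.size() */
  int sub_h = h / num_devices;
  for (int i = 0; i < num_devices; i++) {
    int sy = i * sub_h;
    int sh = (i == num_devices - 1) ? h - sub_h * i : sub_h;
    printf("sub-device %d draws rows [%d, %d)\n", i, sy, sy + sh);
  }
  return 0;
}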
void map_tile(Device *sub_device, RenderTile& tile)
void map_tile(Device *sub_device, RenderTile &tile)
{
foreach(SubDevice& sub, devices) {
if(sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
if (tile.buffer)
tile.buffer = sub.ptr_map[tile.buffer];
}
}
}
@@ -294,8 +304,8 @@ public:
{
int i = 0;
foreach(SubDevice& sub, devices) {
if(sub.device == sub_device)
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device)
return i;
i++;
}
@@ -305,8 +315,8 @@ public:
void map_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
for(int i = 0; i < 9; i++) {
if(!tiles[i].buffers) {
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
continue;
}
@@ -315,12 +325,12 @@ public:
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) {
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being denoised by the CPU thread. */
if(!tiles[i].buffers->map_neighbor_copied) {
if (!tiles[i].buffers->map_neighbor_copied) {
tiles[i].buffers->map_neighbor_copied = true;
mem.copy_from_device(0, mem.data_size, 1);
}
@@ -336,7 +346,7 @@ public:
}
}
void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
void unmap_neighbor_tiles(Device *sub_device, RenderTile *tiles)
{
/* Copy denoised result back to the host. */
device_vector<float> &mem = tiles[9].buffers->buffer;
@@ -346,13 +356,13 @@ public:
/* Copy denoised result to the original device. */
mem.copy_to_device();
for(int i = 0; i < 9; i++) {
if(!tiles[i].buffers) {
for (int i = 0; i < 9; i++) {
if (!tiles[i].buffers) {
continue;
}
device_vector<float> &mem = tiles[i].buffers->buffer;
if(mem.device != sub_device) {
if (mem.device != sub_device) {
mem.swap_device(sub_device, tiles[i].device_size, tiles[i].buffer);
sub_device->mem_free(mem);
mem.restore_device();
@@ -360,13 +370,13 @@ public:
}
}
int get_split_task_count(DeviceTask& task)
int get_split_task_count(DeviceTask &task)
{
int total_tasks = 0;
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) {
if(!tasks.empty()) {
foreach (SubDevice &sub, devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
@@ -376,21 +386,26 @@ public:
return total_tasks;
}
void task_add(DeviceTask& task)
void task_add(DeviceTask &task)
{
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach(SubDevice& sub, devices) {
if(!tasks.empty()) {
foreach (SubDevice &sub, devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
if (task.buffer)
subtask.buffer = sub.ptr_map[task.buffer];
if (task.rgba_byte)
subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
subtask.shader_input = sub.ptr_map[task.shader_input];
if (task.shader_output)
subtask.shader_output = sub.ptr_map[task.shader_output];
sub.device->task_add(subtask);
}
@@ -399,21 +414,21 @@ public:
void task_wait()
{
foreach(SubDevice& sub, devices)
foreach (SubDevice &sub, devices)
sub.device->task_wait();
}
void task_cancel()
{
foreach(SubDevice& sub, devices)
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
}
protected:
protected:
Stats sub_stats_;
};
Device *device_multi_create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background)
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
return new MultiDevice(info, stats, profiler, background);
}


@@ -33,17 +33,16 @@ typedef map<device_ptr, DataVector> DataMap;
typedef vector<RenderTile> TileList;
/* search a list of tiles and find the one that matches the passed render tile */
static TileList::iterator tile_list_find(TileList& tile_list, RenderTile& tile)
static TileList::iterator tile_list_find(TileList &tile_list, RenderTile &tile)
{
for(TileList::iterator it = tile_list.begin(); it != tile_list.end(); ++it)
if(tile.x == it->x && tile.y == it->y && tile.start_sample == it->start_sample)
for (TileList::iterator it = tile_list.begin(); it != tile_list.end(); ++it)
if (tile.x == it->x && tile.y == it->y && tile.start_sample == it->start_sample)
return it;
return tile_list.end();
}
class NetworkDevice : public Device
{
public:
class NetworkDevice : public Device {
public:
boost::asio::io_service io_service;
tcp::socket socket;
device_ptr mem_counter;
@@ -56,7 +55,7 @@ public:
return false;
}
NetworkDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address)
NetworkDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, const char *address)
: Device(info, stats, profiler, true), socket(io_service)
{
error_func = NetworkError();
@@ -69,13 +68,12 @@ public:
tcp::resolver::iterator end;
boost::system::error_code error = boost::asio::error::host_not_found;
while(error && endpoint_iterator != end)
{
while (error && endpoint_iterator != end) {
socket.close();
socket.connect(*endpoint_iterator++, error);
}
if(error)
if (error)
error_func.network_error(error.message());
mem_counter = 0;
@@ -87,13 +85,14 @@ public:
snd.write();
}
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}
void mem_alloc(device_memory& mem)
void mem_alloc(device_memory &mem)
{
if(mem.name) {
if (mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
@@ -108,7 +107,7 @@ public:
snd.write();
}
void mem_copy_to(device_memory& mem)
void mem_copy_to(device_memory &mem)
{
thread_scoped_lock lock(rpc_lock);
@@ -119,7 +118,7 @@ public:
snd.write_buffer(mem.host_pointer, mem.memory_size());
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
{
thread_scoped_lock lock(rpc_lock);
@@ -138,7 +137,7 @@ public:
rcv.read_buffer(mem.host_pointer, data_size);
}
void mem_zero(device_memory& mem)
void mem_zero(device_memory &mem)
{
thread_scoped_lock lock(rpc_lock);
@@ -148,9 +147,9 @@ public:
snd.write();
}
void mem_free(device_memory& mem)
void mem_free(device_memory &mem)
{
if(mem.device_pointer) {
if (mem.device_pointer) {
thread_scoped_lock lock(rpc_lock);
RPCSend snd(socket, &error_func, "mem_free");
@@ -176,9 +175,9 @@ public:
snd.write_buffer(host, size);
}
bool load_kernels(const DeviceRequestedFeatures& requested_features)
bool load_kernels(const DeviceRequestedFeatures &requested_features)
{
if(error_func.have_error())
if (error_func.have_error())
return false;
thread_scoped_lock lock(rpc_lock);
@@ -197,7 +196,7 @@ public:
return result;
}
void task_add(DeviceTask& task)
void task_add(DeviceTask &task)
{
thread_scoped_lock lock(rpc_lock);
@@ -220,8 +219,8 @@ public:
TileList the_tiles;
/* todo: run this threaded for connecting to multiple clients */
for(;;) {
if(error_func.have_error())
for (;;) {
if (error_func.have_error())
break;
RenderTile tile;
@@ -229,11 +228,11 @@ public:
lock.lock();
RPCReceive rcv(socket, &error_func);
if(rcv.name == "acquire_tile") {
if (rcv.name == "acquire_tile") {
lock.unlock();
/* todo: watch out for recursive calls! */
if(the_task.acquire_tile(this, tile)) { /* write return as bool */
if (the_task.acquire_tile(this, tile)) { /* write return as bool */
the_tiles.push_back(tile);
lock.lock();
@@ -249,12 +248,12 @@ public:
lock.unlock();
}
}
else if(rcv.name == "release_tile") {
else if (rcv.name == "release_tile") {
rcv.read(tile);
lock.unlock();
TileList::iterator it = tile_list_find(the_tiles, tile);
if(it != the_tiles.end()) {
if (it != the_tiles.end()) {
tile.buffers = it->buffers;
the_tiles.erase(it);
}
@@ -268,7 +267,7 @@ public:
snd.write();
lock.unlock();
}
else if(rcv.name == "task_wait_done") {
else if (rcv.name == "task_wait_done") {
lock.unlock();
break;
}
@@ -284,21 +283,24 @@ public:
snd.write();
}
int get_split_task_count(DeviceTask&)
int get_split_task_count(DeviceTask &)
{
return 1;
}
private:
private:
NetworkError error_func;
};
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address)
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address)
{
return new NetworkDevice(info, stats, profiler, address);
}
void device_network_info(vector<DeviceInfo>& devices)
void device_network_info(vector<DeviceInfo> &devices)
{
DeviceInfo info;
@@ -315,16 +317,20 @@ void device_network_info(vector<DeviceInfo>& devices)
}
class DeviceServer {
public:
public:
thread_mutex rpc_lock;
void network_error(const string &message) {
void network_error(const string &message)
{
error_func.network_error(message);
}
bool have_error() { return error_func.have_error(); }
bool have_error()
{
return error_func.have_error();
}
DeviceServer(Device *device_, tcp::socket& socket_)
DeviceServer(Device *device_, tcp::socket &socket_)
: device(device_), socket(socket_), stop(false), blocked_waiting(false)
{
error_func = NetworkError();
@@ -333,21 +339,21 @@ public:
void listen()
{
/* receive remote function calls */
for(;;) {
for (;;) {
listen_step();
if(stop)
if (stop)
break;
}
}
protected:
protected:
void listen_step()
{
thread_scoped_lock lock(rpc_lock);
RPCReceive rcv(socket, &error_func);
if(rcv.name == "stop")
if (rcv.name == "stop")
stop = true;
else
process(rcv, lock);
@@ -357,7 +363,7 @@ protected:
DataVector &data_vector_insert(device_ptr client_pointer, size_t data_size)
{
/* create a new DataVector and insert it into mem_data */
pair<DataMap::iterator,bool> data_ins = mem_data.insert(
pair<DataMap::iterator, bool> data_ins = mem_data.insert(
DataMap::value_type(client_pointer, DataVector()));
/* make sure it was a unique insertion */
@@ -382,7 +388,7 @@ protected:
/* setup mapping and reverse mapping of client_pointer<->real_pointer */
void pointer_mapping_insert(device_ptr client_pointer, device_ptr real_pointer)
{
pair<PtrMap::iterator,bool> mapins;
pair<PtrMap::iterator, bool> mapins;
/* insert mapping from client pointer to our real device pointer */
mapins = ptr_map.insert(PtrMap::value_type(client_pointer, real_pointer));
@@ -428,9 +434,9 @@ protected:
* the header and delegates control to here when it doesn't
* specifically handle the current RPC.
* The lock must be unlocked before returning */
void process(RPCReceive& rcv, thread_scoped_lock &lock)
void process(RPCReceive &rcv, thread_scoped_lock &lock)
{
if(rcv.name == "mem_alloc") {
if (rcv.name == "mem_alloc") {
string name;
network_device_memory mem(device);
rcv.read(mem, name);
@@ -441,7 +447,7 @@ protected:
device_ptr client_pointer = mem.device_pointer;
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
mem.host_pointer = (data_size) ? (void *)&(data_v[0]) : 0;
/* Perform the allocation on the actual device. */
device->mem_alloc(mem);
@@ -449,7 +455,7 @@ protected:
/* Store a mapping to/from client_pointer and real device pointer. */
pointer_mapping_insert(client_pointer, mem.device_pointer);
}
else if(rcv.name == "mem_copy_to") {
else if (rcv.name == "mem_copy_to") {
string name;
network_device_memory mem(device);
rcv.read(mem, name);
@@ -458,10 +464,10 @@ protected:
size_t data_size = mem.memory_size();
device_ptr client_pointer = mem.device_pointer;
if(client_pointer) {
if (client_pointer) {
/* Lookup existing host side data buffer. */
DataVector &data_v = data_vector_find(client_pointer);
mem.host_pointer = (void*)&data_v[0];
mem.host_pointer = (void *)&data_v[0];
/* Translate the client pointer to a real device pointer. */
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
@@ -469,21 +475,21 @@ protected:
else {
/* Allocate host side data buffer. */
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.host_pointer = (data_size)? (void*)&(data_v[0]): 0;
mem.host_pointer = (data_size) ? (void *)&(data_v[0]) : 0;
}
/* Copy data from network into memory buffer. */
rcv.read_buffer((uint8_t*)mem.host_pointer, data_size);
rcv.read_buffer((uint8_t *)mem.host_pointer, data_size);
/* Copy the data from the memory buffer to the device buffer. */
device->mem_copy_to(mem);
if(!client_pointer) {
if (!client_pointer) {
/* Store a mapping to/from client_pointer and real device pointer. */
pointer_mapping_insert(client_pointer, mem.device_pointer);
}
}
else if(rcv.name == "mem_copy_from") {
else if (rcv.name == "mem_copy_from") {
string name;
network_device_memory mem(device);
int y, w, h, elem;
@@ -499,7 +505,7 @@ protected:
DataVector &data_v = data_vector_find(client_pointer);
mem.host_pointer = (device_ptr)&(data_v[0]);
mem.host_pointer = (device_ptr) & (data_v[0]);
device->mem_copy_from(mem, y, w, h, elem);
@@ -507,10 +513,10 @@ protected:
RPCSend snd(socket, &error_func, "mem_copy_from");
snd.write();
snd.write_buffer((uint8_t*)mem.host_pointer, data_size);
snd.write_buffer((uint8_t *)mem.host_pointer, data_size);
lock.unlock();
}
else if(rcv.name == "mem_zero") {
else if (rcv.name == "mem_zero") {
string name;
network_device_memory mem(device);
rcv.read(mem, name);
@@ -519,10 +525,10 @@ protected:
size_t data_size = mem.memory_size();
device_ptr client_pointer = mem.device_pointer;
if(client_pointer) {
if (client_pointer) {
/* Lookup existing host side data buffer. */
DataVector &data_v = data_vector_find(client_pointer);
mem.host_pointer = (void*)&data_v[0];
mem.host_pointer = (void *)&data_v[0];
/* Translate the client pointer to a real device pointer. */
mem.device_pointer = device_ptr_from_client_pointer(client_pointer);
@@ -530,18 +536,18 @@ protected:
else {
/* Allocate host side data buffer. */
DataVector &data_v = data_vector_insert(client_pointer, data_size);
mem.host_pointer = (void*)? (device_ptr)&(data_v[0]): 0;
mem.host_pointer = (data_size) ? (void *)&(data_v[0]) : 0;
}
/* Zero memory. */
device->mem_zero(mem);
if(!client_pointer) {
if (!client_pointer) {
/* Store a mapping to/from client_pointer and real device pointer. */
pointer_mapping_insert(client_pointer, mem.device_pointer);
}
}
else if(rcv.name == "mem_free") {
else if (rcv.name == "mem_free") {
string name;
network_device_memory mem(device);
@@ -554,7 +560,7 @@ protected:
device->mem_free(mem);
}
else if(rcv.name == "const_copy_to") {
else if (rcv.name == "const_copy_to") {
string name_string;
size_t size;
@@ -567,7 +573,7 @@ protected:
device->const_copy_to(name_string.c_str(), &host_vector[0], size);
}
else if(rcv.name == "load_kernels") {
else if (rcv.name == "load_kernels") {
DeviceRequestedFeatures requested_features;
rcv.read(requested_features.experimental);
rcv.read(requested_features.max_closure);
@@ -581,36 +587,37 @@ protected:
snd.write();
lock.unlock();
}
else if(rcv.name == "task_add") {
else if (rcv.name == "task_add") {
DeviceTask task;
rcv.read(task);
lock.unlock();
if(task.buffer)
if (task.buffer)
task.buffer = device_ptr_from_client_pointer(task.buffer);
if(task.rgba_half)
if (task.rgba_half)
task.rgba_half = device_ptr_from_client_pointer(task.rgba_half);
if(task.rgba_byte)
if (task.rgba_byte)
task.rgba_byte = device_ptr_from_client_pointer(task.rgba_byte);
if(task.shader_input)
if (task.shader_input)
task.shader_input = device_ptr_from_client_pointer(task.shader_input);
if(task.shader_output)
if (task.shader_output)
task.shader_output = device_ptr_from_client_pointer(task.shader_output);
task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample,
this);
task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);
device->task_add(task);
}
else if(rcv.name == "task_wait") {
else if (rcv.name == "task_wait") {
lock.unlock();
blocked_waiting = true;
@@ -622,24 +629,24 @@ protected:
snd.write();
lock.unlock();
}
else if(rcv.name == "task_cancel") {
else if (rcv.name == "task_cancel") {
lock.unlock();
device->task_cancel();
}
else if(rcv.name == "acquire_tile") {
else if (rcv.name == "acquire_tile") {
AcquireEntry entry;
entry.name = rcv.name;
rcv.read(entry.tile);
acquire_queue.push_back(entry);
lock.unlock();
}
else if(rcv.name == "acquire_tile_none") {
else if (rcv.name == "acquire_tile_none") {
AcquireEntry entry;
entry.name = rcv.name;
acquire_queue.push_back(entry);
lock.unlock();
}
else if(rcv.name == "release_tile") {
else if (rcv.name == "release_tile") {
AcquireEntry entry;
entry.name = rcv.name;
acquire_queue.push_back(entry);
@@ -651,7 +658,7 @@ protected:
}
}
bool task_acquire_tile(Device *, RenderTile& tile)
bool task_acquire_tile(Device *, RenderTile &tile)
{
thread_scoped_lock acquire_lock(acquire_mutex);
@@ -661,32 +668,33 @@ protected:
snd.write();
do {
if(blocked_waiting)
if (blocked_waiting)
listen_step();
/* todo: avoid busy wait loop */
thread_scoped_lock lock(rpc_lock);
if(!acquire_queue.empty()) {
if (!acquire_queue.empty()) {
AcquireEntry entry = acquire_queue.front();
acquire_queue.pop_front();
if(entry.name == "acquire_tile") {
if (entry.name == "acquire_tile") {
tile = entry.tile;
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
if (tile.buffer)
tile.buffer = ptr_map[tile.buffer];
result = true;
break;
}
else if(entry.name == "acquire_tile_none") {
else if (entry.name == "acquire_tile_none") {
break;
}
else {
cout << "Error: unexpected acquire RPC receive call \"" + entry.name + "\"\n";
}
}
} while(acquire_queue.empty() && !stop && !have_error());
} while (acquire_queue.empty() && !stop && !have_error());
return result;
}
@@ -696,16 +704,17 @@ protected:
; /* skip */
}
void task_update_tile_sample(RenderTile&)
void task_update_tile_sample(RenderTile &)
{
; /* skip */
}
void task_release_tile(RenderTile& tile)
void task_release_tile(RenderTile &tile)
{
thread_scoped_lock acquire_lock(acquire_mutex);
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
if (tile.buffer)
tile.buffer = ptr_imap[tile.buffer];
{
thread_scoped_lock lock(rpc_lock);
@@ -716,17 +725,17 @@ protected:
}
do {
if(blocked_waiting)
if (blocked_waiting)
listen_step();
/* todo: avoid busy wait loop */
thread_scoped_lock lock(rpc_lock);
if(!acquire_queue.empty()) {
if (!acquire_queue.empty()) {
AcquireEntry entry = acquire_queue.front();
acquire_queue.pop_front();
if(entry.name == "release_tile") {
if (entry.name == "release_tile") {
lock.unlock();
break;
}
@@ -734,7 +743,7 @@ protected:
cout << "Error: unexpected release RPC receive call \"" + entry.name + "\"\n";
}
}
} while(acquire_queue.empty() && !stop);
} while (acquire_queue.empty() && !stop);
}
bool task_get_cancel()
@@ -744,7 +753,7 @@ protected:
/* properties */
Device *device;
tcp::socket& socket;
tcp::socket &socket;
/* mapping of remote to local pointer */
PtrMap ptr_map;
@@ -761,11 +770,11 @@ protected:
bool stop;
bool blocked_waiting;
private:
private:
NetworkError error_func;
/* todo: free memory and device (osl) on network error */
};
void Device::server_run()
@@ -774,7 +783,7 @@ void Device::server_run()
/* starts thread that responds to discovery requests */
ServerDiscovery discovery;
for(;;) {
for (;;) {
/* accept connection */
boost::asio::io_service io_service;
tcp::acceptor acceptor(io_service, tcp::endpoint(tcp::v4(), SERVER_PORT));
@@ -791,7 +800,7 @@ void Device::server_run()
printf("Disconnected.\n");
}
}
catch(exception& e) {
catch (exception &e) {
fprintf(stderr, "Network server exception: %s\n", e.what());
}
}
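
/* Editorial note: the server-side pointer translation seen throughout the hunks
 * above (pointer_mapping_insert, device_ptr_from_client_pointer, ptr_map/ptr_imap)
 * boils down to a forward/inverse map pair. A minimal sketch, assuming a
 * hypothetical PointerMapping helper rather than the actual class layout: */

#include <cstdint>
#include <map>

typedef uint64_t device_ptr; /* illustrative stand-in for Cycles' device_ptr */

struct PointerMapping {
  std::map<device_ptr, device_ptr> ptr_map;  /* client pointer -> real device pointer */
  std::map<device_ptr, device_ptr> ptr_imap; /* real device pointer -> client pointer */

  void insert(device_ptr client_pointer, device_ptr real_pointer)
  {
    ptr_map[client_pointer] = real_pointer;
    ptr_imap[real_pointer] = client_pointer;
  }

  /* Translate an incoming client pointer before calling into the local Device. */
  device_ptr to_device(device_ptr client_pointer)
  {
    return ptr_map[client_pointer];
  }

  /* Translate back when a tile is handed to the client. */
  device_ptr to_client(device_ptr real_pointer)
  {
    return ptr_imap[real_pointer];
  }
};
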

View File

@@ -19,35 +19,35 @@
#ifdef WITH_NETWORK
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/thread.hpp>
# include <boost/archive/text_iarchive.hpp>
# include <boost/archive/text_oarchive.hpp>
# include <boost/archive/binary_iarchive.hpp>
# include <boost/archive/binary_oarchive.hpp>
# include <boost/array.hpp>
# include <boost/asio.hpp>
# include <boost/bind.hpp>
# include <boost/serialization/vector.hpp>
# include <boost/thread.hpp>
#include <iostream>
#include <sstream>
#include <deque>
# include <iostream>
# include <sstream>
# include <deque>
#include "render/buffers.h"
# include "render/buffers.h"
#include "util/util_foreach.h"
#include "util/util_list.h"
#include "util/util_map.h"
#include "util/util_param.h"
#include "util/util_string.h"
# include "util/util_foreach.h"
# include "util/util_list.h"
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
CCL_NAMESPACE_BEGIN
using std::cout;
using std::cerr;
using std::cout;
using std::exception;
using std::hex;
using std::setw;
using std::exception;
using boost::asio::ip::tcp;
@@ -56,21 +56,19 @@ static const int DISCOVER_PORT = 5121;
static const string DISCOVER_REQUEST_MSG = "REQUEST_RENDER_SERVER_IP";
static const string DISCOVER_REPLY_MSG = "REPLY_RENDER_SERVER_IP";
#if 0
# if 0
typedef boost::archive::text_oarchive o_archive;
typedef boost::archive::text_iarchive i_archive;
#else
# else
typedef boost::archive::binary_oarchive o_archive;
typedef boost::archive::binary_iarchive i_archive;
#endif
# endif
/* Serialization of device memory */
class network_device_memory : public device_memory
{
public:
network_device_memory(Device *device)
: device_memory(device, "", MEM_READ_ONLY)
class network_device_memory : public device_memory {
public:
network_device_memory(Device *device) : device_memory(device, "", MEM_READ_ONLY)
{
}
@@ -84,37 +82,41 @@ public:
/* Common network error function / object for both DeviceNetwork and DeviceServer */
class NetworkError {
public:
NetworkError() {
public:
NetworkError()
{
error = "";
error_count = 0;
}
~NetworkError() {}
~NetworkError()
{
}
void network_error(const string& message) {
void network_error(const string &message)
{
error = message;
error_count += 1;
}
bool have_error() {
bool have_error()
{
return true ? error_count > 0 : false;
}
private:
private:
string error;
int error_count;
};
/* Remote procedure call Send */
class RPCSend {
public:
RPCSend(tcp::socket& socket_, NetworkError* e, const string& name_ = "")
public:
RPCSend(tcp::socket &socket_, NetworkError *e, const string &name_ = "")
: name(name_), socket(socket_), archive(archive_stream), sent(false)
{
archive & name_;
archive &name_;
error_func = e;
fprintf(stderr, "rpc send %s\n", name.c_str());
}
@@ -123,37 +125,37 @@ public:
{
}
void add(const device_memory& mem)
void add(const device_memory &mem)
{
archive & mem.data_type & mem.data_elements & mem.data_size;
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
archive & mem.type & string(mem.name);
archive & mem.interpolation & mem.extension;
archive & mem.device_pointer;
archive &mem.data_type &mem.data_elements &mem.data_size;
archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
archive &mem.type &string(mem.name);
archive &mem.interpolation &mem.extension;
archive &mem.device_pointer;
}
template<typename T> void add(const T& data)
template<typename T> void add(const T &data)
{
archive & data;
archive &data;
}
void add(const DeviceTask& task)
void add(const DeviceTask &task)
{
int type = (int)task.type;
archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride;
archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w;
archive & task.need_finish_queue;
archive &type &task.x &task.y &task.w &task.h;
archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
archive &task.offset &task.stride;
archive &task.shader_input &task.shader_output &task.shader_eval_type;
archive &task.shader_x &task.shader_w;
archive &task.need_finish_queue;
}
void add(const RenderTile& tile)
void add(const RenderTile &tile)
{
archive & tile.x & tile.y & tile.w & tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample;
archive & tile.resolution & tile.offset & tile.stride;
archive & tile.buffer;
archive &tile.x &tile.y &tile.w &tile.h;
archive &tile.start_sample &tile.num_samples &tile.sample;
archive &tile.resolution &tile.offset &tile.stride;
archive &tile.buffer;
}
void write()
@@ -168,19 +170,17 @@ public:
header_stream << setw(8) << hex << archive_str.size();
string header_str = header_stream.str();
boost::asio::write(socket,
boost::asio::buffer(header_str),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(header_str), boost::asio::transfer_all(), error);
if(error.value())
if (error.value())
error_func->network_error(error.message());
/* then send actual data */
boost::asio::write(socket,
boost::asio::buffer(archive_str),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(archive_str), boost::asio::transfer_all(), error);
if(error.value())
if (error.value())
error_func->network_error(error.message());
sent = true;
@@ -190,17 +190,16 @@ public:
{
boost::system::error_code error;
boost::asio::write(socket,
boost::asio::buffer(buffer, size),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(buffer, size), boost::asio::transfer_all(), error);
if(error.value())
if (error.value())
error_func->network_error(error.message());
}
protected:
protected:
string name;
tcp::socket& socket;
tcp::socket &socket;
ostringstream archive_stream;
o_archive archive;
bool sent;
@@ -210,8 +209,8 @@ protected:
/* Remote procedure call Receive */
class RPCReceive {
public:
RPCReceive(tcp::socket& socket_, NetworkError* e )
public:
RPCReceive(tcp::socket &socket_, NetworkError *e)
: socket(socket_), archive_stream(NULL), archive(NULL)
{
error_func = e;
@@ -220,34 +219,33 @@ public:
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(header), error);
if(error.value()) {
if (error.value()) {
error_func->network_error(error.message());
}
/* verify if we got something */
if(len == header.size()) {
if (len == header.size()) {
/* decode header */
string header_str(&header[0], header.size());
istringstream header_stream(header_str);
size_t data_size;
if((header_stream >> hex >> data_size)) {
if ((header_stream >> hex >> data_size)) {
vector<char> data(data_size);
size_t len = boost::asio::read(socket, boost::asio::buffer(data), error);
if(error.value())
if (error.value())
error_func->network_error(error.message());
if(len == data_size) {
archive_str = (data.size())? string(&data[0], data.size()): string("");
if (len == data_size) {
archive_str = (data.size()) ? string(&data[0], data.size()) : string("");
archive_stream = new istringstream(archive_str);
archive = new i_archive(*archive_stream);
*archive & name;
*archive &name;
fprintf(stderr, "rpc receive %s\n", name.c_str());
}
else {
@@ -269,26 +267,26 @@ public:
delete archive_stream;
}
void read(network_device_memory& mem, string& name)
void read(network_device_memory &mem, string &name)
{
*archive & mem.data_type & mem.data_elements & mem.data_size;
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
*archive & mem.type & name;
*archive & mem.interpolation & mem.extension;
*archive & mem.device_pointer;
*archive &mem.data_type &mem.data_elements &mem.data_size;
*archive &mem.data_width &mem.data_height &mem.data_depth &mem.device_pointer;
*archive &mem.type &name;
*archive &mem.interpolation &mem.extension;
*archive &mem.device_pointer;
mem.name = name.c_str();
mem.host_pointer = 0;
/* Can't transfer OpenGL texture over network. */
if(mem.type == MEM_PIXELS) {
if (mem.type == MEM_PIXELS) {
mem.type = MEM_READ_WRITE;
}
}
template<typename T> void read(T& data)
template<typename T> void read(T &data)
{
*archive & data;
*archive &data;
}
void read_buffer(void *buffer, size_t size)
@@ -296,42 +294,42 @@ public:
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
if(error.value()) {
if (error.value()) {
error_func->network_error(error.message());
}
if(len != size)
if (len != size)
cout << "Network receive error: buffer size doesn't match expected size\n";
}
void read(DeviceTask& task)
void read(DeviceTask &task)
{
int type;
*archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue;
*archive &type &task.x &task.y &task.w &task.h;
*archive &task.rgba_byte &task.rgba_half &task.buffer &task.sample &task.num_samples;
*archive &task.offset &task.stride;
*archive &task.shader_input &task.shader_output &task.shader_eval_type;
*archive &task.shader_x &task.shader_w;
*archive &task.need_finish_queue;
task.type = (DeviceTask::Type)type;
}
void read(RenderTile& tile)
void read(RenderTile &tile)
{
*archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride;
*archive & tile.buffer;
*archive &tile.x &tile.y &tile.w &tile.h;
*archive &tile.start_sample &tile.num_samples &tile.sample;
*archive &tile.resolution &tile.offset &tile.stride;
*archive &tile.buffer;
tile.buffers = NULL;
}
string name;
protected:
tcp::socket& socket;
protected:
tcp::socket &socket;
string archive_str;
istringstream *archive_stream;
i_archive *archive;
@@ -341,7 +339,7 @@ protected:
/* Server auto discovery */
class ServerDiscovery {
public:
public:
explicit ServerDiscovery(bool discover = false)
: listen_socket(io_service), collect_servers(false)
{
@@ -360,7 +358,7 @@ public:
async_receive();
/* start server discovery */
if(discover) {
if (discover) {
collect_servers = true;
servers.clear();
@@ -391,29 +389,28 @@ public:
return result;
}
private:
void handle_receive_from(const boost::system::error_code& error, size_t size)
private:
void handle_receive_from(const boost::system::error_code &error, size_t size)
{
if(error) {
if (error) {
cout << "Server discovery receive error: " << error.message() << "\n";
return;
}
if(size > 0) {
if (size > 0) {
string msg = string(receive_buffer, size);
/* handle incoming message */
if(collect_servers) {
if(msg == DISCOVER_REPLY_MSG) {
if (collect_servers) {
if (msg == DISCOVER_REPLY_MSG) {
string address = receive_endpoint.address().to_string();
mutex.lock();
/* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(),
address) != servers.end();
bool found = std::find(servers.begin(), servers.end(), address) != servers.end();
if(!found)
if (!found)
servers.push_back(address);
mutex.unlock();
@@ -421,7 +418,7 @@ private:
}
else {
/* reply to request */
if(msg == DISCOVER_REQUEST_MSG)
if (msg == DISCOVER_REQUEST_MSG)
broadcast_message(DISCOVER_REPLY_MSG);
}
}
@@ -431,13 +428,15 @@ private:
void async_receive()
{
listen_socket.async_receive_from(
boost::asio::buffer(receive_buffer), receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from, this,
boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
listen_socket.async_receive_from(boost::asio::buffer(receive_buffer),
receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from,
this,
boost::asio::placeholders::error,
boost::asio::placeholders::bytes_transferred));
}
void broadcast_message(const string& msg)
void broadcast_message(const string &msg)
{
/* setup broadcast socket */
boost::asio::ip::udp::socket socket(io_service);
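
/* Editorial note: the RPCSend/RPCReceive pair above frames every message the
 * same way: an 8-character hexadecimal length header followed by the
 * Boost-serialized archive body. A minimal sketch of just that framing;
 * the helper names here are illustrative, not part of the real API. */

#include <iomanip>
#include <sstream>
#include <string>

/* Prefix the serialized archive with its size, written as hex into a
 * fixed-width 8-character header, matching what RPCReceive parses. */
static std::string frame_rpc_message(const std::string &archive_str)
{
  std::ostringstream header_stream;
  header_stream << std::setw(8) << std::hex << archive_str.size();
  return header_stream.str() + archive_str;
}

/* Recover the payload size from the 8-byte header on the receiving side. */
static size_t parse_rpc_header(const std::string &header_str)
{
  std::istringstream header_stream(header_str);
  size_t data_size = 0;
  header_stream >> std::hex >> data_size;
  return data_size;
}
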

View File

@@ -16,18 +16,18 @@
#ifdef WITH_OPENCL
#include "device/opencl/opencl.h"
# include "device/opencl/opencl.h"
#include "device/device_intern.h"
# include "device/device_intern.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_set.h"
#include "util/util_string.h"
# include "util/util_foreach.h"
# include "util/util_logging.h"
# include "util/util_set.h"
# include "util/util_string.h"
CCL_NAMESPACE_BEGIN
Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background)
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
return opencl_create_split_device(info, stats, profiler, background);
}
@@ -37,22 +37,21 @@ bool device_opencl_init()
static bool initialized = false;
static bool result = false;
if(initialized)
if (initialized)
return result;
initialized = true;
if(OpenCLInfo::device_type() != 0) {
if (OpenCLInfo::device_type() != 0) {
int clew_result = clewInit();
if(clew_result == CLEW_SUCCESS) {
if (clew_result == CLEW_SUCCESS) {
VLOG(1) << "CLEW initialization succeeded.";
result = true;
}
else {
VLOG(1) << "CLEW initialization failed: "
<< ((clew_result == CLEW_ERROR_ATEXIT_FAILED)
? "Error setting up atexit() handler"
: "Error opening the library");
<< ((clew_result == CLEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
"Error opening the library");
}
}
else {
@@ -63,14 +62,13 @@ bool device_opencl_init()
return result;
}
static cl_int device_opencl_get_num_platforms_safe(cl_uint *num_platforms)
{
#ifdef _WIN32
# ifdef _WIN32
__try {
return clGetPlatformIDs(0, NULL, num_platforms);
}
__except(EXCEPTION_EXECUTE_HANDLER) {
__except (EXCEPTION_EXECUTE_HANDLER) {
/* Ignore crashes inside the OpenCL driver and hope we can
* survive even with corrupted OpenCL installs. */
fprintf(stderr, "Cycles OpenCL: driver crashed, continuing without OpenCL.\n");
@@ -78,16 +76,16 @@ static cl_int device_opencl_get_num_platforms_safe(cl_uint *num_platforms)
*num_platforms = 0;
return CL_DEVICE_NOT_FOUND;
#else
# else
return clGetPlatformIDs(0, NULL, num_platforms);
#endif
# endif
}
void device_opencl_info(vector<DeviceInfo>& devices)
void device_opencl_info(vector<DeviceInfo> &devices)
{
cl_uint num_platforms = 0;
device_opencl_get_num_platforms_safe(&num_platforms);
if(num_platforms == 0) {
if (num_platforms == 0) {
return;
}
@@ -96,18 +94,18 @@ void device_opencl_info(vector<DeviceInfo>& devices)
/* Devices are numbered consecutively across platforms. */
int num_devices = 0;
set<string> unique_ids;
foreach(OpenCLPlatformDevice& platform_device, usable_devices) {
foreach (OpenCLPlatformDevice &platform_device, usable_devices) {
/* Compute unique ID for persistent user preferences. */
const string& platform_name = platform_device.platform_name;
const string& device_name = platform_device.device_name;
const string &platform_name = platform_device.platform_name;
const string &device_name = platform_device.device_name;
string hardware_id = platform_device.hardware_id;
if(hardware_id == "") {
if (hardware_id == "") {
hardware_id = string_printf("ID_%d", num_devices);
}
string id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
/* Hardware ID might not be unique, add device number in that case. */
if(unique_ids.find(id) != unique_ids.end()) {
if (unique_ids.find(id) != unique_ids.end()) {
id += string_printf("_ID_%d", num_devices);
}
unique_ids.insert(id);
@@ -133,7 +131,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
string device_opencl_capabilities()
{
if(OpenCLInfo::device_type() == 0) {
if (OpenCLInfo::device_type() == 0) {
return "All OpenCL devices are forced to be OFF";
}
string result = "";
@@ -142,7 +140,7 @@ string device_opencl_capabilities()
*/
cl_uint num_platforms = 0;
opencl_assert(device_opencl_get_num_platforms_safe(&num_platforms));
if(num_platforms == 0) {
if (num_platforms == 0) {
return "No OpenCL platforms found\n";
}
result += string_printf("Number of platforms: %u\n", num_platforms);
@@ -153,32 +151,32 @@ string device_opencl_capabilities()
typedef char cl_string[1024];
#define APPEND_INFO(func, id, name, what, type) \
# define APPEND_INFO(func, id, name, what, type) \
do { \
type data; \
memset(&data, 0, sizeof(data)); \
opencl_assert(func(id, what, sizeof(data), &data, NULL)); \
result += string_printf("%s: %s\n", name, to_string(data).c_str()); \
} while(false)
#define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \
} while (false)
# define APPEND_STRING_EXTENSION_INFO(func, id, name, what) \
do { \
char data[1024] = "\0"; \
size_t length = 0; \
if(func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \
if(length != 0 && data[0] != '\0') { \
if (func(id, what, sizeof(data), &data, &length) == CL_SUCCESS) { \
if (length != 0 && data[0] != '\0') { \
result += string_printf("%s: %s\n", name, data); \
} \
} \
} while(false)
#define APPEND_PLATFORM_INFO(id, name, what, type) \
} while (false)
# define APPEND_PLATFORM_INFO(id, name, what, type) \
APPEND_INFO(clGetPlatformInfo, id, "\tPlatform " name, what, type)
#define APPEND_DEVICE_INFO(id, name, what, type) \
# define APPEND_DEVICE_INFO(id, name, what, type) \
APPEND_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what, type)
#define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \
# define APPEND_DEVICE_STRING_EXTENSION_INFO(id, name, what) \
APPEND_STRING_EXTENSION_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)
vector<cl_device_id> device_ids;
for(cl_uint platform = 0; platform < num_platforms; ++platform) {
for (cl_uint platform = 0; platform < num_platforms; ++platform) {
cl_platform_id platform_id = platform_ids[platform];
result += string_printf("Platform #%u\n", platform);
@@ -190,20 +188,14 @@ string device_opencl_capabilities()
APPEND_PLATFORM_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS, cl_string);
cl_uint num_devices = 0;
opencl_assert(clGetDeviceIDs(platform_ids[platform],
CL_DEVICE_TYPE_ALL,
0,
NULL,
&num_devices));
opencl_assert(
clGetDeviceIDs(platform_ids[platform], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
result += string_printf("\tNumber of devices: %u\n", num_devices);
device_ids.resize(num_devices);
opencl_assert(clGetDeviceIDs(platform_ids[platform],
CL_DEVICE_TYPE_ALL,
num_devices,
&device_ids[0],
NULL));
for(cl_uint device = 0; device < num_devices; ++device) {
opencl_assert(clGetDeviceIDs(
platform_ids[platform], CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL));
for (cl_uint device = 0; device < num_devices; ++device) {
cl_device_id device_id = device_ids[device];
result += string_printf("\t\tDevice: #%u\n", device);
@@ -215,15 +207,16 @@ string device_opencl_capabilities()
APPEND_DEVICE_INFO(device_id, "Profile", CL_DEVICE_PROFILE, cl_string);
APPEND_DEVICE_INFO(device_id, "Version", CL_DEVICE_VERSION, cl_string);
APPEND_DEVICE_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS, cl_string);
APPEND_DEVICE_INFO(device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
APPEND_DEVICE_INFO(
device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
APPEND_DEVICE_INFO(device_id, "Max compute units", CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint);
APPEND_DEVICE_INFO(device_id, "Max work group size", CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t);
}
}
#undef APPEND_STRING_INFO
#undef APPEND_PLATFORM_STRING_INFO
#undef APPEND_DEVICE_STRING_INFO
# undef APPEND_STRING_INFO
# undef APPEND_PLATFORM_STRING_INFO
# undef APPEND_DEVICE_STRING_INFO
return result;
}
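
/* Editorial note: the capability dump above uses the standard two-call OpenCL
 * enumeration pattern: query the count, size a buffer, then fetch the IDs.
 * A minimal sketch with plain OpenCL C API calls (Cycles itself goes through
 * clew, but the signatures are the same); error handling is omitted. */

#include <vector>

#include <CL/cl.h>

static std::vector<cl_device_id> enumerate_devices(cl_platform_id platform_id)
{
  cl_uint num_devices = 0;
  /* First call: ask only for the number of devices. */
  clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);

  std::vector<cl_device_id> device_ids(num_devices);
  if (num_devices > 0) {
    /* Second call: fill the sized buffer with device IDs. */
    clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL);
  }
  return device_ids;
}
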

View File

@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
static const double alpha = 0.1; /* alpha for rolling average */
DeviceSplitKernel::DeviceSplitKernel(Device *device)
: device(device),
: device(device),
split_data(device, "split_data"),
ray_state(device, "ray_state", MEM_READ_WRITE),
queue_index(device, "queue_index"),
@@ -85,11 +85,11 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_buffer_update;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
{
#define LOAD_KERNEL(name) \
kernel_##name = get_split_kernel_function(#name, requested_features); \
if(!kernel_##name) { \
if (!kernel_##name) { \
device->set_error(string("Split kernel error: failed to load kernel_") + #name); \
return false; \
}
@@ -123,26 +123,27 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
return true;
}
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size)
{
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
VLOG(1) << "Split state element size: "
<< string_human_readable_number(size_per_element) << " bytes. ("
<< string_human_readable_size(size_per_element) << ").";
VLOG(1) << "Split state element size: " << string_human_readable_number(size_per_element)
<< " bytes. (" << string_human_readable_size(size_per_element) << ").";
return max_buffer_size / size_per_element;
}
bool DeviceSplitKernel::path_trace(DeviceTask *task,
RenderTile& tile,
device_memory& kgbuffer,
device_memory& kernel_data)
RenderTile &tile,
device_memory &kgbuffer,
device_memory &kernel_data)
{
if(device->have_error()) {
if (device->have_error()) {
return false;
}
/* Allocate all required global memory once. */
if(!kernel_data_initialized) {
if (!kernel_data_initialized) {
kernel_data_initialized = true;
/* Set local size */
@@ -165,7 +166,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU :
WORK_POOL_SIZE_GPU;
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */
@@ -180,10 +182,11 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
int num_global_elements = global_size[0] * global_size[1];
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
if(device->have_error()) { \
if (device->have_error()) { \
return false; \
} \
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
if (!kernel_##name->enqueue( \
KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
return false; \
}
@@ -192,7 +195,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* for exponential increase between tile updates */
int time_multiplier = 1;
while(tile.sample < tile.start_sample + tile.num_samples) {
while (tile.sample < tile.start_sample + tile.num_samples) {
/* to keep track of how long it takes to run a number of samples */
double start_time = time_dt();
@@ -200,13 +203,15 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
const int initial_num_samples = 1;
/* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
int(double(time_multiplier) / avg_time_per_sample) + 1 :
initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
if(device->have_error()) {
if (device->have_error()) {
return false;
}
@@ -217,7 +222,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
split_data.zero_to_device();
ray_state.zero_to_device();
if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
if (!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
subtile,
num_global_elements,
kgbuffer,
@@ -226,8 +231,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ray_state,
queue_index,
use_queues_flag,
work_pool_wgs))
{
work_pool_wgs)) {
return false;
}
@@ -236,9 +240,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
bool activeRaysAvailable = true;
double cancel_time = DBL_MAX;
while(activeRaysAvailable) {
while (activeRaysAvailable) {
/* Do path-iteration in host [Enqueue Path-iteration kernels]. */
for(int PathIter = 0; PathIter < 16; PathIter++) {
for (int PathIter = 0; PathIter < 16; PathIter++) {
ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
if (kernel_do_volume) {
@@ -249,7 +253,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(
holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
@@ -261,14 +266,14 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
if(task->get_cancel() && cancel_time == DBL_MAX) {
if (task->get_cancel() && cancel_time == DBL_MAX) {
/* Wait up to twice as many seconds for current samples to finish
* to avoid artifacts in render result from ending too soon.
*/
cancel_time = time_dt() + 2.0 * time_multiplier;
}
if(time_dt() > cancel_time) {
if (time_dt() > cancel_time) {
return true;
}
}
@@ -278,9 +283,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
activeRaysAvailable = false;
for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
for (int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
if (!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
if (IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
/* Something went wrong, abort to avoid looping endlessly. */
device->set_error("Split kernel error: invalid ray state");
return false;
@@ -292,29 +297,29 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
}
}
if(time_dt() > cancel_time) {
if (time_dt() > cancel_time) {
return true;
}
}
double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if(avg_time_per_sample == 0.0) {
if (avg_time_per_sample == 0.0) {
/* start rolling average */
avg_time_per_sample = time_per_sample;
}
else {
avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
avg_time_per_sample = alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample;
}
#undef ENQUEUE_SPLIT_KERNEL
tile.sample += subtile.num_samples;
task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);
task->update_progress(&tile, tile.w * tile.h * subtile.num_samples);
time_multiplier = min(time_multiplier << 1, 10);
if(task->get_cancel()) {
if (task->get_cancel()) {
return true;
}
}
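
/* Editorial note: the scheduling logic reformatted above derives how many
 * samples to enqueue per round from an exponential moving average of the
 * per-sample time (alpha = 0.1), while the interval between tile updates
 * doubles up to a cap. A condensed sketch of just that arithmetic, using a
 * hypothetical free-standing helper rather than DeviceSplitKernel itself: */

struct SampleScheduler {
  double avg_time_per_sample = 0.0;
  double alpha = 0.1; /* weight of the newest measurement in the rolling average */

  /* Approximate number of samples to run before the next progress update. */
  int samples_for_round(int time_multiplier) const
  {
    const int initial_num_samples = 1;
    return (avg_time_per_sample > 0.0) ?
               int(double(time_multiplier) / avg_time_per_sample) + 1 :
               initial_num_samples;
  }

  /* Fold the measured time of the last round into the rolling average. */
  void record_round(double elapsed_seconds, int num_samples)
  {
    const double time_per_sample = elapsed_seconds / num_samples;
    avg_time_per_sample = (avg_time_per_sample == 0.0) ?
                              time_per_sample :
                              alpha * time_per_sample + (1.0 - alpha) * avg_time_per_sample;
  }
};
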

View File

@@ -32,7 +32,7 @@ CCL_NAMESPACE_BEGIN
/* Types used for split kernel */
class KernelDimensions {
public:
public:
size_t global_size[2];
size_t local_size[2];
@@ -44,15 +44,17 @@ public:
};
class SplitKernelFunction {
public:
virtual ~SplitKernelFunction() {}
public:
virtual ~SplitKernelFunction()
{
}
/* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) = 0;
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data) = 0;
};
class DeviceSplitKernel {
private:
private:
Device *device;
SplitKernelFunction *kernel_path_init;
@@ -81,7 +83,8 @@ private:
*/
device_only_memory<uchar> split_data;
device_vector<uchar> ray_state;
device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */
device_only_memory<int>
queue_index; /* Array of size num_queues that tracks the size of each queue. */
/* Flag to make scene_intersect and lamp_emission kernels use queues. */
device_only_memory<char> use_queues_flag;
@@ -97,34 +100,40 @@ private:
size_t local_size[2];
size_t global_size[2];
public:
explicit DeviceSplitKernel(Device* device);
public:
explicit DeviceSplitKernel(Device *device);
virtual ~DeviceSplitKernel();
bool load_kernels(const DeviceRequestedFeatures& requested_features);
bool load_kernels(const DeviceRequestedFeatures &requested_features);
bool path_trace(DeviceTask *task,
RenderTile& rtile,
device_memory& kgbuffer,
device_memory& kernel_data);
RenderTile &rtile,
device_memory &kgbuffer,
device_memory &kernel_data);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual uint64_t state_buffer_size(device_memory &kg,
device_memory &data,
size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
RenderTile& rtile,
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
RenderTile &rtile,
int num_global_elements,
device_memory& kernel_globals,
device_memory& kernel_data_,
device_memory& split_data,
device_memory& ray_state,
device_memory& queue_index,
device_memory& use_queues_flag,
device_memory& work_pool_wgs) = 0;
device_memory &kernel_globals,
device_memory &kernel_data_,
device_memory &split_data,
device_memory &ray_state,
device_memory &queue_index,
device_memory &use_queues_flag,
device_memory &work_pool_wgs) = 0;
virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures&) = 0;
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
DeviceTask *task) = 0;
};
CCL_NAMESPACE_END

View File

@@ -29,34 +29,46 @@ CCL_NAMESPACE_BEGIN
/* Device Task */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
shader_input(0), shader_output(0),
shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
: type(type_),
x(0),
y(0),
w(0),
h(0),
rgba_byte(0),
rgba_half(0),
buffer(0),
sample(0),
num_samples(1),
shader_input(0),
shader_output(0),
shader_eval_type(0),
shader_filter(0),
shader_x(0),
shader_w(0)
{
last_update_time = time_dt();
}
int DeviceTask::get_subtask_count(int num, int max_size)
{
if(max_size != 0) {
if (max_size != 0) {
int max_size_num;
if(type == SHADER) {
max_size_num = (shader_w + max_size - 1)/max_size;
if (type == SHADER) {
max_size_num = (shader_w + max_size - 1) / max_size;
}
else {
max_size = max(1, max_size/w);
max_size_num = (h + max_size - 1)/max_size;
max_size = max(1, max_size / w);
max_size_num = (h + max_size - 1) / max_size;
}
num = max(max_size_num, num);
}
if(type == SHADER) {
if (type == SHADER) {
num = min(shader_w, num);
}
else if(type == RENDER) {
else if (type == RENDER) {
}
else {
num = min(h, num);
@@ -65,14 +77,14 @@ int DeviceTask::get_subtask_count(int num, int max_size)
return num;
}
void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
void DeviceTask::split(list<DeviceTask> &tasks, int num, int max_size)
{
num = get_subtask_count(num, max_size);
if(type == SHADER) {
for(int i = 0; i < num; i++) {
int tx = shader_x + (shader_w/num)*i;
int tw = (i == num-1)? shader_w - i*(shader_w/num): shader_w/num;
if (type == SHADER) {
for (int i = 0; i < num; i++) {
int tx = shader_x + (shader_w / num) * i;
int tw = (i == num - 1) ? shader_w - i * (shader_w / num) : shader_w / num;
DeviceTask task = *this;
@@ -82,14 +94,14 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
tasks.push_back(task);
}
}
else if(type == RENDER) {
for(int i = 0; i < num; i++)
else if (type == RENDER) {
for (int i = 0; i < num; i++)
tasks.push_back(*this);
}
else {
for(int i = 0; i < num; i++) {
int ty = y + (h/num)*i;
int th = (i == num-1)? h - i*(h/num): h/num;
for (int i = 0; i < num; i++) {
int ty = y + (h / num) * i;
int th = (i == num - 1) ? h - i * (h / num) : h / num;
DeviceTask task = *this;
@@ -103,21 +115,20 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if((type != RENDER) &&
(type != SHADER))
if ((type != RENDER) && (type != SHADER))
return;
if(update_progress_sample) {
if(pixel_samples == -1) {
if (update_progress_sample) {
if (pixel_samples == -1) {
pixel_samples = shader_w;
}
update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
update_progress_sample(pixel_samples, rtile ? rtile->sample : 0);
}
if(update_tile_sample) {
if (update_tile_sample) {
double current_time = time_dt();
if(current_time - last_update_time >= 1.0) {
if (current_time - last_update_time >= 1.0) {
update_tile_sample(*rtile);
last_update_time = current_time;
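
/* Editorial note: the split() rule above gives each of the `num` sub-tasks
 * floor(h / num) rows and lets the last one absorb the remainder, so h = 100
 * split into num = 3 yields 33, 33 and 34 rows. A small sketch of that
 * chunking on its own, independent of DeviceTask: */

#include <vector>

static std::vector<int> split_row_counts(int h, int num)
{
  std::vector<int> rows;
  for (int i = 0; i < num; i++) {
    /* Same expression as in DeviceTask::split above. */
    const int th = (i == num - 1) ? h - i * (h / num) : h / num;
    rows.push_back(th);
  }
  return rows; /* for h = 100, num = 3: {33, 33, 34} */
}
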

View File

@@ -33,7 +33,7 @@ class RenderTile;
class Tile;
class DenoiseParams {
public:
public:
/* Pixel radius for neighbouring pixels to take into account. */
int radius;
/* Controls neighbor pixel weighting for the denoising filter. */
@@ -59,7 +59,7 @@ public:
};
class DeviceTask : public Task {
public:
public:
typedef enum { RENDER, FILM_CONVERT, SHADER } Type;
Type type;
@@ -82,17 +82,17 @@ public:
explicit DeviceTask(Type type = RENDER);
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
void split(list<DeviceTask> &tasks, int num, int max_size = 0);
void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile&)> acquire_tile;
function<bool(Device *device, RenderTile &)> acquire_tile;
function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<void(RenderTile &)> update_tile_sample;
function<void(RenderTile &)> release_tile;
function<bool()> get_cancel;
function<void(RenderTile*, Device*)> map_neighbor_tiles;
function<void(RenderTile*, Device*)> unmap_neighbor_tiles;
function<void(RenderTile *, Device *)> map_neighbor_tiles;
function<void(RenderTile *, Device *)> unmap_neighbor_tiles;
DenoiseParams denoising;
bool denoising_from_render;
@@ -110,7 +110,8 @@ public:
bool need_finish_queue;
bool integrator_branched;
int2 requested_tile_size;
protected:
protected:
double last_update_time;
};

View File

@@ -16,14 +16,14 @@
#ifdef WITH_OPENCL
#include "util/util_foreach.h"
# include "util/util_foreach.h"
#include "device/opencl/opencl.h"
#include "device/opencl/memory_manager.h"
# include "device/opencl/opencl.h"
# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
void MemoryManager::DeviceBuffer::add_allocation(Allocation& allocation)
void MemoryManager::DeviceBuffer::add_allocation(Allocation &allocation)
{
allocations.push_back(&allocation);
}
@@ -35,15 +35,15 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
/* Calculate total size and remove any freed. */
size_t total_size = 0;
for(int i = allocations.size()-1; i >= 0; i--) {
Allocation* allocation = allocations[i];
for (int i = allocations.size() - 1; i >= 0; i--) {
Allocation *allocation = allocations[i];
/* Remove allocations that have been freed. */
if(!allocation->mem || allocation->mem->memory_size() == 0) {
if (!allocation->mem || allocation->mem->memory_size() == 0) {
allocation->device_buffer = NULL;
allocation->size = 0;
allocations.erase(allocations.begin()+i);
allocations.erase(allocations.begin() + i);
need_realloc = true;
@@ -53,7 +53,7 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
/* Get actual size for allocation. */
size_t alloc_size = align_up(allocation->mem->memory_size(), 16);
if(allocation->size != alloc_size) {
if (allocation->size != alloc_size) {
/* Allocation is either new or resized. */
allocation->size = alloc_size;
allocation->needs_copy_to_device = true;
@@ -64,46 +64,51 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
total_size += alloc_size;
}
if(need_realloc) {
if (need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
clGetDeviceInfo(
device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if(total_size > max_buffer_size) {
if (total_size > max_buffer_size) {
device->set_error("Scene too complex to fit in available memory.");
return;
}
device_only_memory<uchar> *new_buffer =
new device_only_memory<uchar>(device, "memory manager buffer");
device_only_memory<uchar> *new_buffer = new device_only_memory<uchar>(device,
"memory manager buffer");
new_buffer->alloc_to_device(total_size);
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(new_buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
else {
/* Fast copy from memory already on device. */
opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueCopyBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_MEM_PTR(new_buffer->device_pointer),
allocation->desc.offset,
offset,
allocation->mem->memory_size(),
0, NULL, NULL
));
0,
NULL,
NULL));
}
allocation->desc.offset = offset;
@@ -119,17 +124,19 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
@@ -147,12 +154,12 @@ void MemoryManager::DeviceBuffer::free(OpenCLDevice *)
buffer->free();
}
MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
MemoryManager::DeviceBuffer *MemoryManager::smallest_device_buffer()
{
DeviceBuffer* smallest = device_buffers;
DeviceBuffer *smallest = device_buffers;
foreach(DeviceBuffer& device_buffer, device_buffers) {
if(device_buffer.size < smallest->size) {
foreach (DeviceBuffer &device_buffer, device_buffers) {
if (device_buffer.size < smallest->size) {
smallest = &device_buffer;
}
}
@@ -160,31 +167,29 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
return smallest;
}
MemoryManager::MemoryManager(OpenCLDevice *device)
: device(device), need_update(false)
MemoryManager::MemoryManager(OpenCLDevice *device) : device(device), need_update(false)
{
foreach(DeviceBuffer& device_buffer, device_buffers) {
device_buffer.buffer =
new device_only_memory<uchar>(device, "memory manager buffer");
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.buffer = new device_only_memory<uchar>(device, "memory manager buffer");
}
}
void MemoryManager::free()
{
foreach(DeviceBuffer& device_buffer, device_buffers) {
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.free(device);
}
}
void MemoryManager::alloc(const char *name, device_memory& mem)
void MemoryManager::alloc(const char *name, device_memory &mem)
{
Allocation& allocation = allocations[name];
Allocation &allocation = allocations[name];
allocation.mem = &mem;
allocation.needs_copy_to_device = true;
if(!allocation.device_buffer) {
DeviceBuffer* device_buffer = smallest_device_buffer();
if (!allocation.device_buffer) {
DeviceBuffer *device_buffer = smallest_device_buffer();
allocation.device_buffer = device_buffer;
allocation.desc.device_buffer = device_buffer - device_buffers;
@@ -197,11 +202,11 @@ void MemoryManager::alloc(const char *name, device_memory& mem)
need_update = true;
}
bool MemoryManager::free(device_memory& mem)
bool MemoryManager::free(device_memory &mem)
{
foreach(AllocationsMap::value_type& value, allocations) {
Allocation& allocation = value.second;
if(allocation.mem == &mem) {
foreach (AllocationsMap::value_type &value, allocations) {
Allocation &allocation = value.second;
if (allocation.mem == &mem) {
allocation.device_buffer->size -= mem.memory_size();
@@ -220,19 +225,19 @@ MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name)
{
update_device_memory();
Allocation& allocation = allocations[name];
Allocation &allocation = allocations[name];
return allocation.desc;
}
void MemoryManager::update_device_memory()
{
if(!need_update) {
if (!need_update) {
return;
}
need_update = false;
foreach(DeviceBuffer& device_buffer, device_buffers) {
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.update_device_memory(device);
}
}
@@ -241,8 +246,8 @@ void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
{
update_device_memory();
foreach(DeviceBuffer& device_buffer, device_buffers) {
if(device_buffer.buffer->device_pointer) {
foreach (DeviceBuffer &device_buffer, device_buffers) {
if (device_buffer.buffer->device_pointer) {
device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
}
else {
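
/* Editorial note: the memory manager above packs many small allocations into a
 * few consolidated device buffers, rounding each size up to a 16-byte boundary
 * and recording its offset. A sketch of that layout computation; align_up here
 * is the usual power-of-two bit trick and an assumption, not a quote of the
 * Cycles utility. */

#include <cstddef>
#include <vector>

static size_t align_up(size_t size, size_t alignment)
{
  /* alignment must be a power of two */
  return (size + alignment - 1) & ~(alignment - 1);
}

/* Assign back-to-back offsets for each allocation and return the total size
 * the consolidated device buffer needs. */
static size_t layout_allocations(const std::vector<size_t> &sizes, std::vector<size_t> &offsets)
{
  size_t offset = 0;
  offsets.clear();
  for (const size_t size : sizes) {
    offsets.push_back(offset);
    offset += align_up(size, 16);
  }
  return offset;
}
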

View File

@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
class OpenCLDevice;
class MemoryManager {
public:
public:
static const int NUM_DEVICE_BUFFERS = 8;
struct BufferDescriptor {
@@ -37,7 +37,7 @@ public:
cl_ulong offset;
};
private:
private:
struct DeviceBuffer;
struct Allocation {
@@ -57,11 +57,10 @@ private:
struct DeviceBuffer {
device_only_memory<uchar> *buffer;
vector<Allocation*> allocations;
vector<Allocation *> allocations;
size_t size; /* Size of all allocations. */
DeviceBuffer()
: buffer(NULL), size(0)
DeviceBuffer() : buffer(NULL), size(0)
{
}
@@ -71,7 +70,7 @@ private:
buffer = NULL;
}
void add_allocation(Allocation& allocation);
void add_allocation(Allocation &allocation);
void update_device_memory(OpenCLDevice *device);
@@ -87,15 +86,15 @@ private:
bool need_update;
DeviceBuffer* smallest_device_buffer();
DeviceBuffer *smallest_device_buffer();
public:
public:
MemoryManager(OpenCLDevice *device);
void free(); /* Free all memory. */
void alloc(const char *name, device_memory& mem);
bool free(device_memory& mem);
void alloc(const char *name, device_memory &mem);
bool free(device_memory &mem);
BufferDescriptor get_descriptor(string name);

View File

@@ -16,25 +16,25 @@
#ifdef WITH_OPENCL
#include "device/device.h"
#include "device/device_denoising.h"
#include "device/device_split_kernel.h"
# include "device/device.h"
# include "device/device_denoising.h"
# include "device/device_split_kernel.h"
#include "util/util_map.h"
#include "util/util_param.h"
#include "util/util_string.h"
# include "util/util_map.h"
# include "util/util_param.h"
# include "util/util_string.h"
#include "clew.h"
# include "clew.h"
#include "device/opencl/memory_manager.h"
# include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
/* Disable workarounds, seems to be working fine on latest drivers. */
#define CYCLES_DISABLE_DRIVER_WORKAROUNDS
# define CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workarounds for testing */
#ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
# ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
# undef clEnqueueNDRangeKernel
# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
@@ -50,25 +50,27 @@ CCL_NAMESPACE_BEGIN
# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
# endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
# define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
struct OpenCLPlatformDevice {
OpenCLPlatformDevice(cl_platform_id platform_id,
const string& platform_name,
const string &platform_name,
cl_device_id device_id,
cl_device_type device_type,
const string& device_name,
const string& hardware_id,
const string& device_extensions)
const string &device_name,
const string &hardware_id,
const string &device_extensions)
: platform_id(platform_id),
platform_name(platform_name),
device_id(device_id),
device_type(device_type),
device_name(device_name),
hardware_id(hardware_id),
device_extensions(device_extensions) {}
device_extensions(device_extensions)
{
}
cl_platform_id platform_id;
string platform_name;
cl_device_id device_id;
@@ -79,19 +81,14 @@ struct OpenCLPlatformDevice {
};
/* Contains all static OpenCL helper functions. */
class OpenCLInfo
{
public:
class OpenCLInfo {
public:
static cl_device_type device_type();
static bool use_debug();
static bool device_supported(const string& platform_name,
const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform,
string *error = NULL);
static bool device_version_check(cl_device_id device,
string *error = NULL);
static string get_hardware_id(const string& platform_name,
cl_device_id device_id);
static bool device_supported(const string &platform_name, const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform, string *error = NULL);
static bool device_version_check(cl_device_id device, string *error = NULL);
static string get_hardware_id(const string &platform_name, cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
@@ -101,50 +98,45 @@ public:
static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
static cl_uint get_num_platforms();
static bool get_platforms(vector<cl_platform_id> *platform_ids,
cl_int *error = NULL);
static bool get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error = NULL);
static vector<cl_platform_id> get_platforms();
static bool get_platform_name(cl_platform_id platform_id,
string *platform_name);
static bool get_platform_name(cl_platform_id platform_id, string *platform_name);
static string get_platform_name(cl_platform_id platform_id);
static bool get_num_platform_devices(cl_platform_id platform_id,
cl_device_type device_type,
cl_uint *num_devices,
cl_int *error = NULL);
static cl_uint get_num_platform_devices(cl_platform_id platform_id,
cl_device_type device_type);
static cl_uint get_num_platform_devices(cl_platform_id platform_id, cl_device_type device_type);
static bool get_platform_devices(cl_platform_id platform_id,
cl_device_type device_type,
vector<cl_device_id> *device_ids,
cl_int* error = NULL);
cl_int *error = NULL);
static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id,
cl_device_type device_type);
/* Device information. */
static bool get_device_name(cl_device_id device_id,
string *device_name,
cl_int* error = NULL);
static bool get_device_name(cl_device_id device_id, string *device_name, cl_int *error = NULL);
static string get_device_name(cl_device_id device_id);
static bool get_device_extensions(cl_device_id device_id,
string *device_extensions,
cl_int* error = NULL);
cl_int *error = NULL);
static string get_device_extensions(cl_device_id device_id);
static bool get_device_type(cl_device_id device_id,
cl_device_type *device_type,
cl_int* error = NULL);
cl_int *error = NULL);
static cl_device_type get_device_type(cl_device_id device_id);
static bool get_driver_version(cl_device_id device_id,
int *major,
int *minor,
cl_int* error = NULL);
cl_int *error = NULL);
static int mem_sub_ptr_alignment(cl_device_id device_id);
@@ -158,28 +150,24 @@ public:
/* Thread safe cache for contexts and programs.
*/
class OpenCLCache
{
struct Slot
{
struct ProgramEntry
{
class OpenCLCache {
struct Slot {
struct ProgramEntry {
ProgramEntry();
ProgramEntry(const ProgramEntry& rhs);
ProgramEntry(const ProgramEntry &rhs);
~ProgramEntry();
cl_program program;
thread_mutex *mutex;
};
Slot();
Slot(const Slot& rhs);
Slot(const Slot &rhs);
~Slot();
thread_mutex *context_mutex;
cl_context context;
typedef map<ustring, ProgramEntry> EntryMap;
EntryMap programs;
};
/* key is combination of platform ID and device ID */
@@ -196,10 +184,9 @@ class OpenCLCache
thread_mutex kernel_md5_lock;
/* lazy instantiate */
static OpenCLCache& global_instance();
public:
static OpenCLCache &global_instance();
public:
enum ProgramName {
OCL_DEV_BASE_PROGRAM,
OCL_DEV_MEGAKERNEL_PROGRAM,
@@ -210,55 +197,58 @@ public:
* default constructed thread_scoped_lock. */
static cl_context get_context(cl_platform_id platform,
cl_device_id device,
thread_scoped_lock& slot_locker);
thread_scoped_lock &slot_locker);
/* Same as above. */
static cl_program get_program(cl_platform_id platform,
cl_device_id device,
ustring key,
thread_scoped_lock& slot_locker);
thread_scoped_lock &slot_locker);
/* Store context in the cache. You MUST have tried to get the item before storing to it. */
static void store_context(cl_platform_id platform,
cl_device_id device,
cl_context context,
thread_scoped_lock& slot_locker);
thread_scoped_lock &slot_locker);
/* Same as above. */
static void store_program(cl_platform_id platform,
cl_device_id device,
cl_program program,
ustring key,
thread_scoped_lock& slot_locker);
thread_scoped_lock &slot_locker);
static string get_kernel_md5();
};
#define opencl_device_assert(device, stmt) \
# define opencl_device_assert(device, stmt) \
{ \
cl_int err = stmt; \
\
if(err != CL_SUCCESS) { \
string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if((device)->error_message() == "") \
\
if (err != CL_SUCCESS) { \
string message = string_printf( \
"OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if ((device)->error_message() == "") \
(device)->set_error(message); \
fprintf(stderr, "%s\n", message.c_str()); \
} \
} (void) 0
} \
(void)0
#define opencl_assert(stmt) \
# define opencl_assert(stmt) \
{ \
cl_int err = stmt; \
\
if(err != CL_SUCCESS) { \
string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if(error_msg == "") \
\
if (err != CL_SUCCESS) { \
string message = string_printf( \
"OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if (error_msg == "") \
error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
} \
} (void) 0
} \
(void)0
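Both macros end in `(void)0` so that every call site must supply its own terminating semicolon and the invocation reads like an ordinary statement. A standalone illustration of the same idiom (not Blender code; the macro and function here are invented):
#include <stdio.h>

#define CHECKED_CALL(stmt) \
  { \
    int err = (stmt); \
    if (err != 0) { \
      fprintf(stderr, "error %d in %s\n", err, #stmt); \
    } \
  } \
  (void)0

static int might_fail(void)
{
  return 42;
}

int main(void)
{
  CHECKED_CALL(might_fail()); /* the trailing (void)0 is what makes this ';' required */
  return 0;
}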
class OpenCLDevice : public Device
{
public:
class OpenCLDevice : public Device {
public:
DedicatedTaskPool task_pool;
/* Task pool for required kernels (base, AO kernels during foreground rendering) */
@@ -275,11 +265,13 @@ public:
class OpenCLProgram {
public:
OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL)
{
}
OpenCLProgram(OpenCLDevice *device,
const string& program_name,
const string& kernel_name,
const string& kernel_build_options,
const string &program_name,
const string &kernel_name,
const string &kernel_build_options,
bool use_stdout = true);
~OpenCLProgram();
@@ -292,8 +284,14 @@ public:
/* Create the OpenCL kernels after loading or compiling */
void create_kernels();
bool is_loaded() const { return loaded; }
const string& get_log() const { return log; }
bool is_loaded() const
{
return loaded;
}
const string &get_log() const
{
return log;
}
void report_error();
/* Wait until this kernel is available to be used
@@ -314,15 +312,15 @@ public:
* build calls internally if they come from the same process.
* If that is not supported, this function just returns false.
*/
bool compile_separate(const string& clbin);
bool compile_separate(const string &clbin);
/* Build the program by calling OpenCL directly. */
bool compile_kernel(const string *debug_src);
/* Loading and saving the program from/to disk. */
bool load_binary(const string& clbin, const string *debug_src = NULL);
bool save_binary(const string& clbin);
bool load_binary(const string &clbin, const string *debug_src = NULL);
bool save_binary(const string &clbin);
void add_log(const string& msg, bool is_debug);
void add_error(const string& msg);
void add_log(const string &msg, bool is_debug);
void add_error(const string &msg);
bool loaded;
bool needs_compiling;
@@ -362,9 +360,9 @@ public:
/* Load the kernels and put the created kernels in the given `programs`
* parameter. */
void load_kernels(vector<OpenCLProgram*> &programs,
const DeviceRequestedFeatures& requested_features,
bool is_preview=false);
void load_kernels(vector<OpenCLProgram *> &programs,
const DeviceRequestedFeatures &requested_features,
bool is_preview = false);
};
DeviceSplitKernel *split_kernel;
@@ -378,7 +376,7 @@ public:
OpenCLSplitPrograms kernel_programs;
OpenCLSplitPrograms preview_programs;
typedef map<string, device_vector<uchar>*> ConstMemMap;
typedef map<string, device_vector<uchar> *> ConstMemMap;
typedef map<string, device_ptr> MemMap;
ConstMemMap const_mem_map;
@@ -390,77 +388,81 @@ public:
string device_name;
bool opencl_error(cl_int err);
void opencl_error(const string& message);
void opencl_assert_err(cl_int err, const char* where);
void opencl_error(const string &message);
void opencl_assert_err(cl_int err, const char *where);
OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, bool background);
OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
~OpenCLDevice();
static void CL_CALLBACK context_notify_callback(const char *err_info,
const void * /*private_info*/, size_t /*cb*/, void *user_data);
const void * /*private_info*/,
size_t /*cb*/,
void *user_data);
bool opencl_version_check();
OpenCLSplitPrograms* get_split_programs();
OpenCLSplitPrograms *get_split_programs();
string device_md5_hash(string kernel_custom_build_options = "");
bool load_kernels(const DeviceRequestedFeatures& requested_features);
void load_required_kernels(const DeviceRequestedFeatures& requested_features);
bool load_kernels(const DeviceRequestedFeatures &requested_features);
void load_required_kernels(const DeviceRequestedFeatures &requested_features);
void load_preview_kernels();
bool wait_for_availability(const DeviceRequestedFeatures& requested_features);
bool wait_for_availability(const DeviceRequestedFeatures &requested_features);
DeviceKernelStatus get_active_kernel_switch_state();
/* Get the name of the OpenCL program for the given kernel. */
const string get_opencl_program_name(const string& kernel_name);
const string get_opencl_program_name(const string &kernel_name);
/* Get the program file name to compile (*.cl) for the given kernel */
const string get_opencl_program_filename(const string& kernel_name);
string get_build_options(const DeviceRequestedFeatures& requested_features,
const string& opencl_program_name,
bool preview_kernel=false);
const string get_opencl_program_filename(const string &kernel_name);
string get_build_options(const DeviceRequestedFeatures &requested_features,
const string &opencl_program_name,
bool preview_kernel = false);
/* Enable the default features to reduce recompilation events */
void enable_default_features(DeviceRequestedFeatures& features);
void enable_default_features(DeviceRequestedFeatures &features);
void mem_alloc(device_memory& mem);
void mem_copy_to(device_memory& mem);
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
void mem_zero(device_memory& mem);
void mem_free(device_memory& mem);
void mem_alloc(device_memory &mem);
void mem_copy_to(device_memory &mem);
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
void mem_zero(device_memory &mem);
void mem_free(device_memory &mem);
int mem_sub_ptr_alignment();
void const_copy_to(const char *name, void *host, size_t size);
void tex_alloc(device_memory& mem);
void tex_free(device_memory& mem);
void tex_alloc(device_memory &mem);
void tex_free(device_memory &mem);
size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel, size_t w, size_t h,
void enqueue_kernel(cl_kernel kernel,
size_t w,
size_t h,
bool x_workgroups = false,
size_t max_workgroup_size = -1);
void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
void shader(DeviceTask& task);
void film_convert(DeviceTask &task,
device_ptr buffer,
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
void denoise(RenderTile& tile, DenoisingTask& denoising);
void denoise(RenderTile &tile, DenoisingTask &denoising);
class OpenCLDeviceTask : public DeviceTask {
public:
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task)
: DeviceTask(task)
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
{
run = function_bind(&OpenCLDevice::thread_run,
device,
this);
run = function_bind(&OpenCLDevice::thread_run, device, this);
}
};
int get_split_task_count(DeviceTask& /*task*/)
int get_split_task_count(DeviceTask & /*task*/)
{
return 1;
}
void task_add(DeviceTask& task)
void task_add(DeviceTask &task)
{
task_pool.push(new OpenCLDeviceTask(this, task));
}
@@ -477,16 +479,17 @@ public:
void thread_run(DeviceTask *task);
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}
virtual bool show_samples() const {
virtual bool show_samples() const
{
return true;
}
protected:
protected:
string kernel_build_options(const string *debug_src = NULL);
void mem_zero_kernel(device_ptr ptr, size_t size);
@@ -502,13 +505,13 @@ protected:
device_ptr scale_ptr,
int frame,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
bool denoising_combine_halves(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r, int4 rect,
int r,
int4 rect,
DenoisingTask *task);
bool denoising_divide_shadow(device_ptr a_ptr,
device_ptr b_ptr,
@@ -532,7 +535,7 @@ protected:
device_ptr output_ptr,
DenoisingTask *task);
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int size);
void mem_free_sub_ptr(device_ptr ptr);
class ArgumentWrapper {
@@ -541,37 +544,32 @@ protected:
{
}
ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_memory &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_vector<T> &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(device_only_memory<T>& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_only_memory<T> &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(T& argument) : size(sizeof(argument)),
pointer(&argument)
template<typename T> ArgumentWrapper(T &argument) : size(sizeof(argument)), pointer(&argument)
{
}
ArgumentWrapper(int argument) : size(sizeof(int)),
int_value(argument),
pointer(&int_value)
ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value)
{
}
ArgumentWrapper(float argument) : size(sizeof(float)),
float_value(argument),
pointer(&float_value)
ArgumentWrapper(float argument)
: size(sizeof(float)), float_value(argument), pointer(&float_value)
{
}
@@ -586,39 +584,39 @@ protected:
*/
int kernel_set_args(cl_kernel kernel,
int start_argument_index,
const ArgumentWrapper& arg1 = ArgumentWrapper(),
const ArgumentWrapper& arg2 = ArgumentWrapper(),
const ArgumentWrapper& arg3 = ArgumentWrapper(),
const ArgumentWrapper& arg4 = ArgumentWrapper(),
const ArgumentWrapper& arg5 = ArgumentWrapper(),
const ArgumentWrapper& arg6 = ArgumentWrapper(),
const ArgumentWrapper& arg7 = ArgumentWrapper(),
const ArgumentWrapper& arg8 = ArgumentWrapper(),
const ArgumentWrapper& arg9 = ArgumentWrapper(),
const ArgumentWrapper& arg10 = ArgumentWrapper(),
const ArgumentWrapper& arg11 = ArgumentWrapper(),
const ArgumentWrapper& arg12 = ArgumentWrapper(),
const ArgumentWrapper& arg13 = ArgumentWrapper(),
const ArgumentWrapper& arg14 = ArgumentWrapper(),
const ArgumentWrapper& arg15 = ArgumentWrapper(),
const ArgumentWrapper& arg16 = ArgumentWrapper(),
const ArgumentWrapper& arg17 = ArgumentWrapper(),
const ArgumentWrapper& arg18 = ArgumentWrapper(),
const ArgumentWrapper& arg19 = ArgumentWrapper(),
const ArgumentWrapper& arg20 = ArgumentWrapper(),
const ArgumentWrapper& arg21 = ArgumentWrapper(),
const ArgumentWrapper& arg22 = ArgumentWrapper(),
const ArgumentWrapper& arg23 = ArgumentWrapper(),
const ArgumentWrapper& arg24 = ArgumentWrapper(),
const ArgumentWrapper& arg25 = ArgumentWrapper(),
const ArgumentWrapper& arg26 = ArgumentWrapper(),
const ArgumentWrapper& arg27 = ArgumentWrapper(),
const ArgumentWrapper& arg28 = ArgumentWrapper(),
const ArgumentWrapper& arg29 = ArgumentWrapper(),
const ArgumentWrapper& arg30 = ArgumentWrapper(),
const ArgumentWrapper& arg31 = ArgumentWrapper(),
const ArgumentWrapper& arg32 = ArgumentWrapper(),
const ArgumentWrapper& arg33 = ArgumentWrapper());
const ArgumentWrapper &arg1 = ArgumentWrapper(),
const ArgumentWrapper &arg2 = ArgumentWrapper(),
const ArgumentWrapper &arg3 = ArgumentWrapper(),
const ArgumentWrapper &arg4 = ArgumentWrapper(),
const ArgumentWrapper &arg5 = ArgumentWrapper(),
const ArgumentWrapper &arg6 = ArgumentWrapper(),
const ArgumentWrapper &arg7 = ArgumentWrapper(),
const ArgumentWrapper &arg8 = ArgumentWrapper(),
const ArgumentWrapper &arg9 = ArgumentWrapper(),
const ArgumentWrapper &arg10 = ArgumentWrapper(),
const ArgumentWrapper &arg11 = ArgumentWrapper(),
const ArgumentWrapper &arg12 = ArgumentWrapper(),
const ArgumentWrapper &arg13 = ArgumentWrapper(),
const ArgumentWrapper &arg14 = ArgumentWrapper(),
const ArgumentWrapper &arg15 = ArgumentWrapper(),
const ArgumentWrapper &arg16 = ArgumentWrapper(),
const ArgumentWrapper &arg17 = ArgumentWrapper(),
const ArgumentWrapper &arg18 = ArgumentWrapper(),
const ArgumentWrapper &arg19 = ArgumentWrapper(),
const ArgumentWrapper &arg20 = ArgumentWrapper(),
const ArgumentWrapper &arg21 = ArgumentWrapper(),
const ArgumentWrapper &arg22 = ArgumentWrapper(),
const ArgumentWrapper &arg23 = ArgumentWrapper(),
const ArgumentWrapper &arg24 = ArgumentWrapper(),
const ArgumentWrapper &arg25 = ArgumentWrapper(),
const ArgumentWrapper &arg26 = ArgumentWrapper(),
const ArgumentWrapper &arg27 = ArgumentWrapper(),
const ArgumentWrapper &arg28 = ArgumentWrapper(),
const ArgumentWrapper &arg29 = ArgumentWrapper(),
const ArgumentWrapper &arg30 = ArgumentWrapper(),
const ArgumentWrapper &arg31 = ArgumentWrapper(),
const ArgumentWrapper &arg32 = ArgumentWrapper(),
const ArgumentWrapper &arg33 = ArgumentWrapper());
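A hypothetical call-site sketch of kernel_set_args(), leaning on the implicit ArgumentWrapper conversions above so that device_memory handles and plain int values can be mixed freely; the kernel and buffer names are invented and assumed to be set up elsewhere:
  int start_arg_index = 0;
  start_arg_index += kernel_set_args(ckExampleKernel,
                                     start_arg_index,
                                     d_data,   /* device_memory&, passed as its device pointer */
                                     d_buffer, /* device_memory& */
                                     sample,   /* int, copied into the wrapper */
                                     offset,
                                     stride);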
void release_kernel_safe(cl_kernel kernel);
void release_mem_object_safe(cl_mem mem);
@@ -626,35 +624,33 @@ protected:
/* ** These are for working around some compiler-specific bugs ** */
cl_program load_cached_kernel(
ustring key,
thread_scoped_lock& cache_locker);
cl_program load_cached_kernel(ustring key, thread_scoped_lock &cache_locker);
void store_cached_kernel(
cl_program program,
ustring key,
thread_scoped_lock& cache_locker);
void store_cached_kernel(cl_program program, ustring key, thread_scoped_lock &cache_locker);
private:
private:
MemoryManager memory_manager;
friend class MemoryManager;
static_assert_align(TextureInfo, 16);
device_vector<TextureInfo> texture_info;
typedef map<string, device_memory*> TexturesMap;
typedef map<string, device_memory *> TexturesMap;
TexturesMap textures;
bool textures_need_update;
protected:
protected:
void flush_texture_buffers();
friend class OpenCLSplitKernel;
friend class OpenCLSplitKernelFunction;
};
Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, Profiler &profiler, bool background);
Device *opencl_create_split_device(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background);
CCL_NAMESPACE_END



View File

@@ -26,18 +26,17 @@ CCL_NAMESPACE_BEGIN
/* Node Type */
Node::Node(const NodeType *type_, ustring name_)
: name(name_), type(type_)
Node::Node(const NodeType *type_, ustring name_) : name(name_), type(type_)
{
assert(type);
/* assign non-empty name, convenient for debugging */
if(name.empty()) {
if (name.empty()) {
name = type->name;
}
/* initialize default values */
foreach(const SocketType& socket, type->inputs) {
foreach (const SocketType &socket, type->inputs) {
set_default_value(socket);
}
}
@@ -46,80 +45,75 @@ Node::~Node()
{
}
template<typename T>
static T& get_socket_value(const Node *node, const SocketType& socket)
template<typename T> static T &get_socket_value(const Node *node, const SocketType &socket)
{
return (T&)*(((char*)node) + socket.struct_offset);
return (T &)*(((char *)node) + socket.struct_offset);
}
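get_socket_value() is the core trick of this file: each SocketType records the byte offset of the member it maps to, and reads and writes go through a char* plus that offset. A self-contained sketch of the same mechanism with invented types (not Cycles code):
#include <cassert>
#include <cstddef>

struct FakeSocket {
  size_t struct_offset; /* byte offset of the member inside the node struct */
};

struct FakeNode {
  int unused;
  float radius;
};

template<typename T> static T &get_value(FakeNode *node, const FakeSocket &socket)
{
  return *(T *)(((char *)node) + socket.struct_offset);
}

int main()
{
  FakeNode node = {0, 0.0f};
  FakeSocket radius_socket = {offsetof(FakeNode, radius)};

  get_value<float>(&node, radius_socket) = 2.5f; /* typed write through the offset */
  assert(node.radius == 2.5f);
  return 0;
}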
#ifndef NDEBUG
static bool is_socket_float3(const SocketType& socket)
static bool is_socket_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR ||
socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR ||
socket.type == SocketType::NORMAL;
return socket.type == SocketType::COLOR || socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR || socket.type == SocketType::NORMAL;
}
static bool is_socket_array_float3(const SocketType& socket)
static bool is_socket_array_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR_ARRAY ||
socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY ||
socket.type == SocketType::NORMAL_ARRAY;
return socket.type == SocketType::COLOR_ARRAY || socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY || socket.type == SocketType::NORMAL_ARRAY;
}
#endif
/* set values */
void Node::set(const SocketType& input, bool value)
void Node::set(const SocketType &input, bool value)
{
assert(input.type == SocketType::BOOLEAN);
get_socket_value<bool>(this, input) = value;
}
void Node::set(const SocketType& input, int value)
void Node::set(const SocketType &input, int value)
{
assert((input.type == SocketType::INT || input.type == SocketType::ENUM));
get_socket_value<int>(this, input) = value;
}
void Node::set(const SocketType& input, uint value)
void Node::set(const SocketType &input, uint value)
{
assert(input.type == SocketType::UINT);
get_socket_value<uint>(this, input) = value;
}
void Node::set(const SocketType& input, float value)
void Node::set(const SocketType &input, float value)
{
assert(input.type == SocketType::FLOAT);
get_socket_value<float>(this, input) = value;
}
void Node::set(const SocketType& input, float2 value)
void Node::set(const SocketType &input, float2 value)
{
assert(input.type == SocketType::FLOAT);
get_socket_value<float2>(this, input) = value;
}
void Node::set(const SocketType& input, float3 value)
void Node::set(const SocketType &input, float3 value)
{
assert(is_socket_float3(input));
get_socket_value<float3>(this, input) = value;
}
void Node::set(const SocketType& input, const char *value)
void Node::set(const SocketType &input, const char *value)
{
set(input, ustring(value));
}
void Node::set(const SocketType& input, ustring value)
void Node::set(const SocketType &input, ustring value)
{
if(input.type == SocketType::STRING) {
if (input.type == SocketType::STRING) {
get_socket_value<ustring>(this, input) = value;
}
else if(input.type == SocketType::ENUM) {
const NodeEnum& enm = *input.enum_values;
if(enm.exists(value)) {
else if (input.type == SocketType::ENUM) {
const NodeEnum &enm = *input.enum_values;
if (enm.exists(value)) {
get_socket_value<int>(this, input) = enm[value];
}
else {
@@ -131,111 +125,111 @@ void Node::set(const SocketType& input, ustring value)
}
}
void Node::set(const SocketType& input, const Transform& value)
void Node::set(const SocketType &input, const Transform &value)
{
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Transform>(this, input) = value;
}
void Node::set(const SocketType& input, Node *value)
void Node::set(const SocketType &input, Node *value)
{
assert(input.type == SocketType::TRANSFORM);
get_socket_value<Node*>(this, input) = value;
get_socket_value<Node *>(this, input) = value;
}
/* set array values */
void Node::set(const SocketType& input, array<bool>& value)
void Node::set(const SocketType &input, array<bool> &value)
{
assert(input.type == SocketType::BOOLEAN_ARRAY);
get_socket_value<array<bool> >(this, input).steal_data(value);
get_socket_value<array<bool>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<int>& value)
void Node::set(const SocketType &input, array<int> &value)
{
assert(input.type == SocketType::INT_ARRAY);
get_socket_value<array<int> >(this, input).steal_data(value);
get_socket_value<array<int>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float>& value)
void Node::set(const SocketType &input, array<float> &value)
{
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float> >(this, input).steal_data(value);
get_socket_value<array<float>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float2>& value)
void Node::set(const SocketType &input, array<float2> &value)
{
assert(input.type == SocketType::FLOAT_ARRAY);
get_socket_value<array<float2> >(this, input).steal_data(value);
get_socket_value<array<float2>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<float3>& value)
void Node::set(const SocketType &input, array<float3> &value)
{
assert(is_socket_array_float3(input));
get_socket_value<array<float3> >(this, input).steal_data(value);
get_socket_value<array<float3>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<ustring>& value)
void Node::set(const SocketType &input, array<ustring> &value)
{
assert(input.type == SocketType::STRING_ARRAY);
get_socket_value<array<ustring> >(this, input).steal_data(value);
get_socket_value<array<ustring>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<Transform>& value)
void Node::set(const SocketType &input, array<Transform> &value)
{
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Transform> >(this, input).steal_data(value);
get_socket_value<array<Transform>>(this, input).steal_data(value);
}
void Node::set(const SocketType& input, array<Node*>& value)
void Node::set(const SocketType &input, array<Node *> &value)
{
assert(input.type == SocketType::TRANSFORM_ARRAY);
get_socket_value<array<Node*> >(this, input).steal_data(value);
get_socket_value<array<Node *>>(this, input).steal_data(value);
}
/* get values */
bool Node::get_bool(const SocketType& input) const
bool Node::get_bool(const SocketType &input) const
{
assert(input.type == SocketType::BOOLEAN);
return get_socket_value<bool>(this, input);
}
int Node::get_int(const SocketType& input) const
int Node::get_int(const SocketType &input) const
{
assert(input.type == SocketType::INT || input.type == SocketType::ENUM);
return get_socket_value<int>(this, input);
}
uint Node::get_uint(const SocketType& input) const
uint Node::get_uint(const SocketType &input) const
{
assert(input.type == SocketType::UINT);
return get_socket_value<uint>(this, input);
}
float Node::get_float(const SocketType& input) const
float Node::get_float(const SocketType &input) const
{
assert(input.type == SocketType::FLOAT);
return get_socket_value<float>(this, input);
}
float2 Node::get_float2(const SocketType& input) const
float2 Node::get_float2(const SocketType &input) const
{
assert(input.type == SocketType::FLOAT);
return get_socket_value<float2>(this, input);
}
float3 Node::get_float3(const SocketType& input) const
float3 Node::get_float3(const SocketType &input) const
{
assert(is_socket_float3(input));
return get_socket_value<float3>(this, input);
}
ustring Node::get_string(const SocketType& input) const
ustring Node::get_string(const SocketType &input) const
{
if(input.type == SocketType::STRING) {
if (input.type == SocketType::STRING) {
return get_socket_value<ustring>(this, input);
}
else if(input.type == SocketType::ENUM) {
const NodeEnum& enm = *input.enum_values;
else if (input.type == SocketType::ENUM) {
const NodeEnum &enm = *input.enum_values;
int intvalue = get_socket_value<int>(this, input);
return (enm.exists(intvalue)) ? enm[intvalue] : ustring();
}
@@ -245,165 +239,218 @@ ustring Node::get_string(const SocketType& input) const
}
}
Transform Node::get_transform(const SocketType& input) const
Transform Node::get_transform(const SocketType &input) const
{
assert(input.type == SocketType::TRANSFORM);
return get_socket_value<Transform>(this, input);
}
Node *Node::get_node(const SocketType& input) const
Node *Node::get_node(const SocketType &input) const
{
assert(input.type == SocketType::NODE);
return get_socket_value<Node*>(this, input);
return get_socket_value<Node *>(this, input);
}
/* get array values */
const array<bool>& Node::get_bool_array(const SocketType& input) const
const array<bool> &Node::get_bool_array(const SocketType &input) const
{
assert(input.type == SocketType::BOOLEAN_ARRAY);
return get_socket_value<array<bool> >(this, input);
return get_socket_value<array<bool>>(this, input);
}
const array<int>& Node::get_int_array(const SocketType& input) const
const array<int> &Node::get_int_array(const SocketType &input) const
{
assert(input.type == SocketType::INT_ARRAY);
return get_socket_value<array<int> >(this, input);
return get_socket_value<array<int>>(this, input);
}
const array<float>& Node::get_float_array(const SocketType& input) const
const array<float> &Node::get_float_array(const SocketType &input) const
{
assert(input.type == SocketType::FLOAT_ARRAY);
return get_socket_value<array<float> >(this, input);
return get_socket_value<array<float>>(this, input);
}
const array<float2>& Node::get_float2_array(const SocketType& input) const
const array<float2> &Node::get_float2_array(const SocketType &input) const
{
assert(input.type == SocketType::FLOAT_ARRAY);
return get_socket_value<array<float2> >(this, input);
return get_socket_value<array<float2>>(this, input);
}
const array<float3>& Node::get_float3_array(const SocketType& input) const
const array<float3> &Node::get_float3_array(const SocketType &input) const
{
assert(is_socket_array_float3(input));
return get_socket_value<array<float3> >(this, input);
return get_socket_value<array<float3>>(this, input);
}
const array<ustring>& Node::get_string_array(const SocketType& input) const
const array<ustring> &Node::get_string_array(const SocketType &input) const
{
assert(input.type == SocketType::STRING_ARRAY);
return get_socket_value<array<ustring> >(this, input);
return get_socket_value<array<ustring>>(this, input);
}
const array<Transform>& Node::get_transform_array(const SocketType& input) const
const array<Transform> &Node::get_transform_array(const SocketType &input) const
{
assert(input.type == SocketType::TRANSFORM_ARRAY);
return get_socket_value<array<Transform> >(this, input);
return get_socket_value<array<Transform>>(this, input);
}
const array<Node*>& Node::get_node_array(const SocketType& input) const
const array<Node *> &Node::get_node_array(const SocketType &input) const
{
assert(input.type == SocketType::NODE_ARRAY);
return get_socket_value<array<Node*> >(this, input);
return get_socket_value<array<Node *>>(this, input);
}
/* generic value operations */
bool Node::has_default_value(const SocketType& input) const
bool Node::has_default_value(const SocketType &input) const
{
const void *src = input.default_value;
void *dst = &get_socket_value<char>(this, input);
return memcmp(dst, src, input.size()) == 0;
}
void Node::set_default_value(const SocketType& socket)
void Node::set_default_value(const SocketType &socket)
{
const void *src = socket.default_value;
void *dst = ((char*)this) + socket.struct_offset;
void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
template<typename T>
static void copy_array(const Node *node, const SocketType& socket, const Node *other, const SocketType& other_socket)
static void copy_array(const Node *node,
const SocketType &socket,
const Node *other,
const SocketType &other_socket)
{
const array<T>* src = (const array<T>*)(((char*)other) + other_socket.struct_offset);
array<T>* dst = (array<T>*)(((char*)node) + socket.struct_offset);
const array<T> *src = (const array<T> *)(((char *)other) + other_socket.struct_offset);
array<T> *dst = (array<T> *)(((char *)node) + socket.struct_offset);
*dst = *src;
}
void Node::copy_value(const SocketType& socket, const Node& other, const SocketType& other_socket)
void Node::copy_value(const SocketType &socket, const Node &other, const SocketType &other_socket)
{
assert(socket.type == other_socket.type);
if(socket.is_array()) {
switch(socket.type) {
case SocketType::BOOLEAN_ARRAY: copy_array<bool>(this, socket, &other, other_socket); break;
case SocketType::FLOAT_ARRAY: copy_array<float>(this, socket, &other, other_socket); break;
case SocketType::INT_ARRAY: copy_array<int>(this, socket, &other, other_socket); break;
case SocketType::COLOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::VECTOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::NORMAL_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT2_ARRAY: copy_array<float2>(this, socket, &other, other_socket); break;
case SocketType::STRING_ARRAY: copy_array<ustring>(this, socket, &other, other_socket); break;
case SocketType::TRANSFORM_ARRAY: copy_array<Transform>(this, socket, &other, other_socket); break;
case SocketType::NODE_ARRAY: copy_array<void*>(this, socket, &other, other_socket); break;
default: assert(0); break;
if (socket.is_array()) {
switch (socket.type) {
case SocketType::BOOLEAN_ARRAY:
copy_array<bool>(this, socket, &other, other_socket);
break;
case SocketType::FLOAT_ARRAY:
copy_array<float>(this, socket, &other, other_socket);
break;
case SocketType::INT_ARRAY:
copy_array<int>(this, socket, &other, other_socket);
break;
case SocketType::COLOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::VECTOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::NORMAL_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT2_ARRAY:
copy_array<float2>(this, socket, &other, other_socket);
break;
case SocketType::STRING_ARRAY:
copy_array<ustring>(this, socket, &other, other_socket);
break;
case SocketType::TRANSFORM_ARRAY:
copy_array<Transform>(this, socket, &other, other_socket);
break;
case SocketType::NODE_ARRAY:
copy_array<void *>(this, socket, &other, other_socket);
break;
default:
assert(0);
break;
}
}
else {
const void *src = ((char*)&other) + other_socket.struct_offset;
void *dst = ((char*)this) + socket.struct_offset;
const void *src = ((char *)&other) + other_socket.struct_offset;
void *dst = ((char *)this) + socket.struct_offset;
memcpy(dst, src, socket.size());
}
}
template<typename T>
static bool is_array_equal(const Node *node, const Node *other, const SocketType& socket)
static bool is_array_equal(const Node *node, const Node *other, const SocketType &socket)
{
const array<T>* a = (const array<T>*)(((char*)node) + socket.struct_offset);
const array<T>* b = (const array<T>*)(((char*)other) + socket.struct_offset);
const array<T> *a = (const array<T> *)(((char *)node) + socket.struct_offset);
const array<T> *b = (const array<T> *)(((char *)other) + socket.struct_offset);
return *a == *b;
}
template<typename T>
static bool is_value_equal(const Node *node, const Node *other, const SocketType& socket)
static bool is_value_equal(const Node *node, const Node *other, const SocketType &socket)
{
const T *a = (const T*)(((char*)node) + socket.struct_offset);
const T *b = (const T*)(((char*)other) + socket.struct_offset);
const T *a = (const T *)(((char *)node) + socket.struct_offset);
const T *b = (const T *)(((char *)other) + socket.struct_offset);
return *a == *b;
}
bool Node::equals_value(const Node& other, const SocketType& socket) const
bool Node::equals_value(const Node &other, const SocketType &socket) const
{
switch(socket.type) {
case SocketType::BOOLEAN: return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT: return is_value_equal<float>(this, &other, socket);
case SocketType::INT: return is_value_equal<int>(this, &other, socket);
case SocketType::UINT: return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT: return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2: return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE: return true;
case SocketType::STRING: return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM: return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM: return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE: return is_value_equal<void*>(this, &other, socket);
switch (socket.type) {
case SocketType::BOOLEAN:
return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT:
return is_value_equal<float>(this, &other, socket);
case SocketType::INT:
return is_value_equal<int>(this, &other, socket);
case SocketType::UINT:
return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT:
return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2:
return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE:
return true;
case SocketType::STRING:
return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM:
return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM:
return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE:
return is_value_equal<void *>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY: return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY: return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY: return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY: return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY: return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY: return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY: return is_array_equal<void*>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY:
return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY:
return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY:
return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY:
return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY:
return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY:
return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY:
return is_array_equal<void *>(this, &other, socket);
case SocketType::UNDEFINED: return true;
case SocketType::UNDEFINED:
return true;
}
return true;
@@ -411,12 +458,12 @@ bool Node::equals_value(const Node& other, const SocketType& socket) const
/* equals */
bool Node::equals(const Node& other) const
bool Node::equals(const Node &other) const
{
assert(type == other.type);
foreach(const SocketType& socket, type->inputs) {
if(!equals_value(other, socket))
foreach (const SocketType &socket, type->inputs) {
if (!equals_value(other, socket))
return false;
}
@@ -427,84 +474,131 @@ bool Node::equals(const Node& other) const
namespace {
template<typename T>
void value_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void value_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
md5.append(((uint8_t*)node) + socket.struct_offset, socket.size());
md5.append(((uint8_t *)node) + socket.struct_offset, socket.size());
}
void float3_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
void float3_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
/* Don't compare 4th element used for padding. */
md5.append(((uint8_t*)node) + socket.struct_offset, sizeof(float) * 3);
md5.append(((uint8_t *)node) + socket.struct_offset, sizeof(float) * 3);
}
template<typename T>
void array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(T));
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t *)&a[i], sizeof(T));
}
}
void float3_array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
void float3_array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
/* Don't compare 4th element used for padding. */
const array<float3>& a = *(const array<float3>*)(((char*)node) + socket.struct_offset);
for(size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t*)&a[i], sizeof(float) * 3);
const array<float3> &a = *(const array<float3> *)(((char *)node) + socket.struct_offset);
for (size_t i = 0; i < a.size(); i++) {
md5.append((uint8_t *)&a[i], sizeof(float) * 3);
}
}
} // namespace
void Node::hash(MD5Hash& md5)
void Node::hash(MD5Hash &md5)
{
md5.append(type->name.string());
foreach(const SocketType& socket, type->inputs) {
foreach (const SocketType &socket, type->inputs) {
md5.append(socket.name.string());
switch(socket.type) {
case SocketType::BOOLEAN: value_hash<bool>(this, socket, md5); break;
case SocketType::FLOAT: value_hash<float>(this, socket, md5); break;
case SocketType::INT: value_hash<int>(this, socket, md5); break;
case SocketType::UINT: value_hash<uint>(this, socket, md5); break;
case SocketType::COLOR: float3_hash(this, socket, md5); break;
case SocketType::VECTOR: float3_hash(this, socket, md5); break;
case SocketType::POINT: float3_hash(this, socket, md5); break;
case SocketType::NORMAL: float3_hash(this, socket, md5); break;
case SocketType::POINT2: value_hash<float2>(this, socket, md5); break;
case SocketType::CLOSURE: break;
case SocketType::STRING: value_hash<ustring>(this, socket, md5); break;
case SocketType::ENUM: value_hash<int>(this, socket, md5); break;
case SocketType::TRANSFORM: value_hash<Transform>(this, socket, md5); break;
case SocketType::NODE: value_hash<void*>(this, socket, md5); break;
switch (socket.type) {
case SocketType::BOOLEAN:
value_hash<bool>(this, socket, md5);
break;
case SocketType::FLOAT:
value_hash<float>(this, socket, md5);
break;
case SocketType::INT:
value_hash<int>(this, socket, md5);
break;
case SocketType::UINT:
value_hash<uint>(this, socket, md5);
break;
case SocketType::COLOR:
float3_hash(this, socket, md5);
break;
case SocketType::VECTOR:
float3_hash(this, socket, md5);
break;
case SocketType::POINT:
float3_hash(this, socket, md5);
break;
case SocketType::NORMAL:
float3_hash(this, socket, md5);
break;
case SocketType::POINT2:
value_hash<float2>(this, socket, md5);
break;
case SocketType::CLOSURE:
break;
case SocketType::STRING:
value_hash<ustring>(this, socket, md5);
break;
case SocketType::ENUM:
value_hash<int>(this, socket, md5);
break;
case SocketType::TRANSFORM:
value_hash<Transform>(this, socket, md5);
break;
case SocketType::NODE:
value_hash<void *>(this, socket, md5);
break;
case SocketType::BOOLEAN_ARRAY: array_hash<bool>(this, socket, md5); break;
case SocketType::FLOAT_ARRAY: array_hash<float>(this, socket, md5); break;
case SocketType::INT_ARRAY: array_hash<int>(this, socket, md5); break;
case SocketType::COLOR_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::VECTOR_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::POINT_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::NORMAL_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::POINT2_ARRAY: array_hash<float2>(this, socket, md5); break;
case SocketType::STRING_ARRAY: array_hash<ustring>(this, socket, md5); break;
case SocketType::TRANSFORM_ARRAY: array_hash<Transform>(this, socket, md5); break;
case SocketType::NODE_ARRAY: array_hash<void*>(this, socket, md5); break;
case SocketType::BOOLEAN_ARRAY:
array_hash<bool>(this, socket, md5);
break;
case SocketType::FLOAT_ARRAY:
array_hash<float>(this, socket, md5);
break;
case SocketType::INT_ARRAY:
array_hash<int>(this, socket, md5);
break;
case SocketType::COLOR_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::VECTOR_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::POINT_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::NORMAL_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::POINT2_ARRAY:
array_hash<float2>(this, socket, md5);
break;
case SocketType::STRING_ARRAY:
array_hash<ustring>(this, socket, md5);
break;
case SocketType::TRANSFORM_ARRAY:
array_hash<Transform>(this, socket, md5);
break;
case SocketType::NODE_ARRAY:
array_hash<void *>(this, socket, md5);
break;
case SocketType::UNDEFINED: break;
case SocketType::UNDEFINED:
break;
}
}
}
namespace {
template<typename T>
size_t array_size_in_bytes(const Node *node, const SocketType& socket)
template<typename T> size_t array_size_in_bytes(const Node *node, const SocketType &socket)
{
const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset);
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
return a.size() * sizeof(T);
}
@@ -513,8 +607,8 @@ size_t array_size_in_bytes(const Node *node, const SocketType& socket)
size_t Node::get_total_size_in_bytes() const
{
size_t total_size = 0;
foreach(const SocketType& socket, type->inputs) {
switch(socket.type) {
foreach (const SocketType &socket, type->inputs) {
switch (socket.type) {
case SocketType::BOOLEAN:
case SocketType::FLOAT:
case SocketType::INT:
@@ -563,10 +657,11 @@ size_t Node::get_total_size_in_bytes() const
total_size += array_size_in_bytes<Transform>(this, socket);
break;
case SocketType::NODE_ARRAY:
total_size += array_size_in_bytes<void*>(this, socket);
total_size += array_size_in_bytes<void *>(this, socket);
break;
case SocketType::UNDEFINED: break;
case SocketType::UNDEFINED:
break;
}
}
return total_size;

View File

@@ -31,66 +31,65 @@ struct Transform;
/* Node */
struct Node
{
struct Node {
explicit Node(const NodeType *type, ustring name = ustring());
virtual ~Node();
/* set values */
void set(const SocketType& input, bool value);
void set(const SocketType& input, int value);
void set(const SocketType& input, uint value);
void set(const SocketType& input, float value);
void set(const SocketType& input, float2 value);
void set(const SocketType& input, float3 value);
void set(const SocketType& input, const char *value);
void set(const SocketType& input, ustring value);
void set(const SocketType& input, const Transform& value);
void set(const SocketType& input, Node *value);
void set(const SocketType &input, bool value);
void set(const SocketType &input, int value);
void set(const SocketType &input, uint value);
void set(const SocketType &input, float value);
void set(const SocketType &input, float2 value);
void set(const SocketType &input, float3 value);
void set(const SocketType &input, const char *value);
void set(const SocketType &input, ustring value);
void set(const SocketType &input, const Transform &value);
void set(const SocketType &input, Node *value);
/* set array values. the memory from the input array will be taken over
* by the node and the input array will be empty after return */
void set(const SocketType& input, array<bool>& value);
void set(const SocketType& input, array<int>& value);
void set(const SocketType& input, array<float>& value);
void set(const SocketType& input, array<float2>& value);
void set(const SocketType& input, array<float3>& value);
void set(const SocketType& input, array<ustring>& value);
void set(const SocketType& input, array<Transform>& value);
void set(const SocketType& input, array<Node*>& value);
void set(const SocketType &input, array<bool> &value);
void set(const SocketType &input, array<int> &value);
void set(const SocketType &input, array<float> &value);
void set(const SocketType &input, array<float2> &value);
void set(const SocketType &input, array<float3> &value);
void set(const SocketType &input, array<ustring> &value);
void set(const SocketType &input, array<Transform> &value);
void set(const SocketType &input, array<Node *> &value);
/* get values */
bool get_bool(const SocketType& input) const;
int get_int(const SocketType& input) const;
uint get_uint(const SocketType& input) const;
float get_float(const SocketType& input) const;
float2 get_float2(const SocketType& input) const;
float3 get_float3(const SocketType& input) const;
ustring get_string(const SocketType& input) const;
Transform get_transform(const SocketType& input) const;
Node *get_node(const SocketType& input) const;
bool get_bool(const SocketType &input) const;
int get_int(const SocketType &input) const;
uint get_uint(const SocketType &input) const;
float get_float(const SocketType &input) const;
float2 get_float2(const SocketType &input) const;
float3 get_float3(const SocketType &input) const;
ustring get_string(const SocketType &input) const;
Transform get_transform(const SocketType &input) const;
Node *get_node(const SocketType &input) const;
/* get array values */
const array<bool>& get_bool_array(const SocketType& input) const;
const array<int>& get_int_array(const SocketType& input) const;
const array<float>& get_float_array(const SocketType& input) const;
const array<float2>& get_float2_array(const SocketType& input) const;
const array<float3>& get_float3_array(const SocketType& input) const;
const array<ustring>& get_string_array(const SocketType& input) const;
const array<Transform>& get_transform_array(const SocketType& input) const;
const array<Node*>& get_node_array(const SocketType& input) const;
const array<bool> &get_bool_array(const SocketType &input) const;
const array<int> &get_int_array(const SocketType &input) const;
const array<float> &get_float_array(const SocketType &input) const;
const array<float2> &get_float2_array(const SocketType &input) const;
const array<float3> &get_float3_array(const SocketType &input) const;
const array<ustring> &get_string_array(const SocketType &input) const;
const array<Transform> &get_transform_array(const SocketType &input) const;
const array<Node *> &get_node_array(const SocketType &input) const;
/* generic values operations */
bool has_default_value(const SocketType& input) const;
void set_default_value(const SocketType& input);
bool equals_value(const Node& other, const SocketType& input) const;
void copy_value(const SocketType& input, const Node& other, const SocketType& other_input);
bool has_default_value(const SocketType &input) const;
void set_default_value(const SocketType &input);
bool equals_value(const Node &other, const SocketType &input) const;
void copy_value(const SocketType &input, const Node &other, const SocketType &other_input);
/* equals */
bool equals(const Node& other) const;
bool equals(const Node &other) const;
/* compute hash of node and its socket values */
void hash(MD5Hash& md5);
void hash(MD5Hash &md5);
/* Get total size of this node. */
size_t get_total_size_in_bytes() const;

View File

@@ -26,23 +26,48 @@ CCL_NAMESPACE_BEGIN
* Utility class for enum values. */
struct NodeEnum {
bool empty() const { return left.empty(); }
void insert(const char *x, int y) {
bool empty() const
{
return left.empty();
}
void insert(const char *x, int y)
{
left[ustring(x)] = y;
right[y] = ustring(x);
}
bool exists(ustring x) const { return left.find(x) != left.end(); }
bool exists(int y) const { return right.find(y) != right.end(); }
bool exists(ustring x) const
{
return left.find(x) != left.end();
}
bool exists(int y) const
{
return right.find(y) != right.end();
}
int operator[](const char *x) const { return left.find(ustring(x))->second; }
int operator[](ustring x) const { return left.find(x)->second; }
ustring operator[](int y) const { return right.find(y)->second; }
int operator[](const char *x) const
{
return left.find(ustring(x))->second;
}
int operator[](ustring x) const
{
return left.find(x)->second;
}
ustring operator[](int y) const
{
return right.find(y)->second;
}
unordered_map<ustring, int, ustringHash>::const_iterator begin() const { return left.begin(); }
unordered_map<ustring, int, ustringHash>::const_iterator end() const { return left.end(); }
unordered_map<ustring, int, ustringHash>::const_iterator begin() const
{
return left.begin();
}
unordered_map<ustring, int, ustringHash>::const_iterator end() const
{
return left.end();
}
private:
private:
unordered_map<ustring, int, ustringHash> left;
unordered_map<int, ustring> right;
};
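A hypothetical usage sketch of NodeEnum, showing the two-way name/value mapping the operators above provide; the enum name and entries are invented:
  NodeEnum interpolation_enum;
  interpolation_enum.insert("closest", 0);
  interpolation_enum.insert("linear", 1);

  assert(interpolation_enum.exists(ustring("linear")));
  assert(interpolation_enum["linear"] == 1);          /* name -> value */
  assert(interpolation_enum[1] == ustring("linear")); /* value -> name */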

View File

@@ -34,36 +34,61 @@ bool SocketType::is_array() const
size_t SocketType::size(Type type)
{
switch(type)
{
case UNDEFINED: return 0;
switch (type) {
case UNDEFINED:
return 0;
case BOOLEAN: return sizeof(bool);
case FLOAT: return sizeof(float);
case INT: return sizeof(int);
case UINT: return sizeof(uint);
case COLOR: return sizeof(float3);
case VECTOR: return sizeof(float3);
case POINT: return sizeof(float3);
case NORMAL: return sizeof(float3);
case POINT2: return sizeof(float2);
case CLOSURE: return 0;
case STRING: return sizeof(ustring);
case ENUM: return sizeof(int);
case TRANSFORM: return sizeof(Transform);
case NODE: return sizeof(void*);
case BOOLEAN:
return sizeof(bool);
case FLOAT:
return sizeof(float);
case INT:
return sizeof(int);
case UINT:
return sizeof(uint);
case COLOR:
return sizeof(float3);
case VECTOR:
return sizeof(float3);
case POINT:
return sizeof(float3);
case NORMAL:
return sizeof(float3);
case POINT2:
return sizeof(float2);
case CLOSURE:
return 0;
case STRING:
return sizeof(ustring);
case ENUM:
return sizeof(int);
case TRANSFORM:
return sizeof(Transform);
case NODE:
return sizeof(void *);
case BOOLEAN_ARRAY: return sizeof(array<bool>);
case FLOAT_ARRAY: return sizeof(array<float>);
case INT_ARRAY: return sizeof(array<int>);
case COLOR_ARRAY: return sizeof(array<float3>);
case VECTOR_ARRAY: return sizeof(array<float3>);
case POINT_ARRAY: return sizeof(array<float3>);
case NORMAL_ARRAY: return sizeof(array<float3>);
case POINT2_ARRAY: return sizeof(array<float2>);
case STRING_ARRAY: return sizeof(array<ustring>);
case TRANSFORM_ARRAY: return sizeof(array<Transform>);
case NODE_ARRAY: return sizeof(array<void*>);
case BOOLEAN_ARRAY:
return sizeof(array<bool>);
case FLOAT_ARRAY:
return sizeof(array<float>);
case INT_ARRAY:
return sizeof(array<int>);
case COLOR_ARRAY:
return sizeof(array<float3>);
case VECTOR_ARRAY:
return sizeof(array<float3>);
case POINT_ARRAY:
return sizeof(array<float3>);
case NORMAL_ARRAY:
return sizeof(array<float3>);
case POINT2_ARRAY:
return sizeof(array<float2>);
case STRING_ARRAY:
return sizeof(array<ustring>);
case TRANSFORM_ARRAY:
return sizeof(array<Transform>);
case NODE_ARRAY:
return sizeof(array<void *>);
}
assert(0);
@@ -83,34 +108,21 @@ void *SocketType::zero_default_value()
ustring SocketType::type_name(Type type)
{
static ustring names[] = {
ustring("undefined"),
static ustring names[] = {ustring("undefined"),
ustring("boolean"),
ustring("float"),
ustring("int"),
ustring("uint"),
ustring("color"),
ustring("vector"),
ustring("point"),
ustring("normal"),
ustring("point2"),
ustring("closure"),
ustring("string"),
ustring("enum"),
ustring("transform"),
ustring("node"),
ustring("boolean"), ustring("float"),
ustring("int"), ustring("uint"),
ustring("color"), ustring("vector"),
ustring("point"), ustring("normal"),
ustring("point2"), ustring("closure"),
ustring("string"), ustring("enum"),
ustring("transform"), ustring("node"),
ustring("array_boolean"),
ustring("array_float"),
ustring("array_int"),
ustring("array_color"),
ustring("array_vector"),
ustring("array_point"),
ustring("array_normal"),
ustring("array_point2"),
ustring("array_string"),
ustring("array_transform"),
ustring("array_boolean"), ustring("array_float"),
ustring("array_int"), ustring("array_color"),
ustring("array_vector"), ustring("array_point"),
ustring("array_normal"), ustring("array_point2"),
ustring("array_string"), ustring("array_transform"),
ustring("array_node")};
return names[(int)type];
@@ -123,8 +135,7 @@ bool SocketType::is_float3(Type type)
/* Node Type */
NodeType::NodeType(Type type_)
: type(type_)
NodeType::NodeType(Type type_) : type(type_)
{
}
@@ -132,9 +143,15 @@ NodeType::~NodeType()
{
}
void NodeType::register_input(ustring name, ustring ui_name, SocketType::Type type, int struct_offset,
const void *default_value, const NodeEnum *enum_values,
const NodeType **node_type, int flags, int extra_flags)
void NodeType::register_input(ustring name,
ustring ui_name,
SocketType::Type type,
int struct_offset,
const void *default_value,
const NodeEnum *enum_values,
const NodeType **node_type,
int flags,
int extra_flags)
{
SocketType socket;
socket.name = name;
@@ -164,8 +181,8 @@ void NodeType::register_output(ustring name, ustring ui_name, SocketType::Type t
const SocketType *NodeType::find_input(ustring name) const
{
foreach(const SocketType& socket, inputs) {
if(socket.name == name) {
foreach (const SocketType &socket, inputs) {
if (socket.name == name) {
return &socket;
}
}
@@ -175,8 +192,8 @@ const SocketType *NodeType::find_input(ustring name) const
const SocketType *NodeType::find_output(ustring name) const
{
foreach(const SocketType& socket, outputs) {
if(socket.name == name) {
foreach (const SocketType &socket, outputs) {
if (socket.name == name) {
return &socket;
}
}
@@ -186,7 +203,7 @@ const SocketType *NodeType::find_output(ustring name) const
/* Node Type Registry */
unordered_map<ustring, NodeType, ustringHash>& NodeType::types()
unordered_map<ustring, NodeType, ustringHash> &NodeType::types()
{
static unordered_map<ustring, NodeType, ustringHash> _types;
return _types;
@@ -196,7 +213,7 @@ NodeType *NodeType::add(const char *name_, CreateFunc create_, Type type_)
{
ustring name(name_);
if(types().find(name) != types().end()) {
if (types().find(name) != types().end()) {
fprintf(stderr, "Node type %s registered twice!\n", name_);
assert(0);
return NULL;

View File

@@ -30,10 +30,8 @@ struct NodeType;
/* Socket Type */
struct SocketType
{
enum Type
{
struct SocketType {
enum Type {
UNDEFINED,
BOOLEAN,
@@ -102,21 +100,21 @@ struct SocketType
/* Node Type */
struct NodeType
{
enum Type {
NONE,
SHADER
};
struct NodeType {
enum Type { NONE, SHADER };
explicit NodeType(Type type = NONE);
~NodeType();
void register_input(ustring name, ustring ui_name, SocketType::Type type,
int struct_offset, const void *default_value,
void register_input(ustring name,
ustring ui_name,
SocketType::Type type,
int struct_offset,
const void *default_value,
const NodeEnum *enum_values = NULL,
const NodeType **node_type = NULL,
int flags = 0, int extra_flags = 0);
int flags = 0,
int extra_flags = 0);
void register_output(ustring name, ustring ui_name, SocketType::Type type);
const SocketType *find_input(ustring name) const;
@@ -126,28 +124,29 @@ struct NodeType
ustring name;
Type type;
vector<SocketType, std::allocator<SocketType> > inputs;
vector<SocketType, std::allocator<SocketType> > outputs;
vector<SocketType, std::allocator<SocketType>> inputs;
vector<SocketType, std::allocator<SocketType>> outputs;
CreateFunc create;
static NodeType *add(const char *name, CreateFunc create, Type type = NONE);
static const NodeType *find(ustring name);
static unordered_map<ustring, NodeType, ustringHash>& types();
static unordered_map<ustring, NodeType, ustringHash> &types();
};
/* Node Definition Macros */
#define NODE_DECLARE \
template<typename T> \
static const NodeType *register_type(); \
static Node *create(const NodeType *type); \
static const NodeType *node_type;
template<typename T> static const NodeType *register_type(); \
static Node *create(const NodeType *type); \
static const NodeType *node_type;
#define NODE_DEFINE(structname) \
const NodeType *structname::node_type = structname::register_type<structname>(); \
Node *structname::create(const NodeType*) { return new structname(); } \
template<typename T> \
const NodeType *structname::register_type()
const NodeType *structname::node_type = structname::register_type<structname>(); \
Node *structname::create(const NodeType *) \
{ \
return new structname(); \
} \
template<typename T> const NodeType *structname::register_type()
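A hypothetical sketch of how a node class would use NODE_DECLARE/NODE_DEFINE together with the socket macros that follow; the class, node name and socket are invented, and SOCKET_FLOAT is assumed to expand like the SOCKET_* helpers below:
class SphereNode : public Node {
 public:
  NODE_DECLARE
  SphereNode() : Node(node_type) {}

  float radius;
};

NODE_DEFINE(SphereNode)
{
  NodeType *type = NodeType::add("sphere", create);
  SOCKET_FLOAT(radius, "Radius", 1.0f); /* registers `radius` via its struct offset */
  return type;
}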
/* Socket Definition Macros */
@@ -157,7 +156,15 @@ const NodeType *structname::register_type()
{ \
static datatype defval = default_value; \
CHECK_TYPE(((T *)1)->name, datatype); \
type->register_input(ustring(#name), ustring(ui_name), TYPE, SOCKET_OFFSETOF(T, name), &defval, NULL, NULL, flags, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
TYPE, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
NULL, \
flags, \
##__VA_ARGS__); \
}
#define SOCKET_BOOLEAN(name, ui_name, default_value, ...) \
@@ -186,80 +193,186 @@ const NodeType *structname::register_type()
{ \
static int defval = default_value; \
assert(SOCKET_SIZEOF(T, name) == sizeof(int)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::ENUM, SOCKET_OFFSETOF(T, name), &defval, &values, NULL, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::ENUM, \
SOCKET_OFFSETOF(T, name), \
&defval, \
&values, \
NULL, \
##__VA_ARGS__); \
}
#define SOCKET_NODE(name, ui_name, node_type, ...) \
{ \
static Node *defval = NULL; \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node*)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::NODE, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
node_type, \
##__VA_ARGS__); \
}
#define SOCKET_BOOLEAN_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_INT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<int>, SocketType::INT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_FLOAT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_COLOR_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_VECTOR_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_NORMAL_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT2_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_STRING_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_TRANSFORM_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<Transform>, SocketType::TRANSFORM_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
array<Transform>, \
SocketType::TRANSFORM_ARRAY, \
0, \
##__VA_ARGS__)
#define SOCKET_NODE_ARRAY(name, ui_name, node_type, ...) \
{ \
static Node *defval = NULL; \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node*)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE_ARRAY, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::NODE_ARRAY, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
node_type, \
##__VA_ARGS__); \
}
#define SOCKET_IN_BOOLEAN(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
bool, \
SocketType::BOOLEAN, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_INT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_FLOAT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float, \
SocketType::FLOAT, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_COLOR(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::COLOR, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_VECTOR(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::VECTOR, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_POINT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::POINT, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_NORMAL(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::NORMAL, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_STRING(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
ustring, \
SocketType::STRING, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_CLOSURE(name, ui_name, ...) \
type->register_input(ustring(#name), ustring(ui_name), SocketType::CLOSURE, 0, NULL, NULL, NULL, SocketType::LINKABLE, ##__VA_ARGS__)
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::CLOSURE, \
0, \
NULL, \
NULL, \
NULL, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_OUT_BOOLEAN(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); \
}
#define SOCKET_OUT_INT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); \
}
#define SOCKET_OUT_FLOAT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); \
}
#define SOCKET_OUT_COLOR(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); \
}
#define SOCKET_OUT_VECTOR(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); \
}
#define SOCKET_OUT_POINT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); \
}
#define SOCKET_OUT_NORMAL(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); \
}
#define SOCKET_OUT_CLOSURE(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); \
}
#define SOCKET_OUT_STRING(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); \
}
#define SOCKET_OUT_ENUM(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); \
}
CCL_NAMESPACE_END
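As a quick orientation for the macros reformatted above, here is a minimal, hypothetical sketch of how a node type is typically declared and registered through NODE_DECLARE / NODE_DEFINE and the SOCKET_* macros. ExampleNode, its members, and the "example" type name are illustrative only and not part of this commit; the Node base class and its (const NodeType *) constructor are assumed from the surrounding header.

/* Hypothetical example, not part of this commit: a node class built with the
 * NODE_DECLARE / NODE_DEFINE and SOCKET_* macros shown above. */
class ExampleNode : public Node {
 public:
  NODE_DECLARE

  ExampleNode();

  bool enabled;
  float value;
};

NODE_DEFINE(ExampleNode)
{
  /* NODE_DEFINE supplies node_type, create() and the register_type<T>()
   * signature; the body only adds the type and its sockets. */
  NodeType *type = NodeType::add("example", create);

  SOCKET_BOOLEAN(enabled, "Enabled", true);
  SOCKET_IN_FLOAT(value, "Value", 0.0f);

  return type;
}

ExampleNode::ExampleNode() : Node(node_type)
{
}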

View File

@@ -33,94 +33,86 @@ static const char *xml_write_boolean(bool value)
}
template<int VECTOR_SIZE, typename T>
static void xml_read_float_array(T& value, xml_attribute attr)
static void xml_read_float_array(T &value, xml_attribute attr)
{
vector<string> tokens;
string_split(tokens, attr.value());
if(tokens.size() % VECTOR_SIZE != 0) {
if (tokens.size() % VECTOR_SIZE != 0) {
return;
}
value.resize(tokens.size() / VECTOR_SIZE);
for(size_t i = 0; i < value.size(); i++) {
float *value_float = (float*)&value[i];
for (size_t i = 0; i < value.size(); i++) {
float *value_float = (float *)&value[i];
for(size_t j = 0; j < VECTOR_SIZE; j++)
for (size_t j = 0; j < VECTOR_SIZE; j++)
value_float[j] = (float)atof(tokens[i * VECTOR_SIZE + j].c_str());
}
}
void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node)
{
xml_attribute name_attr = xml_node.attribute("name");
if(name_attr) {
if (name_attr) {
node->name = ustring(name_attr.value());
}
foreach(const SocketType& socket, node->type->inputs) {
if(socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
foreach (const SocketType &socket, node->type->inputs) {
if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
continue;
}
if(socket.flags & SocketType::INTERNAL) {
if (socket.flags & SocketType::INTERNAL) {
continue;
}
xml_attribute attr = xml_node.attribute(socket.name.c_str());
if(!attr) {
if (!attr) {
continue;
}
switch(socket.type)
{
case SocketType::BOOLEAN:
{
switch (socket.type) {
case SocketType::BOOLEAN: {
node->set(socket, xml_read_boolean(attr.value()));
break;
}
case SocketType::BOOLEAN_ARRAY:
{
case SocketType::BOOLEAN_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
array<bool> value;
value.resize(tokens.size());
for(size_t i = 0; i < value.size(); i++)
for (size_t i = 0; i < value.size(); i++)
value[i] = xml_read_boolean(tokens[i].c_str());
node->set(socket, value);
break;
}
case SocketType::FLOAT:
{
case SocketType::FLOAT: {
node->set(socket, (float)atof(attr.value()));
break;
}
case SocketType::FLOAT_ARRAY:
{
case SocketType::FLOAT_ARRAY: {
array<float> value;
xml_read_float_array<1>(value, attr);
node->set(socket, value);
break;
}
case SocketType::INT:
{
case SocketType::INT: {
node->set(socket, (int)atoi(attr.value()));
break;
}
case SocketType::UINT:
{
case SocketType::UINT: {
node->set(socket, (uint)atoi(attr.value()));
break;
}
case SocketType::INT_ARRAY:
{
case SocketType::INT_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
array<int> value;
value.resize(tokens.size());
for(size_t i = 0; i < value.size(); i++) {
for (size_t i = 0; i < value.size(); i++) {
value[i] = (int)atoi(attr.value());
}
node->set(socket, value);
@@ -129,11 +121,10 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::COLOR:
case SocketType::VECTOR:
case SocketType::POINT:
case SocketType::NORMAL:
{
case SocketType::NORMAL: {
array<float3> value;
xml_read_float_array<3>(value, attr);
if(value.size() == 1) {
if (value.size() == 1) {
node->set(socket, value[0]);
}
break;
@@ -141,103 +132,92 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::COLOR_ARRAY:
case SocketType::VECTOR_ARRAY:
case SocketType::POINT_ARRAY:
case SocketType::NORMAL_ARRAY:
{
case SocketType::NORMAL_ARRAY: {
array<float3> value;
xml_read_float_array<3>(value, attr);
node->set(socket, value);
break;
}
case SocketType::POINT2:
{
case SocketType::POINT2: {
array<float2> value;
xml_read_float_array<2>(value, attr);
if(value.size() == 1) {
if (value.size() == 1) {
node->set(socket, value[0]);
}
break;
}
case SocketType::POINT2_ARRAY:
{
case SocketType::POINT2_ARRAY: {
array<float2> value;
xml_read_float_array<2>(value, attr);
node->set(socket, value);
break;
}
case SocketType::STRING:
{
case SocketType::STRING: {
node->set(socket, attr.value());
break;
}
case SocketType::ENUM:
{
case SocketType::ENUM: {
ustring value(attr.value());
if(socket.enum_values->exists(value)) {
if (socket.enum_values->exists(value)) {
node->set(socket, value);
}
else {
fprintf(stderr, "Unknown value \"%s\" for attribute \"%s\".\n", value.c_str(), socket.name.c_str());
fprintf(stderr,
"Unknown value \"%s\" for attribute \"%s\".\n",
value.c_str(),
socket.name.c_str());
}
break;
}
case SocketType::STRING_ARRAY:
{
case SocketType::STRING_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
array<ustring> value;
value.resize(tokens.size());
for(size_t i = 0; i < value.size(); i++) {
for (size_t i = 0; i < value.size(); i++) {
value[i] = ustring(tokens[i]);
}
node->set(socket, value);
break;
}
case SocketType::TRANSFORM:
{
case SocketType::TRANSFORM: {
array<Transform> value;
xml_read_float_array<12>(value, attr);
if(value.size() == 1) {
if (value.size() == 1) {
node->set(socket, value[0]);
}
break;
}
case SocketType::TRANSFORM_ARRAY:
{
case SocketType::TRANSFORM_ARRAY: {
array<Transform> value;
xml_read_float_array<12>(value, attr);
node->set(socket, value);
break;
}
case SocketType::NODE:
{
case SocketType::NODE: {
ustring value(attr.value());
map<ustring, Node*>::iterator it = reader.node_map.find(value);
if(it != reader.node_map.end())
{
map<ustring, Node *>::iterator it = reader.node_map.find(value);
if (it != reader.node_map.end()) {
Node *value_node = it->second;
if(value_node->type == *(socket.node_type))
if (value_node->type == *(socket.node_type))
node->set(socket, it->second);
}
break;
}
case SocketType::NODE_ARRAY:
{
case SocketType::NODE_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
array<Node*> value;
array<Node *> value;
value.resize(tokens.size());
for(size_t i = 0; i < value.size(); i++)
{
map<ustring, Node*>::iterator it = reader.node_map.find(ustring(tokens[i]));
if(it != reader.node_map.end())
{
for (size_t i = 0; i < value.size(); i++) {
map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
if (it != reader.node_map.end()) {
Node *value_node = it->second;
value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
}
else
{
else {
value[i] = NULL;
}
}
@@ -250,7 +230,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
}
}
if(!node->name.empty())
if (!node->name.empty())
reader.node_map[node->name] = node;
}
@@ -260,73 +240,65 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
xml_node.append_attribute("name") = node->name.c_str();
foreach(const SocketType& socket, node->type->inputs) {
if(socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
foreach (const SocketType &socket, node->type->inputs) {
if (socket.type == SocketType::CLOSURE || socket.type == SocketType::UNDEFINED) {
continue;
}
if(socket.flags & SocketType::INTERNAL) {
if (socket.flags & SocketType::INTERNAL) {
continue;
}
if(node->has_default_value(socket)) {
if (node->has_default_value(socket)) {
continue;
}
xml_attribute attr = xml_node.append_attribute(socket.name.c_str());
switch(socket.type)
{
case SocketType::BOOLEAN:
{
switch (socket.type) {
case SocketType::BOOLEAN: {
attr = xml_write_boolean(node->get_bool(socket));
break;
}
case SocketType::BOOLEAN_ARRAY:
{
case SocketType::BOOLEAN_ARRAY: {
std::stringstream ss;
const array<bool>& value = node->get_bool_array(socket);
for(size_t i = 0; i < value.size(); i++) {
const array<bool> &value = node->get_bool_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << xml_write_boolean(value[i]);
if(i != value.size() - 1)
if (i != value.size() - 1)
ss << " ";
}
attr = ss.str().c_str();
break;
}
case SocketType::FLOAT:
{
case SocketType::FLOAT: {
attr = (double)node->get_float(socket);
break;
}
case SocketType::FLOAT_ARRAY:
{
case SocketType::FLOAT_ARRAY: {
std::stringstream ss;
const array<float>& value = node->get_float_array(socket);
for(size_t i = 0; i < value.size(); i++) {
const array<float> &value = node->get_float_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << value[i];
if(i != value.size() - 1) {
if (i != value.size() - 1) {
ss << " ";
}
}
attr = ss.str().c_str();
break;
}
case SocketType::INT:
{
case SocketType::INT: {
attr = node->get_int(socket);
break;
}
case SocketType::UINT:
{
case SocketType::UINT: {
attr = node->get_uint(socket);
break;
}
case SocketType::INT_ARRAY:
{
case SocketType::INT_ARRAY: {
std::stringstream ss;
const array<int>& value = node->get_int_array(socket);
for(size_t i = 0; i < value.size(); i++) {
const array<int> &value = node->get_int_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << value[i];
if(i != value.size() - 1) {
if (i != value.size() - 1) {
ss << " ";
}
}
@@ -336,41 +308,39 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
case SocketType::COLOR:
case SocketType::VECTOR:
case SocketType::POINT:
case SocketType::NORMAL:
{
case SocketType::NORMAL: {
float3 value = node->get_float3(socket);
attr = string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
attr =
string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
break;
}
case SocketType::COLOR_ARRAY:
case SocketType::VECTOR_ARRAY:
case SocketType::POINT_ARRAY:
case SocketType::NORMAL_ARRAY:
{
case SocketType::NORMAL_ARRAY: {
std::stringstream ss;
const array<float3>& value = node->get_float3_array(socket);
for(size_t i = 0; i < value.size(); i++) {
ss << string_printf("%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
if(i != value.size() - 1) {
const array<float3> &value = node->get_float3_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << string_printf(
"%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
if (i != value.size() - 1) {
ss << " ";
}
}
attr = ss.str().c_str();
break;
}
case SocketType::POINT2:
{
case SocketType::POINT2: {
float2 value = node->get_float2(socket);
attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str();
break;
}
case SocketType::POINT2_ARRAY:
{
case SocketType::POINT2_ARRAY: {
std::stringstream ss;
const array<float2>& value = node->get_float2_array(socket);
for(size_t i = 0; i < value.size(); i++) {
const array<float2> &value = node->get_float2_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << string_printf("%g %g", (double)value[i].x, (double)value[i].y);
if(i != value.size() - 1) {
if (i != value.size() - 1) {
ss << " ";
}
}
@@ -378,70 +348,72 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
break;
}
case SocketType::STRING:
case SocketType::ENUM:
{
case SocketType::ENUM: {
attr = node->get_string(socket).c_str();
break;
}
case SocketType::STRING_ARRAY:
{
case SocketType::STRING_ARRAY: {
std::stringstream ss;
const array<ustring>& value = node->get_string_array(socket);
for(size_t i = 0; i < value.size(); i++) {
const array<ustring> &value = node->get_string_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << value[i];
if(i != value.size() - 1) {
if (i != value.size() - 1) {
ss << " ";
}
}
attr = ss.str().c_str();
break;
}
case SocketType::TRANSFORM:
{
case SocketType::TRANSFORM: {
Transform tfm = node->get_transform(socket);
std::stringstream ss;
for(int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
for (int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ",
(double)tfm[i][0],
(double)tfm[i][1],
(double)tfm[i][2],
(double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
attr = ss.str().c_str();
break;
}
case SocketType::TRANSFORM_ARRAY:
{
case SocketType::TRANSFORM_ARRAY: {
std::stringstream ss;
const array<Transform>& value = node->get_transform_array(socket);
for(size_t j = 0; j < value.size(); j++) {
const Transform& tfm = value[j];
const array<Transform> &value = node->get_transform_array(socket);
for (size_t j = 0; j < value.size(); j++) {
const Transform &tfm = value[j];
for(int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
for (int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ",
(double)tfm[i][0],
(double)tfm[i][1],
(double)tfm[i][2],
(double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
if(j != value.size() - 1) {
if (j != value.size() - 1) {
ss << " ";
}
}
attr = ss.str().c_str();
break;
}
case SocketType::NODE:
{
case SocketType::NODE: {
Node *value = node->get_node(socket);
if(value) {
if (value) {
attr = value->name.c_str();
}
break;
}
case SocketType::NODE_ARRAY:
{
case SocketType::NODE_ARRAY: {
std::stringstream ss;
const array<Node*>& value = node->get_node_array(socket);
for(size_t i = 0; i < value.size(); i++) {
if(value[i]) {
const array<Node *> &value = node->get_node_array(socket);
for (size_t i = 0; i < value.size(); i++) {
if (value[i]) {
ss << value[i]->name.c_str();
}
if(i != value.size() - 1) {
if (i != value.size() - 1) {
ss << " ";
}
}

View File

@@ -25,10 +25,10 @@
CCL_NAMESPACE_BEGIN
struct XMLReader {
map<ustring, Node*> node_map;
map<ustring, Node *> node_map;
};
void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node);
void xml_read_node(XMLReader &reader, Node *node, xml_node xml_node);
xml_node xml_write_node(Node *node, xml_node xml_root);
CCL_NAMESPACE_END
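For orientation, a hypothetical sketch of driving xml_read_node() through these declarations. It assumes pugixml (xml_document and friends, as used by the implementation above) and a node type registered elsewhere under the illustrative name "example"; none of the names below are part of this commit.

/* Hypothetical usage sketch, not part of this commit: read all <example>
 * elements under <cycles> into freshly created nodes of that type. */
void example_read(const char *filepath, vector<Node *> &nodes)
{
  xml_document doc;
  if (!doc.load_file(filepath)) {
    return;
  }

  XMLReader reader;
  const NodeType *type = NodeType::find(ustring("example"));
  if (type == NULL) {
    return;
  }

  for (xml_node elem = doc.child("cycles").child("example"); elem;
       elem = elem.next_sibling("example")) {
    Node *node = type->create(type);
    xml_read_node(reader, node, elem);
    nodes.push_back(node);
  }
}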

View File

@@ -57,19 +57,19 @@ CCL_NAMESPACE_BEGIN
#if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH
# include "kernel/bvh/bvh_traversal.h"
#endif
#if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
#endif
#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION
# include "kernel/bvh/bvh_traversal.h"
#endif
@@ -82,7 +82,7 @@ CCL_NAMESPACE_BEGIN
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_local_motion
# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_local.h"
# endif
#endif /* __BVH_LOCAL__ */
@@ -96,13 +96,13 @@ CCL_NAMESPACE_BEGIN
# if defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume.h"
# endif
#endif /* __VOLUME__ */
@@ -122,19 +122,19 @@ CCL_NAMESPACE_BEGIN
# if defined(__HAIR__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION
# include "kernel/bvh/bvh_shadow_all.h"
# endif
#endif /* __SHADOW_RECORD_ALL__ */
@@ -148,13 +148,13 @@ CCL_NAMESPACE_BEGIN
# if defined(__INSTANCING__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
# if defined(__OBJECT_MOTION__)
# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR
# include "kernel/bvh/bvh_volume_all.h"
# endif
#endif /* __VOLUME_RECORD_ALL__ */
@@ -188,18 +188,19 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT);
if(!scene_intersect_valid(&ray)) {
if (!scene_intersect_valid(&ray)) {
return false;
}
#ifdef __EMBREE__
if(kernel_data.bvh.scene) {
if (kernel_data.bvh.scene) {
isect->t = ray.t;
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
IntersectContext rtc_ctx(&ctx);
RTCRayHit ray_hit;
kernel_embree_setup_rayhit(ray, ray_hit, visibility);
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
return true;
}
@@ -207,9 +208,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
}
#endif /* __EMBREE__ */
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if(kernel_data.bvh.have_curves)
if (kernel_data.bvh.have_curves)
return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax);
# endif /* __HAIR__ */
@@ -218,14 +219,14 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
#endif /* __OBJECT_MOTION__ */
#ifdef __HAIR__
if(kernel_data.bvh.have_curves)
if (kernel_data.bvh.have_curves)
return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax);
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
if (kernel_data.bvh.have_instancing)
return bvh_intersect_instancing(kg, &ray, isect, visibility);
# endif /* __INSTANCING__ */
@@ -252,12 +253,12 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
if(!scene_intersect_valid(&ray)) {
if (!scene_intersect_valid(&ray)) {
local_isect->num_hits = 0;
return false;
}
#ifdef __EMBREE__
if(kernel_data.bvh.scene) {
# ifdef __EMBREE__
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS);
ctx.lcg_state = lcg_state;
ctx.max_hits = max_hits;
@@ -270,24 +271,18 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
/* Get the Embree scene for this intersection. */
RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
if(geom) {
if (geom) {
float3 P = ray.P;
float3 dir = ray.D;
float3 idir = ray.D;
const int object_flag = kernel_tex_fetch(__object_flag, local_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform ob_itfm;
rtc_ray.tfar = bvh_instance_motion_push(kg,
local_object,
&ray,
&P,
&dir,
&idir,
ray.t,
&ob_itfm);
rtc_ray.tfar = bvh_instance_motion_push(
kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
/* bvh_instance_motion_push() returns the inverse transform but
* it's not needed here. */
(void) ob_itfm;
(void)ob_itfm;
rtc_ray.org_x = P.x;
rtc_ray.org_y = P.y;
@@ -297,30 +292,20 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
rtc_ray.dir_z = dir.z;
}
RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
if(scene) {
if (scene) {
rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
}
}
return local_isect->num_hits > 0;
}
#endif /* __EMBREE__ */
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg,
&ray,
local_isect,
local_object,
lcg_state,
max_hits);
# endif /* __EMBREE__ */
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
}
#endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg,
&ray,
local_isect,
local_object,
lcg_state,
max_hits);
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
}
#endif
@@ -334,12 +319,12 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
if(!scene_intersect_valid(ray)) {
if (!scene_intersect_valid(ray)) {
*num_hits = 0;
return false;
}
# ifdef __EMBREE__
if(kernel_data.bvh.scene) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
ctx.isect_s = isect;
ctx.max_hits = max_hits;
@@ -349,7 +334,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
if(ctx.num_hits > max_hits) {
if (ctx.num_hits > max_hits) {
return true;
}
*num_hits = ctx.num_hits;
@@ -357,55 +342,30 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
}
# endif
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if(kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __HAIR__ */
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing) {
return bvh_intersect_shadow_all_instancing(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
if (kernel_data.bvh.have_instancing) {
return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -417,17 +377,17 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
if(!scene_intersect_valid(ray)) {
if (!scene_intersect_valid(ray)) {
return false;
}
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __KERNEL_CPU__
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
if (kernel_data.bvh.have_instancing)
return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
# endif /* __INSTANCING__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
@@ -450,11 +410,11 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
{
PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
if(!scene_intersect_valid(ray)) {
if (!scene_intersect_valid(ray)) {
return false;
}
# ifdef __EMBREE__
if(kernel_data.bvh.scene) {
if (kernel_data.bvh.scene) {
CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
ctx.isect_s = isect;
ctx.max_hits = max_hits;
@@ -467,19 +427,18 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
}
# endif
# ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __INSTANCING__
if(kernel_data.bvh.have_instancing)
if (kernel_data.bvh.have_instancing)
return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
# endif /* __INSTANCING__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
#endif /* __VOLUME_RECORD_ALL__ */
/* Ray offset to avoid self intersection.
*
* This function should be used to compute a modified ray start position for
@@ -497,39 +456,39 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
float3 res;
/* x component */
if(fabsf(P.x) < epsilon_test) {
res.x = P.x + Ng.x*epsilon_f;
if (fabsf(P.x) < epsilon_test) {
res.x = P.x + Ng.x * epsilon_f;
}
else {
uint ix = __float_as_uint(P.x);
ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i;
res.x = __uint_as_float(ix);
}
/* y component */
if(fabsf(P.y) < epsilon_test) {
res.y = P.y + Ng.y*epsilon_f;
if (fabsf(P.y) < epsilon_test) {
res.y = P.y + Ng.y * epsilon_f;
}
else {
uint iy = __float_as_uint(P.y);
iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i;
res.y = __uint_as_float(iy);
}
/* z component */
if(fabsf(P.z) < epsilon_test) {
res.z = P.z + Ng.z*epsilon_f;
if (fabsf(P.z) < epsilon_test) {
res.z = P.z + Ng.z * epsilon_f;
}
else {
uint iz = __float_as_uint(P.z);
iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i;
res.z = __uint_as_float(iz);
}
return res;
#else
const float epsilon_f = 1e-4f;
return P + epsilon_f*Ng;
return P + epsilon_f * Ng;
#endif
}
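The truncated comment above ray_offset() describes its purpose: nudge a ray start point off the surface it was sampled on so secondary rays do not immediately re-hit it. As a standalone illustration of the per-component trick (stepping the float a few representable values along the normal direction, with an absolute epsilon fallback near zero), here is a hypothetical, self-contained sketch; the constants and the function name are illustrative and not taken from this commit.

/* Hypothetical standalone illustration of the self-intersection offset used
 * by ray_offset() above; not part of this commit. */
#include <cmath>
#include <cstdint>
#include <cstring>

static float offset_component(float p, float n)
{
  const float epsilon_f = 1e-5f;   /* absolute offset used near zero */
  const float epsilon_test = 1.0f; /* threshold for the absolute fallback */
  const uint32_t epsilon_i = 32;   /* offset in representable float steps */

  if (std::fabs(p) < epsilon_test) {
    return p + n * epsilon_f;
  }

  /* Step the integer representation of p by epsilon_i ULPs: away from zero
   * when p and n share a sign, towards zero otherwise, which moves p in the
   * direction of n either way. */
  uint32_t ip, in;
  std::memcpy(&ip, &p, sizeof(ip));
  std::memcpy(&in, &n, sizeof(in));
  ip += ((ip ^ in) >> 31) ? -epsilon_i : epsilon_i;

  float res;
  std::memcpy(&res, &ip, sizeof(res));
  return res;
}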
@@ -537,12 +496,12 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
/* ToDo: Move to another file? */
ccl_device int intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection*)a;
const Intersection *isect_b = (const Intersection*)b;
const Intersection *isect_a = (const Intersection *)a;
const Intersection *isect_b = (const Intersection *)b;
if(isect_a->t < isect_b->t)
if (isect_a->t < isect_b->t)
return -1;
else if(isect_a->t > isect_b->t)
else if (isect_a->t > isect_b->t)
return 1;
else
return 0;
@@ -552,13 +511,13 @@ ccl_device int intersections_compare(const void *a, const void *b)
#if defined(__SHADOW_RECORD_ALL__)
ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
{
#ifdef __KERNEL_GPU__
# ifdef __KERNEL_GPU__
/* Use bubble sort which has more friendly memory pattern on GPU. */
bool swapped;
do {
swapped = false;
for(int j = 0; j < num_hits - 1; ++j) {
if(hits[j].t > hits[j + 1].t) {
for (int j = 0; j < num_hits - 1; ++j) {
if (hits[j].t > hits[j + 1].t) {
struct Intersection tmp = hits[j];
hits[j] = hits[j + 1];
hits[j + 1] = tmp;
@@ -566,10 +525,10 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits)
}
}
--num_hits;
} while(swapped);
#else
} while (swapped);
# else
qsort(hits, num_hits, sizeof(Intersection), intersections_compare);
#endif
# endif
}
#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */

View File

@@ -59,19 +59,20 @@ struct CCLIntersectContext {
}
};
class IntersectContext
{
public:
IntersectContext(CCLIntersectContext* ctx)
class IntersectContext {
public:
IntersectContext(CCLIntersectContext *ctx)
{
rtcInitIntersectContext(&context);
userRayExt = ctx;
}
RTCIntersectContext context;
CCLIntersectContext* userRayExt;
CCLIntersectContext *userRayExt;
};
ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
RTCRay &rtc_ray,
const uint visibility)
{
rtc_ray.org_x = ray.P.x;
rtc_ray.org_y = ray.P.y;
@@ -85,40 +86,56 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray,
rtc_ray.mask = visibility;
}
ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
RTCRayHit &rayhit,
const uint visibility)
{
kernel_embree_setup_ray(ray, rayhit.ray, visibility);
rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
}
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect)
{
bool is_hair = hit->geomID & 1;
isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
isect->v = is_hair ? hit->v : hit->u;
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
isect->object = hit->instID[0]/2;
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
kernel_tex_fetch(__object_node, hit->instID[0] / 2);
isect->object = hit->instID[0] / 2;
}
else {
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->object = OBJECT_NONE;
}
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}
ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect,
int local_object_id)
{
isect->u = 1.0f - hit->v - hit->u;
isect->v = hit->u;
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
kernel_tex_fetch(__object_node, local_object_id);
isect->object = local_object_id;
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}

View File

@@ -43,7 +43,7 @@ ccl_device
#else
ccl_device_inline
#endif
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
LocalIntersection *local_isect,
int local_object,
@@ -72,23 +72,16 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
if(local_isect != NULL) {
if (local_isect != NULL) {
local_isect->num_hits = 0;
}
kernel_assert((local_isect == NULL) == (max_hits == 0));
const int object_flag = kernel_tex_fetch(__object_flag, local_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -119,10 +112,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@@ -155,10 +148,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
if (traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
if (is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
@@ -170,10 +163,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
if (traverse_mask == 2) {
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
else if (traverse_mask == 0) {
/* Neither child was intersected. */
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
@@ -182,8 +175,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* if node is leaf, fetch triangle list */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
const int prim_addr2 = __float_as_int(leaf.y);
@@ -194,12 +187,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect_local(kg,
if (triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -217,9 +210,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect_local(kg,
if (motion_triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -241,8 +234,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
}
} while(node_addr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
}
@@ -254,32 +247,17 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
uint *lcg_state,
int max_hits)
{
switch(kernel_data.bvh.bvh_layout) {
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
kernel_assert(!"Should not happen");
return false;

View File

@@ -22,9 +22,9 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
{
Transform space;
const int child_addr = node_addr + child * 3;
space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1);
space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2);
space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3);
return space;
}
@@ -39,10 +39,10 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
{
/* fetch node data */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
/* intersect ray against child nodes */
float c0lox = (node0.x - P.x) * idir.x;
@@ -66,14 +66,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
#else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
#endif
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
# else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
# endif
}
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
@@ -88,10 +87,10 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
{
/* fetch node data */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
/* intersect ray against child nodes */
float c0lox = (node0.x - P.x) * idir.x;
@@ -112,14 +111,14 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz));
float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz));
if(difl != 0.0f) {
if (difl != 0.0f) {
float hdiff = 1.0f + difl;
float ldiff = 1.0f - difl;
if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
c0min = max(ldiff * c0min, c0min - extmax);
c0max = min(hdiff * c0max, c0max + extmax);
}
if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
c1min = max(ldiff * c1min, c1min - extmax);
c1max = min(hdiff * c1max, c1max + extmax);
}
@@ -128,18 +127,16 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
dist[0] = c0min;
dist[1] = c1min;
#ifdef __VISIBILITY_FLAG__
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
#else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
#endif
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
# else
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
# endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
KernelGlobals *kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float t,
@@ -165,8 +162,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
return tnear <= tfar;
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
KernelGlobals *kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float t,
@@ -190,11 +186,11 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
const float tnear = max4(0.0f, near_x, near_y, near_z);
const float tfar = min4(t, far_x, far_y, far_z);
*dist = tnear;
if(difl != 0.0f) {
if (difl != 0.0f) {
/* TODO(sergey): Same as for QBVH, needs a proper use. */
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
return round_down*tnear <= round_up*tfar;
return round_down * tnear <= round_up * tfar;
}
else {
return tnear <= tfar;
@@ -211,19 +207,19 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float dist[2])
{
int mask = 0;
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.x) & visibility))
#endif
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
# ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
# endif
{
mask |= 1;
}
}
if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.y) & visibility))
#endif
if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
# ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
# endif
{
mask |= 2;
}
@@ -243,19 +239,19 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
float dist[2])
{
int mask = 0;
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.x) & visibility))
#endif
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) {
# ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.x) & visibility))
# endif
{
mask |= 1;
}
}
if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(cnodes.y) & visibility))
#endif
if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) {
# ifdef __VISIBILITY_FLAG__
if ((__float_as_uint(cnodes.y) & visibility))
# endif
{
mask |= 2;
}
@@ -273,24 +269,11 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
idir,
t,
node_addr,
visibility,
dist);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg,
P,
idir,
t,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
@@ -306,37 +289,21 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
dir,
idir,
t,
difl,
extmax,
node_addr,
visibility,
dist);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(
kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect_robust(kg,
P,
idir,
t,
difl,
extmax,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect_robust(
kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
}
}
#else /* !defined(__KERNEL_SSE2__) */
int ccl_device_forceinline bvh_aligned_node_intersect(
KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& tsplat,
int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 &P,
const float3 &dir,
const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -348,7 +315,7 @@ int ccl_device_forceinline bvh_aligned_node_intersect(
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
/* fetch node data */
const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr;
const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
/* intersect ray against child nodes */
const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
@@ -367,20 +334,19 @@ int ccl_device_forceinline bvh_aligned_node_intersect(
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
return cmask;
# else
return mask & 3;
# endif
}
ccl_device_forceinline int bvh_aligned_node_intersect_robust(
KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& tsplat,
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
const float3 &P,
const float3 &dir,
const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -394,7 +360,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(
const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
/* fetch node data */
const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr;
const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr;
/* intersect ray against child nodes */
const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
@@ -405,18 +371,18 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(
ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
const ssef tminmax = minmax ^ pn;
if(difl != 0.0f) {
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
float4 *tminmaxview = (float4*)&tminmax;
float& c0min = tminmaxview->x, &c1min = tminmaxview->y;
float& c0max = tminmaxview->z, &c1max = tminmaxview->w;
if (difl != 0.0f) {
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
float4 *tminmaxview = (float4 *)&tminmax;
float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
float hdiff = 1.0f + difl;
float ldiff = 1.0f - difl;
if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) {
c0min = max(ldiff * c0min, c0min - extmax);
c0max = min(hdiff * c0max, c0max + extmax);
}
if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) {
c1min = max(ldiff * c1min, c1min - extmax);
c1max = min(hdiff * c1max, c1max + extmax);
}
@@ -431,9 +397,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
return cmask;
# else
return mask & 3;
@@ -443,8 +409,8 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(
ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
const float3 P,
const float3 dir,
const ssef& isect_near,
const ssef& isect_far,
const ssef &isect_near,
const ssef &isect_far,
const int node_addr,
const uint visibility,
float dist[2])
@@ -454,22 +420,13 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
@@ -492,9 +449,9 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
return cmask;
# else
return mask & 3;
@@ -504,8 +461,8 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const ssef& isect_near,
const ssef& isect_far,
const ssef &isect_near,
const ssef &isect_far,
const float difl,
const int node_addr,
const uint visibility,
@@ -516,22 +473,13 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
@@ -547,10 +495,10 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
sseb vmask;
if(difl != 0.0f) {
if (difl != 0.0f) {
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
vmask = round_down*tnear <= round_up*tfar;
vmask = round_down * tnear <= round_up * tfar;
}
else {
vmask = tnear <= tfar;
@@ -563,9 +511,9 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
# ifdef __VISIBILITY_FLAG__
/* this visibility test gives a 5% performance hit, how to solve? */
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
return cmask;
# else
return mask & 3;
@@ -573,11 +521,11 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
}
ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& isect_near,
const ssef& isect_far,
const ssef& tsplat,
const float3 &P,
const float3 &dir,
const ssef &isect_near,
const ssef &isect_far,
const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -586,36 +534,22 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
isect_near,
isect_far,
node_addr,
visibility,
dist);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(
kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg,
P,
dir,
tsplat,
Psplat,
idirsplat,
shufflexyz,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect(
kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
}
}
ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
const float3& P,
const float3& dir,
const ssef& isect_near,
const ssef& isect_far,
const ssef& tsplat,
const float3 &P,
const float3 &dir,
const ssef &isect_near,
const ssef &isect_far,
const ssef &tsplat,
const ssef Psplat[3],
const ssef idirsplat[3],
const shuffle_swap_t shufflexyz[3],
@@ -626,16 +560,9 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
float dist[2])
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
dir,
isect_near,
isect_far,
difl,
node_addr,
visibility,
dist);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(
kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect_robust(kg,


@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_shadow_all.h"
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
# include "kernel/bvh/obvh_shadow_all.h"
#endif
# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -44,7 +44,7 @@ ccl_device
#else
ccl_device_inline
#endif
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
const uint visibility,
@@ -107,10 +107,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@@ -143,10 +143,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
if (traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
if (is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
@@ -158,10 +158,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
if (traverse_mask == 2) {
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
else if (traverse_mask == 0) {
/* Neither child was intersected. */
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
@@ -170,12 +170,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* if node is leaf, fetch triangle list */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -186,7 +186,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
/* primitive intersection */
while(prim_addr < prim_addr2) {
while (prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
bool hit;
@@ -194,27 +194,15 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
* isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
* might give a few % performance improvement */
switch(p_type) {
switch (p_type) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg,
isect_array,
P,
dir,
visibility,
object,
prim_addr);
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
break;
}
#endif
@@ -222,7 +210,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = cardinal_curve_intersect(kg,
isect_array,
P,
@@ -233,7 +221,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
else {
hit = curve_intersect(kg,
@@ -246,7 +235,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
break;
}
@@ -258,7 +248,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* shadow ray early termination */
if(hit) {
if (hit) {
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
@@ -267,7 +257,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int shader = 0;
#ifdef __HAIR__
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
@@ -281,11 +271,11 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
return true;
}
/* if maximum number of hits reached, block all light */
else if(*num_hits == max_hits) {
else if (*num_hits == max_hits) {
return true;
}
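/* Illustrative sketch, not taken from this commit: the two checks above decide whether a
 * recorded shadow hit terminates the ray. An opaque surface blocks the light immediately;
 * transparent hits are accumulated until the caller's hit budget is exhausted, after which
 * the light is also treated as fully blocked. A minimal restatement of that policy
 * (function name is hypothetical, SD_HAS_TRANSPARENT_SHADOW as used above):
 */
static inline bool shadow_hit_blocks_light(int shader_flags,
                                           unsigned int num_hits,
                                           unsigned int max_hits)
{
  if (!(shader_flags & SD_HAS_TRANSPARENT_SHADOW)) {
    return true; /* opaque: no light passes */
  }
  if (num_hits == max_hits) {
    return true; /* transparent, but the intersection array is full */
  }
  return false; /* keep traversing and accumulate this transparent hit */
}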
@@ -305,7 +295,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
@@ -336,14 +326,14 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if(num_hits_in_instance) {
if (num_hits_in_instance) {
float t_fac;
# if BVH_FEATURE(BVH_MOTION)
@@ -353,8 +343,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# endif
/* scale isect->t to adjust for instancing */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
@@ -385,7 +375,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
}
@@ -397,32 +387,17 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
uint *num_hits)
{
switch(kernel_data.bvh.bvh_layout) {
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
}
kernel_assert(!"Should not happen");
return false;


@@ -47,11 +47,12 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
)
)
{
/* todo:
* - test if pushing distance on the stack helps (for non shadow rays)
@@ -110,14 +111,14 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
if (difl != 0.0f) {
traverse_mask = NODE_INTERSECT_ROBUST(kg,
P,
# if BVH_FEATURE(BVH_HAIR)
@@ -147,7 +148,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#else // __KERNEL_SSE2__
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
if (difl != 0.0f) {
traverse_mask = NODE_INTERSECT_ROBUST(kg,
P,
dir,
@@ -188,10 +189,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
if (traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
if (is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
@@ -203,10 +204,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
if (traverse_mask == 2) {
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
else if (traverse_mask == 0) {
/* Neither child was intersected. */
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
@@ -216,12 +217,12 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* if node is leaf, fetch triangle list */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -231,29 +232,22 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr))
{
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
tfar = ssef(isect->t);
# endif
#else
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
#endif
}
@@ -262,28 +256,21 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr))
{
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
tfar = ssef(isect->t);
# endif
# else
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
# endif
}
@@ -294,12 +281,12 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = cardinal_curve_intersect(kg,
isect,
P,
@@ -327,17 +314,17 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
difl,
extmax);
}
if(hit) {
if (hit) {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
# if BVH_FEATURE(BVH_HAIR)
tfar = ssef(isect->t);
# endif
# else
if(visibility & PATH_RAY_SHADOW_OPAQUE)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
return true;
# endif
}
@@ -350,10 +337,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
@@ -381,10 +369,10 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@@ -412,7 +400,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
@@ -422,13 +410,14 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
)
)
{
switch(kernel_data.bvh.bvh_layout) {
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
@@ -436,7 +425,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
# endif
@@ -449,7 +439,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
# endif
@@ -461,7 +452,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
#endif


@@ -40,8 +40,8 @@ CCL_NAMESPACE_BEGIN
#define BVH_HAIR 4
#define BVH_HAIR_MINIMUM_WIDTH 8
#define BVH_NAME_JOIN(x,y) x ## _ ## y
#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y)
#define BVH_NAME_JOIN(x, y) x##_##y
#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y)
#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME)
#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
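/* Illustrative sketch, not taken from this commit: BVH_NAME_EVAL is the usual extra level
 * of indirection so that BVH_FUNCTION_NAME is macro-expanded before the ## paste. With a
 * hypothetical BVH_FUNCTION_NAME, the dispatch calls seen in the traversal files resolve
 * like this:
 */
#if 0 /* expansion example only, not compiled */
#  define BVH_FUNCTION_NAME bvh_intersect_example /* hypothetical value */
/* BVH_FUNCTION_FULL_NAME(OBVH)
 *   -> BVH_NAME_EVAL(OBVH, bvh_intersect_example)
 *   -> BVH_NAME_JOIN(OBVH, bvh_intersect_example)
 *   -> OBVH_bvh_intersect_example
 * BVH_FEATURE(BVH_HAIR) evaluates to true when the BVH_HAIR bit is set in
 * BVH_FUNCTION_FEATURES, which is what compiles the hair-only blocks in or out. */
#endif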
@@ -53,19 +53,19 @@ CCL_NAMESPACE_BEGIN
isect->num_traversed_nodes = 0; \
isect->num_traversed_instances = 0; \
isect->num_intersections = 0; \
} while(0)
} while (0)
# define BVH_DEBUG_NEXT_NODE() \
do { \
++isect->num_traversed_nodes; \
} while(0)
} while (0)
# define BVH_DEBUG_NEXT_INTERSECTION() \
do { \
++isect->num_intersections; \
} while(0)
} while (0)
# define BVH_DEBUG_NEXT_INSTANCE() \
do { \
++isect->num_traversed_instances; \
} while(0)
} while (0)
#else /* __KERNEL_DEBUG__ */
# define BVH_DEBUG_INIT()
# define BVH_DEBUG_NEXT_NODE()


@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_volume.h"
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
# include "kernel/bvh/obvh_volume.h"
#endif
# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -43,7 +43,7 @@ ccl_device
#else
ccl_device_inline
#endif
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect,
const uint visibility)
@@ -103,10 +103,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@@ -139,10 +139,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
if (traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
if (is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
@@ -154,10 +154,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
if (traverse_mask == 2) {
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
else if (traverse_mask == 0) {
/* Neither child was intersected. */
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
@@ -166,12 +166,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* if node is leaf, fetch triangle list */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -181,46 +181,38 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr);
triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
}
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -233,11 +225,12 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
@@ -270,10 +263,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* instance pop */
@@ -301,7 +294,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_MOTION) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
@@ -311,26 +304,17 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility)
{
switch(kernel_data.bvh.bvh_layout) {
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
kernel_assert(!"Should not happen");
return false;


@@ -19,9 +19,9 @@
#ifdef __QBVH__
# include "kernel/bvh/qbvh_volume_all.h"
#ifdef __KERNEL_AVX2__
# ifdef __KERNEL_AVX2__
# include "kernel/bvh/obvh_volume_all.h"
#endif
# endif
#endif
#if BVH_FEATURE(BVH_HAIR)
@@ -43,7 +43,7 @@ ccl_device
#else
ccl_device_inline
#endif
uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
Intersection *isect_array,
const uint max_hits,
@@ -107,10 +107,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
do {
do {
/* traverse internal nodes */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
int node_addr_child1, traverse_mask;
float dist[2];
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#if !defined(__KERNEL_SSE2__)
traverse_mask = NODE_INTERSECT(kg,
@@ -143,10 +143,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
node_addr = __float_as_int(cnodes.z);
node_addr_child1 = __float_as_int(cnodes.w);
if(traverse_mask == 3) {
if (traverse_mask == 3) {
/* Both children were intersected, push the farther one. */
bool is_closest_child1 = (dist[1] < dist[0]);
if(is_closest_child1) {
if (is_closest_child1) {
int tmp = node_addr;
node_addr = node_addr_child1;
node_addr_child1 = tmp;
@@ -158,10 +158,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
else {
/* One child was intersected. */
if(traverse_mask == 2) {
if (traverse_mask == 2) {
node_addr = node_addr_child1;
}
else if(traverse_mask == 0) {
else if (traverse_mask == 0) {
/* Neither child was intersected. */
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
@@ -170,12 +170,12 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
/* if node is leaf, fetch triangle list */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
const int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -186,25 +186,21 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
/* primitive intersection */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = triangle_intersect(kg,
isect_array,
P,
dir,
visibility,
object,
prim_addr);
if(hit) {
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
@@ -212,17 +208,18 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
num_hits_in_instance++;
#endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
if(object != OBJECT_NONE) {
if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
@@ -235,23 +232,19 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* intersect ray against primitive */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
if(hit) {
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
@@ -259,17 +252,18 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
num_hits_in_instance++;
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
if (num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
if(object != OBJECT_NONE) {
if (object != OBJECT_NONE) {
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
# endif /* BVH_FEATURE(BVH_INSTANCING) */
@@ -288,11 +282,12 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* instance push */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
@@ -328,14 +323,14 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if(num_hits_in_instance) {
if (num_hits_in_instance) {
float t_fac;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
@@ -343,8 +338,8 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
@@ -376,7 +371,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}
@@ -387,29 +382,17 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
const uint max_hits,
const uint visibility)
{
switch(kernel_data.bvh.bvh_layout) {
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
}
kernel_assert(!"Should not happen");
return 0;


@@ -49,23 +49,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
if(local_isect != NULL) {
if (local_isect != NULL) {
local_isect->num_hits = 0;
}
kernel_assert((local_isect == NULL) == (max_hits == 0));
const int object_flag = kernel_tex_fetch(__object_flag, local_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -79,7 +72,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -89,15 +82,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
avxf dist;
int child_mask = NODE_INTERSECT(kg,
tnear,
@@ -112,27 +103,31 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
if(child_mask != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if (child_mask != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
}
else
#endif
{
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
if(child_mask == 0) {
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -141,12 +136,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
float d0 = ((float *)&dist)[r];
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
@@ -181,8 +176,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -200,8 +195,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
if(child_mask == 0) {
float d3 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -233,8 +228,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c4 = __float_as_int(cnodes[r]);
float d4 = ((float*)&dist)[r];
if(child_mask == 0) {
float d4 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c4;
@@ -253,8 +248,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c5 = __float_as_int(cnodes[r]);
float d5 = ((float*)&dist)[r];
if(child_mask == 0) {
float d5 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c5;
@@ -288,8 +283,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c6 = __float_as_int(cnodes[r]);
float d6 = ((float*)&dist)[r];
if(child_mask == 0) {
float d6 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c6;
@@ -310,7 +305,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c7 = __float_as_int(cnodes[r]);
float d7 = ((float*)&dist)[r];
float d7 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c7;
@@ -337,8 +332,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
int prim_addr2 = __float_as_int(leaf.y);
@@ -349,12 +344,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive, */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect_local(kg,
if (triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -363,8 +358,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr,
isect_t,
lcg_state,
max_hits))
{
max_hits)) {
return true;
}
}
@@ -373,9 +367,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect_local(kg,
if (motion_triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -385,8 +379,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr,
isect_t,
lcg_state,
max_hits))
{
max_hits)) {
return true;
}
}
@@ -397,8 +390,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
break;
}
}
} while(node_addr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
}


@@ -21,7 +21,7 @@ struct OBVHStackItem {
float dist;
};
ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
int *ccl_restrict near_x,
int *ccl_restrict near_y,
int *ccl_restrict near_z,
@@ -31,9 +31,12 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
*near_x = 0; *far_x = 1;
*near_y = 2; *far_y = 3;
*near_z = 4; *far_z = 5;
*near_x = 0;
*far_x = 1;
*near_y = 2;
*far_y = 3;
*near_z = 4;
*far_z = 5;
const size_t mask = movemask(ssef(idir.m128));
@@ -41,18 +44,41 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
const int mask_y = (mask & 2) >> 1;
const int mask_z = (mask & 4) >> 2;
*near_x += mask_x; *far_x -= mask_x;
*near_y += mask_y; *far_y -= mask_y;
*near_z += mask_z; *far_z -= mask_z;
*near_x += mask_x;
*far_x -= mask_x;
*near_y += mask_y;
*far_y -= mask_y;
*near_z += mask_z;
*far_z -= mask_z;
#else
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
if (idir.x >= 0.0f) {
*near_x = 0;
*far_x = 1;
}
else {
*near_x = 1;
*far_x = 0;
}
if (idir.y >= 0.0f) {
*near_y = 2;
*far_y = 3;
}
else {
*near_y = 3;
*far_y = 2;
}
if (idir.z >= 0.0f) {
*near_z = 4;
*far_z = 5;
}
else {
*near_z = 5;
*far_z = 4;
}
#endif
}
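/* Illustrative sketch, not taken from this commit: the SSE path above swaps the near/far
 * slab indices branchlessly. movemask() packs the sign bits of idir into mask, and adding
 * a sign bit to the "near" index while subtracting it from the "far" index flips the pair
 * exactly when that direction component is negative. Scalar equivalent for one axis
 * (helper name is hypothetical; the -0.0f sign-bit corner case is ignored here):
 */
static inline void near_far_for_axis(float idir_axis, int base, int *near_idx, int *far_idx)
{
  const int sign = (idir_axis < 0.0f) ? 1 : 0; /* stands in for one movemask bit */
  *near_idx = base + sign;    /* base     when positive, base + 1 when negative */
  *far_idx = base + 1 - sign; /* base + 1 when positive, base     when negative */
}
/* near_far_for_axis(idir.x, 0, &near_x, &far_x) reproduces the x axis above:
 * idir.x >= 0 -> (near_x, far_x) = (0, 1), idir.x < 0 -> (1, 0). */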
ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a,
OBVHStackItem *ccl_restrict b)
ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
{
OBVHStackItem tmp = *a;
*a = *b;
@@ -63,9 +89,15 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s2,
OBVHStackItem *ccl_restrict s3)
{
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
if (s3->dist < s2->dist) {
obvh_item_swap(s3, s2);
}
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -73,11 +105,21 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s3,
OBVHStackItem *ccl_restrict s4)
{
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if(s4->dist < s3->dist) { obvh_item_swap(s4, s3); }
if(s3->dist < s1->dist) { obvh_item_swap(s3, s1); }
if(s4->dist < s2->dist) { obvh_item_swap(s4, s2); }
if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
if (s4->dist < s3->dist) {
obvh_item_swap(s4, s3);
}
if (s3->dist < s1->dist) {
obvh_item_swap(s3, s1);
}
if (s4->dist < s2->dist) {
obvh_item_swap(s4, s2);
}
if (s3->dist < s2->dist) {
obvh_item_swap(s3, s2);
}
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -87,13 +129,13 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s5)
{
obvh_stack_sort(s1, s2, s3, s4);
if(s5->dist < s4->dist) {
if (s5->dist < s4->dist) {
obvh_item_swap(s4, s5);
if(s4->dist < s3->dist) {
if (s4->dist < s3->dist) {
obvh_item_swap(s3, s4);
if(s3->dist < s2->dist) {
if (s3->dist < s2->dist) {
obvh_item_swap(s2, s3);
if(s2->dist < s1->dist) {
if (s2->dist < s1->dist) {
obvh_item_swap(s1, s2);
}
}
@@ -109,15 +151,15 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s6)
{
obvh_stack_sort(s1, s2, s3, s4, s5);
if(s6->dist < s5->dist) {
if (s6->dist < s5->dist) {
obvh_item_swap(s5, s6);
if(s5->dist < s4->dist) {
if (s5->dist < s4->dist) {
obvh_item_swap(s4, s5);
if(s4->dist < s3->dist) {
if (s4->dist < s3->dist) {
obvh_item_swap(s3, s4);
if(s3->dist < s2->dist) {
if (s3->dist < s2->dist) {
obvh_item_swap(s2, s3);
if(s2->dist < s1->dist) {
if (s2->dist < s1->dist) {
obvh_item_swap(s1, s2);
}
}
@@ -135,17 +177,17 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s7)
{
obvh_stack_sort(s1, s2, s3, s4, s5, s6);
if(s7->dist < s6->dist) {
if (s7->dist < s6->dist) {
obvh_item_swap(s6, s7);
if(s6->dist < s5->dist) {
if (s6->dist < s5->dist) {
obvh_item_swap(s5, s6);
if(s5->dist < s4->dist) {
if (s5->dist < s4->dist) {
obvh_item_swap(s4, s5);
if(s4->dist < s3->dist) {
if (s4->dist < s3->dist) {
obvh_item_swap(s3, s4);
if(s3->dist < s2->dist) {
if (s3->dist < s2->dist) {
obvh_item_swap(s2, s3);
if(s2->dist < s1->dist) {
if (s2->dist < s1->dist) {
obvh_item_swap(s1, s2);
}
}
@@ -165,19 +207,19 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s8)
{
obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
if(s8->dist < s7->dist) {
if (s8->dist < s7->dist) {
obvh_item_swap(s7, s8);
if(s7->dist < s6->dist) {
if (s7->dist < s6->dist) {
obvh_item_swap(s6, s7);
if(s6->dist < s5->dist) {
if (s6->dist < s5->dist) {
obvh_item_swap(s5, s6);
if(s5->dist < s4->dist) {
if (s5->dist < s4->dist) {
obvh_item_swap(s4, s5);
if(s4->dist < s3->dist) {
if (s4->dist < s3->dist) {
obvh_item_swap(s3, s4);
if(s3->dist < s2->dist) {
if (s3->dist < s2->dist) {
obvh_item_swap(s2, s3);
if(s2->dist < s1->dist) {
if (s2->dist < s1->dist) {
obvh_item_swap(s1, s2);
}
}
@@ -191,14 +233,14 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
/* Axis-aligned nodes intersection */
ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& org_idir,
const avx3f &org_idir,
#else
const avx3f& org,
const avx3f &org,
#endif
const avx3f& idir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -210,12 +252,18 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
{
const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_x*2), idir.x, org_idir.x);
const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_y*2), idir.y, org_idir.y);
const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_z*2), idir.z, org_idir.z);
const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_x*2), idir.x, org_idir.x);
const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_y*2), idir.y, org_idir.y);
const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_z*2), idir.z, org_idir.z);
const avxf tnear_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
const avxf tnear_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
const avxf tnear_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
const avxf tfar_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
const avxf tfar_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
const avxf tfar_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
@@ -228,16 +276,15 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
#endif
}
ccl_device_inline int obvh_aligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& P_idir,
const avx3f &P_idir,
#else
const avx3f& P,
const avx3f &P,
#endif
const avx3f& idir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -250,18 +297,24 @@ ccl_device_inline int obvh_aligned_node_intersect_robust(
{
const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
const avxf tnear_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
const avxf tfar_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
const avxf tnear_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
const avxf tfar_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
const avxf tnear_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
const avxf tfar_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
const avxb vmask = round_down*tnear <= round_up*tfar;
const avxb vmask = round_down * tnear <= round_up * tfar;
int mask = (int)movemask(vmask);
*dist = tnear;
return mask;
@@ -272,16 +325,15 @@ ccl_device_inline int obvh_aligned_node_intersect_robust(
/* Unaligned nodes intersection */
ccl_device_inline int obvh_unaligned_node_intersect(
KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& org_idir,
const avx3f &org_idir,
#endif
const avx3f& org,
const avx3f& dir,
const avx3f& idir,
const avx3f &org,
const avx3f &dir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -292,41 +344,38 @@ ccl_device_inline int obvh_unaligned_node_intersect(
avxf *ccl_restrict dist)
{
const int offset = node_addr;
const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
const avxf aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
const avxf neg_one(-1.0f);
const avxf nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const avxf tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const avxf tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const avxf tnear_x = min(tlower_x, tupper_x);
@@ -342,16 +391,15 @@ ccl_device_inline int obvh_unaligned_node_intersect(
return movemask(vmask);
}
ccl_device_inline int obvh_unaligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& P_idir,
const avx3f &P_idir,
#endif
const avx3f& P,
const avx3f& dir,
const avx3f& idir,
const avx3f &P,
const avx3f &dir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -363,41 +411,38 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
avxf *ccl_restrict dist)
{
const int offset = node_addr;
const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2);
const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4);
const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6);
const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8);
const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10);
const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12);
const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14);
const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16);
const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18);
const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20);
const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22);
const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24);
const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
const avxf aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
const avxf neg_one(-1.0f);
const avxf nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const avxf tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const avxf tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const float round_down = 1.0f - difl;
@@ -412,7 +457,7 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
const avxb vmask = round_down*tnear <= round_up*tfar;
const avxb vmask = round_down * tnear <= round_up * tfar;
*dist = tnear;
return movemask(vmask);
}
@@ -422,16 +467,15 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
ccl_device_inline int obvh_node_intersect(
KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& org_idir,
const avx3f &org_idir,
#endif
const avx3f& org,
const avx3f& dir,
const avx3f& idir,
const avx3f &org,
const avx3f &dir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -443,7 +487,7 @@ ccl_device_inline int obvh_node_intersect(
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return obvh_unaligned_node_intersect(kg,
isect_near,
isect_far,
@@ -453,8 +497,12 @@ ccl_device_inline int obvh_node_intersect(
org,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
@@ -468,23 +516,26 @@ ccl_device_inline int obvh_node_intersect(
org,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
}
ccl_device_inline int obvh_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const avxf& isect_near,
const avxf& isect_far,
ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
const avx3f& P_idir,
const avx3f &P_idir,
#endif
const avx3f& P,
const avx3f& dir,
const avx3f& idir,
const avx3f &P,
const avx3f &dir,
const avx3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -497,7 +548,7 @@ ccl_device_inline int obvh_node_intersect_robust(
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return obvh_unaligned_node_intersect_robust(kg,
isect_near,
isect_far,
@@ -507,8 +558,12 @@ ccl_device_inline int obvh_node_intersect_robust(
P,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);
@@ -523,8 +578,12 @@ ccl_device_inline int obvh_node_intersect_robust(
P,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);
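For reference, the unaligned paths reformatted above fetch a 3x4 transform per oriented child (the tfm_* rows plus the tfm_t_* translation), bring the ray into the node's local space, and then run the usual slab test there; tupper = tlower - nrdir amounts to testing the planes at local distances 0 and 1. A hedged sketch of that transform step follows; the Vec3/Transform3x4 types and the sample values are placeholders, not the Cycles Transform.

// Sketch of the ray setup done by obvh_unaligned_node_intersect() above.
// Field names and the sample transform are illustrative only.
#include <cstdio>

struct Vec3 { float x, y, z; };
struct Transform3x4 { float m[3][4]; };  // rows x/y/z, last column = translation

static Vec3 xform_direction(const Transform3x4 &t, const Vec3 &d)
{
  // Same pattern as aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z.
  return {t.m[0][0] * d.x + t.m[0][1] * d.y + t.m[0][2] * d.z,
          t.m[1][0] * d.x + t.m[1][1] * d.y + t.m[1][2] * d.z,
          t.m[2][0] * d.x + t.m[2][1] * d.y + t.m[2][2] * d.z};
}

static Vec3 xform_point(const Transform3x4 &t, const Vec3 &p)
{
  Vec3 r = xform_direction(t, p);  // rotation/scale part
  r.x += t.m[0][3];                // + tfm_t_x
  r.y += t.m[1][3];                // + tfm_t_y
  r.z += t.m[2][3];                // + tfm_t_z
  return r;
}

int main()
{
  // Identity rotation plus a 0.5 translation in x, purely as a demo value.
  const Transform3x4 node_tfm = {{{1, 0, 0, 0.5f}, {0, 1, 0, 0}, {0, 0, 1, 0}}};
  Vec3 aligned_P = xform_point(node_tfm, {1, 2, 3});
  Vec3 aligned_dir = xform_direction(node_tfm, {0, 0, 1});
  std::printf("P=(%g %g %g) dir=(%g %g %g)\n",
              aligned_P.x, aligned_P.y, aligned_P.z,
              aligned_dir.x, aligned_dir.y, aligned_dir.z);
  return 0;
}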

View File

@@ -76,7 +76,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -86,25 +86,22 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
(void) inodes;
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
(void)inodes;
if(false
if (false
#ifdef __VISIBILITY_FLAG__
|| ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
) {
/* Pop. */
@@ -121,33 +118,37 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
P_idir4,
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
//#if !defined(__KERNEL_AVX2__)
//#if !defined(__KERNEL_AVX2__)
org4,
#endif
#if BVH_FEATURE(BVH_HAIR)
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
if(child_mask != 0) {
if (child_mask != 0) {
avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
}
else
#endif
{
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
if(child_mask == 0) {
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -156,12 +157,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
float d0 = ((float *)&dist)[r];
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
@@ -196,8 +197,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -215,8 +216,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
if(child_mask == 0) {
float d3 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -248,8 +249,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c4 = __float_as_int(cnodes[r]);
float d4 = ((float*)&dist)[r];
if(child_mask == 0) {
float d4 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c4;
@@ -269,8 +270,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c5 = __float_as_int(cnodes[r]);
float d5 = ((float*)&dist)[r];
if(child_mask == 0) {
float d5 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c5;
@@ -304,8 +305,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c6 = __float_as_int(cnodes[r]);
float d6 = ((float*)&dist)[r];
if(child_mask == 0) {
float d6 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c6;
@@ -327,7 +328,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c7 = __float_as_int(cnodes[r]);
float d7 = ((float*)&dist)[r];
float d7 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c7;
@@ -354,10 +355,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
@@ -368,7 +369,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -379,20 +380,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
if(p_type == PRIMITIVE_TRIANGLE) {
if (p_type == PRIMITIVE_TRIANGLE) {
int prim_count = prim_addr2 - prim_addr;
if(prim_count < 3) {
while(prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
int hit = triangle_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr);
if (prim_count < 3) {
while (prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
p_type);
int hit = triangle_intersect(
kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
/* Shadow ray early termination. */
if(hit) {
if (hit) {
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
@@ -401,7 +398,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int shader = 0;
#ifdef __HAIR__
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
@@ -415,11 +412,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
return true;
}
/* if maximum number of hits reached, block all light */
else if(*num_hits == max_hits) {
else if (*num_hits == max_hits) {
return true;
}
@@ -435,13 +432,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr++;
} //while
} else {
kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == p_type);
}
else {
kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
p_type);
#if BVH_FEATURE(BVH_INSTANCING)
int* nhiptr = &num_hits_in_instance;
int *nhiptr = &num_hits_in_instance;
#else
int nhi= 0;
int nhi = 0;
int *nhiptr = &nhi;
#endif
@@ -457,20 +456,20 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
max_hits,
nhiptr,
isect_t);
if(result == 2) {
if (result == 2) {
return true;
}
} // prim_count
} // PRIMITIVE_TRIANGLE
else {
while(prim_addr < prim_addr2) {
while (prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
#ifdef __SHADOW_TRICKS__
uint tri_object = (object == OBJECT_NONE)
? kernel_tex_fetch(__prim_object, prim_addr)
: object;
if(tri_object == skip_object) {
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
if (tri_object == skip_object) {
++prim_addr;
continue;
}
@@ -482,18 +481,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
* isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
* might give a few % performance improvement */
switch(p_type) {
switch (p_type) {
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
break;
}
#endif
@@ -501,7 +494,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = cardinal_curve_intersect(kg,
isect_array,
P,
@@ -512,7 +505,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
else {
hit = curve_intersect(kg,
@@ -525,7 +519,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
break;
}
@@ -537,7 +532,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
/* Shadow ray early termination. */
if(hit) {
if (hit) {
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
@@ -546,7 +541,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int shader = 0;
#ifdef __HAIR__
if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
@@ -560,11 +555,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
return true;
}
/* if maximum number of hits reached, block all light */
else if(*num_hits == max_hits) {
else if (*num_hits == max_hits) {
return true;
}
@@ -579,13 +574,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
prim_addr++;
}//while prim
} //while prim
}
}
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
@@ -596,16 +591,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
num_hits_in_instance = 0;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -617,18 +610,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if(num_hits_in_instance) {
if (num_hits_in_instance) {
float t_fac;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
@@ -636,8 +628,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
@@ -651,16 +643,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -672,7 +662,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
}
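The traversal pattern visible throughout the function above: when several children are hit, the kernel continues with the closest one and pushes the rest onto traversal_stack, unrolled by hand for up to eight hits (c0..c7, d0..d7). A schematic, generic sketch of that ordering, assuming a simple StackItem type rather than the kernel's OBVHStackItem:

// Generic illustration of "continue with the closest child, push the rest,
// farthest first". Not the 8-wide Cycles kernel itself.
#include <algorithm>
#include <cstdio>
#include <vector>

struct StackItem { int addr; float dist; };

// children: (node address, entry distance) pairs for the children that were hit.
// Returns the address to continue with, or -1 when the caller should pop instead.
static int pick_next(std::vector<StackItem> &stack, std::vector<StackItem> children)
{
  if (children.empty())
    return -1;
  // Sort by decreasing distance so the closest child ends up last.
  std::sort(children.begin(), children.end(),
            [](const StackItem &a, const StackItem &b) { return a.dist > b.dist; });
  // Push everything but the closest; the farthest goes deepest in the stack.
  for (size_t i = 0; i + 1 < children.size(); ++i)
    stack.push_back(children[i]);
  return children.back().addr;
}

int main()
{
  std::vector<StackItem> stack;
  int next = pick_next(stack, {{7, 2.0f}, {3, 0.5f}, {9, 1.25f}});
  std::printf("continue with %d, stack size %zu\n", next, stack.size());
  return 0;
}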

View File

@@ -37,11 +37,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
,uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
)
)
{
/* Traversal stack in CUDA thread-local memory. */
OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
@@ -77,7 +78,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -87,27 +88,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
(void) inodes;
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
(void)inodes;
if(UNLIKELY(node_dist > isect->t)
if (UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0
#endif
)
{
) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
@@ -121,7 +118,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
BVH_DEBUG_NEXT_NODE();
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
if(difl != 0.0f) {
if (difl != 0.0f) {
/* NOTE: We extend all the child BB instead of fetching
* and checking visibility flags for each of the,
*
@@ -140,8 +137,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
&dist);
@@ -162,32 +163,36 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
}
if(child_mask != 0) {
if (child_mask != 0) {
avxf cnodes;
/* TODO(sergey): Investigate whether moving cnodes upwards
* gives a speedup (will be different cache pattern but will
* avoid extra check here).
*/
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
}
else
#endif
{
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
float d0 = ((float*)&dist)[r];
if(child_mask == 0) {
float d0 = ((float *)&dist)[r];
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
node_dist = d0;
continue;
@@ -199,9 +204,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int c0 = __float_as_int(cnodes[r]);
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
node_dist = d1;
++stack_ptr;
@@ -238,8 +243,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -258,8 +263,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
if(child_mask == 0) {
float d3 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -292,8 +297,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c4 = __float_as_int(cnodes[r]);
float d4 = ((float*)&dist)[r];
if(child_mask == 0) {
float d4 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c4;
@@ -314,8 +319,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c5 = __float_as_int(cnodes[r]);
float d5 = ((float*)&dist)[r];
if(child_mask == 0) {
float d5 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c5;
@@ -350,8 +355,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c6 = __float_as_int(cnodes[r]);
float d6 = ((float*)&dist)[r];
if(child_mask == 0) {
float d6 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c6;
@@ -374,7 +379,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c7 = __float_as_int(cnodes[r]);
float d7 = ((float*)&dist)[r];
float d7 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c7;
@@ -397,21 +402,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
continue;
}
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((node_dist > isect->t) ||
((__float_as_uint(leaf.z) & visibility) == 0)))
if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
#else
if(UNLIKELY((node_dist > isect->t)))
if (UNLIKELY((node_dist > isect->t)))
#endif
{
/* Pop. */
@@ -423,7 +426,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -434,32 +437,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
int prim_count = prim_addr2 - prim_addr;
if(prim_count < 3) {
for(; prim_addr < prim_addr2; prim_addr++) {
if (prim_count < 3) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr))
{
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
tfar = avxf(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
}//for
} //for
}
else {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect8(kg,
if (triangle_intersect8(kg,
&isect,
P,
dir,
@@ -470,33 +466,25 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
0,
0,
NULL,
0.0f))
{
0.0f)) {
tfar = avxf(isect->t);
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
}//prim count
} //prim count
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr))
{
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
tfar = avxf(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
@@ -507,12 +495,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_HAIR)
case PRIMITIVE_CURVE:
case PRIMITIVE_MOTION_CURVE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
bool hit;
if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
hit = cardinal_curve_intersect(kg,
isect,
P,
@@ -540,10 +528,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
difl,
extmax);
}
if(hit) {
if (hit) {
tfar = avxf(isect->t);
/* Shadow ray early termination. */
if(visibility == PATH_RAY_SHADOW_OPAQUE) {
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
}
}
@@ -556,24 +544,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
qbvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -591,10 +578,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@@ -604,16 +591,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -626,7 +611,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}
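On the BVH_HAIR_MINIMUM_WIDTH branch above: when difl is non-zero, the node acceptance test is widened by a relative factor so thin curves sitting right at a box boundary are not culled. A minimal sketch of that test, with made-up sample numbers:

// Sketch of the "robust" acceptance test: the entry/exit interval is dilated
// by a relative factor derived from difl. Illustrative values only.
#include <cstdio>

static bool robust_overlap(float tnear, float tfar, float difl)
{
  const float round_down = 1.0f - difl;
  const float round_up = 1.0f + difl;
  // Matches: vmask = round_down * tnear <= round_up * tfar.
  return round_down * tnear <= round_up * tfar;
}

int main()
{
  // An exact test rejects tnear = 1.001 > tfar = 1.0; a 1% dilation keeps the node.
  std::printf("exact: %d  robust: %d\n",
              robust_overlap(1.001f, 1.0f, 0.0f) ? 1 : 0,
              robust_overlap(1.001f, 1.0f, 0.01f) ? 1 : 0);
  return 0;
}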

View File

@@ -64,7 +64,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -74,19 +74,17 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(inodes.x) & visibility) == 0) {
if ((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
@@ -108,26 +106,30 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
if(child_mask != 0) {
if (child_mask != 0) {
avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
}
else
#endif
{
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
if(child_mask == 0) {
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -136,12 +138,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
float d0 = ((float *)&dist)[r];
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
@@ -176,8 +178,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -195,8 +197,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
if(child_mask == 0) {
float d3 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -228,8 +230,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c4 = __float_as_int(cnodes[r]);
float d4 = ((float*)&dist)[r];
if(child_mask == 0) {
float d4 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c4;
@@ -249,8 +251,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c5 = __float_as_int(cnodes[r]);
float d5 = ((float*)&dist)[r];
if(child_mask == 0) {
float d5 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c5;
@@ -284,8 +286,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c6 = __float_as_int(cnodes[r]);
float d6 = ((float*)&dist)[r];
if(child_mask == 0) {
float d6 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c6;
@@ -307,7 +309,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c7 = __float_as_int(cnodes[r]);
float d7 = ((float*)&dist)[r];
float d7 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c7;
@@ -334,10 +336,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
if((__float_as_uint(leaf.z) & visibility) == 0) {
if ((__float_as_uint(leaf.z) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
@@ -347,7 +349,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -358,14 +360,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
switch(p_type) {
switch (p_type) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
@@ -375,16 +379,19 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -394,25 +401,24 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -434,10 +440,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
@@ -447,16 +453,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -468,7 +472,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return (isect->prim != PRIM_NONE);
}

View File

@@ -68,7 +68,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -78,19 +78,17 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
#ifdef __VISIBILITY_FLAG__
if((__float_as_uint(inodes.x) & visibility) == 0) {
if ((__float_as_uint(inodes.x) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
@@ -112,26 +110,30 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
if(child_mask != 0) {
if (child_mask != 0) {
avxf cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
}
else
#endif
{
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
if(child_mask == 0) {
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -140,12 +142,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
float d0 = ((float *)&dist)[r];
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
@@ -180,8 +182,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -199,8 +201,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
if(child_mask == 0) {
float d3 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -232,8 +234,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c4 = __float_as_int(cnodes[r]);
float d4 = ((float*)&dist)[r];
if(child_mask == 0) {
float d4 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c4;
@@ -253,8 +255,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c5 = __float_as_int(cnodes[r]);
float d5 = ((float*)&dist)[r];
if(child_mask == 0) {
float d5 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c5;
@@ -288,8 +290,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c6 = __float_as_int(cnodes[r]);
float d6 = ((float*)&dist)[r];
if(child_mask == 0) {
float d6 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c6;
@@ -311,7 +313,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c7 = __float_as_int(cnodes[r]);
float d7 = ((float*)&dist)[r];
float d7 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
traversal_stack[stack_ptr].addr = c7;
@@ -338,10 +340,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
if((__float_as_uint(leaf.z) & visibility) == 0) {
if ((__float_as_uint(leaf.z) & visibility) == 0) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
--stack_ptr;
@@ -351,7 +353,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int prim_addr = __float_as_int(leaf.x);
#if BVH_FEATURE(BVH_INSTANCING)
if(prim_addr >= 0) {
if (prim_addr >= 0) {
#endif
int prim_addr2 = __float_as_int(leaf.y);
const uint type = __float_as_int(leaf.w);
@@ -363,19 +365,21 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
switch(p_type) {
switch (p_type) {
case PRIMITIVE_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
if(hit) {
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
@@ -383,7 +387,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
num_hits_in_instance++;
#endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
if (num_hits == max_hits) {
#if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
@@ -391,8 +395,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
#endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
@@ -403,17 +407,20 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if(hit) {
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
num_hits++;
@@ -421,7 +428,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
num_hits_in_instance++;
# endif
isect_array->t = isect_t;
if(num_hits == max_hits) {
if (num_hits == max_hits) {
# if BVH_FEATURE(BVH_INSTANCING)
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
@@ -429,8 +436,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
# endif /* BVH_FEATURE(BVH_INSTANCING) */
return num_hits;
@@ -445,25 +452,24 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_INSTANCING)
else {
/* Instance push. */
object = kernel_tex_fetch(__prim_object, -prim_addr-1);
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
int object_flag = kernel_tex_fetch(__object_flag, object);
if(object_flag & SD_OBJECT_HAS_VOLUME) {
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -488,14 +494,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
}
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
#if BVH_FEATURE(BVH_INSTANCING)
if(stack_ptr >= 0) {
if (stack_ptr >= 0) {
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
if(num_hits_in_instance) {
if (num_hits_in_instance) {
float t_fac;
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
@@ -503,8 +509,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
# endif
/* Scale isect->t to adjust for instancing. */
for(int i = 0; i < num_hits_in_instance; i++) {
(isect_array-i-1)->t *= t_fac;
for (int i = 0; i < num_hits_in_instance; i++) {
(isect_array - i - 1)->t *= t_fac;
}
}
else {
@@ -518,16 +524,14 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
# endif
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# ifdef __KERNEL_AVX2__
P_idir = P*idir;
P_idir = P * idir;
P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
# endif
# if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -539,7 +543,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
--stack_ptr;
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return num_hits;
}
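On the instance pop above: t values recorded while traversing inside an instance are measured against the object-space direction, which is generally not unit length, so they are rescaled by t_fac = 1 / len(object-space direction) when popping back out. A small sketch of that rescale loop, mirroring the (isect_array - i - 1)->t *= t_fac pattern; the Hit type and numbers are illustrative:

// Sketch of rescaling the hits recorded inside an instance back to world scale.
#include <cstdio>

struct Hit { float t; };

static void rescale_instance_hits(Hit *isect_array, int num_hits_in_instance, float t_fac)
{
  // isect_array points one past the last recorded hit, as in the kernel loops above.
  for (int i = 0; i < num_hits_in_instance; i++) {
    (isect_array - i - 1)->t *= t_fac;
  }
}

int main()
{
  Hit hits[3] = {{2.0f}, {4.0f}, {8.0f}};
  const float len_obj_dir = 2.0f;  // |object-space direction|, assumed for the demo
  rescale_instance_hits(hits + 3, 3, 1.0f / len_obj_dir);
  std::printf("%g %g %g\n", hits[0].t, hits[1].t, hits[2].t);  // 1 2 4
  return 0;
}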

View File

@@ -58,23 +58,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
if(local_isect != NULL) {
if (local_isect != NULL) {
local_isect->num_hits = 0;
}
kernel_assert((local_isect == NULL) == (max_hits == 0));
const int object_flag = kernel_tex_fetch(__object_flag, local_object);
if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -88,7 +81,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
#ifdef __KERNEL_AVX2__
float3 P_idir = P*idir;
float3 P_idir = P * idir;
sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
#endif
#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
@@ -98,15 +91,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
/* Traverse internal nodes. */
while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
ssef dist;
int child_mask = NODE_INTERSECT(kg,
tnear,
@@ -121,27 +112,31 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
if(child_mask != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
if (child_mask != 0) {
float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
float4 cnodes;
#if BVH_FEATURE(BVH_HAIR)
if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
}
else
#endif
{
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
}
/* One child is hit, continue with that child. */
int r = __bscf(child_mask);
if(child_mask == 0) {
if (child_mask == 0) {
node_addr = __float_as_int(cnodes[r]);
continue;
}
@@ -150,12 +145,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
float d0 = ((float *)&dist)[r];
r = __bscf(child_mask);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
if(child_mask == 0) {
if(d1 < d0) {
float d1 = ((float *)&dist)[r];
if (child_mask == 0) {
if (d1 < d0) {
node_addr = c1;
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
@@ -190,8 +185,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c2 = __float_as_int(cnodes[r]);
float d2 = ((float*)&dist)[r];
if(child_mask == 0) {
float d2 = ((float *)&dist)[r];
if (child_mask == 0) {
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c2;
@@ -209,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
r = __bscf(child_mask);
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
float d3 = ((float *)&dist)[r];
++stack_ptr;
kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
traversal_stack[stack_ptr].addr = c3;
@@ -229,8 +224,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
}
/* If node is leaf, fetch triangle list. */
if(node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
if (node_addr < 0) {
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
int prim_addr = __float_as_int(leaf.x);
int prim_addr2 = __float_as_int(leaf.y);
@@ -241,12 +236,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
--stack_ptr;
/* Primitive intersection. */
switch(type & PRIMITIVE_ALL) {
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
/* Intersect ray against primitive, */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect_local(kg,
if (triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -264,9 +259,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
/* Intersect ray against primitive. */
for(; prim_addr < prim_addr2; prim_addr++) {
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect_local(kg,
if (motion_triangle_intersect_local(kg,
local_isect,
P,
dir,
@@ -287,8 +282,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
break;
}
}
} while(node_addr != ENTRYPOINT_SENTINEL);
} while(node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
} while (node_addr != ENTRYPOINT_SENTINEL);
return false;
}
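The "closer child" comment in the hunk above refers to the standard two-children-hit case: the traversal continues with the nearer child immediately and defers the farther one to the stack. Roughly, in standalone form (hypothetical names, not the packed QBVH node data):

#include <utility>

struct StackEntry {
  int addr;
};

/* Sketch: visit the closer of two hit children now, push the farther one. */
inline int visit_closer_push_farther(
    int c0, float d0, int c1, float d1, StackEntry *stack, int &stack_ptr)
{
  if (d1 < d0) {
    std::swap(c0, c1); /* make c0 the closer child */
    std::swap(d0, d1);
  }
  stack[++stack_ptr].addr = c1; /* farther child is deferred */
  return c0;                    /* closer child becomes the next node_addr */
}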

View File

@@ -21,7 +21,7 @@ struct QBVHStackItem {
float dist;
};
ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
int *ccl_restrict near_x,
int *ccl_restrict near_y,
int *ccl_restrict near_z,
@@ -31,9 +31,12 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
*near_x = 0; *far_x = 1;
*near_y = 2; *far_y = 3;
*near_z = 4; *far_z = 5;
*near_x = 0;
*far_x = 1;
*near_y = 2;
*far_y = 3;
*near_z = 4;
*far_z = 5;
const size_t mask = movemask(ssef(idir.m128));
@@ -41,21 +44,44 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
const int mask_y = (mask & 2) >> 1;
const int mask_z = (mask & 4) >> 2;
*near_x += mask_x; *far_x -= mask_x;
*near_y += mask_y; *far_y -= mask_y;
*near_z += mask_z; *far_z -= mask_z;
*near_x += mask_x;
*far_x -= mask_x;
*near_y += mask_y;
*far_y -= mask_y;
*near_z += mask_z;
*far_z -= mask_z;
#else
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
if (idir.x >= 0.0f) {
*near_x = 0;
*far_x = 1;
}
else {
*near_x = 1;
*far_x = 0;
}
if (idir.y >= 0.0f) {
*near_y = 2;
*far_y = 3;
}
else {
*near_y = 3;
*far_y = 2;
}
if (idir.z >= 0.0f) {
*near_z = 4;
*far_z = 5;
}
else {
*near_z = 5;
*far_z = 4;
}
#endif
}
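To make the near/far trick concrete: each axis has a (lower, upper) pair of bound slots, and the sign of the inverse direction decides which slot is the "near" plane, so the per-node slab tests below need no sign branches. A scalar sketch with a hypothetical bounds[6] = {lo_x, hi_x, lo_y, hi_y, lo_z, hi_z} layout standing in for the packed node rows:

/* Sketch (hypothetical layout); mirrors the #else branch of qbvh_near_far_idx_calc() above. */
inline void near_far_idx(const float idir[3], int near_idx[3], int far_idx[3])
{
  near_idx[0] = (idir[0] >= 0.0f) ? 0 : 1;
  far_idx[0] = (idir[0] >= 0.0f) ? 1 : 0;
  near_idx[1] = (idir[1] >= 0.0f) ? 2 : 3;
  far_idx[1] = (idir[1] >= 0.0f) ? 3 : 2;
  near_idx[2] = (idir[2] >= 0.0f) ? 4 : 5;
  far_idx[2] = (idir[2] >= 0.0f) ? 5 : 4;
}

/* With the indices fixed once per ray, the entry/exit distances along x are simply:
 *   tnear_x = (bounds[near_idx[0]] - org_x) * idir[0];
 *   tfar_x = (bounds[far_idx[0]] - org_x) * idir[0];
 */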
/* TODO(sergey): Investigate if using intrinsics helps for both
* stack item swap and float comparison.
*/
ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
QBVHStackItem *ccl_restrict b)
ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
{
QBVHStackItem tmp = *a;
*a = *b;
@@ -66,9 +92,15 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s2,
QBVHStackItem *ccl_restrict s3)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
if (s3->dist < s2->dist) {
qbvh_item_swap(s3, s2);
}
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
}
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
@@ -76,25 +108,35 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s3,
QBVHStackItem *ccl_restrict s4)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
if (s4->dist < s3->dist) {
qbvh_item_swap(s4, s3);
}
if (s3->dist < s1->dist) {
qbvh_item_swap(s3, s1);
}
if (s4->dist < s2->dist) {
qbvh_item_swap(s4, s2);
}
if (s3->dist < s2->dist) {
qbvh_item_swap(s3, s2);
}
}
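Both qbvh_stack_sort() overloads above are fixed sorting networks keyed on dist: three compare-and-swaps fully order three stack items, and five fully order four, so the traversal can process nearer children before farther ones. The same five-comparator network on plain floats, for reference:

#include <algorithm>

/* Sketch: the (s2,s1)(s4,s3)(s3,s1)(s4,s2)(s3,s2) network above on plain floats. */
inline void sort4(float &a, float &b, float &c, float &d)
{
  if (b < a) {
    std::swap(b, a);
  }
  if (d < c) {
    std::swap(d, c);
  }
  if (c < a) {
    std::swap(c, a);
  }
  if (d < b) {
    std::swap(d, b);
  }
  if (c < b) {
    std::swap(c, b);
  }
}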
/* Axis-aligned nodes intersection */
//ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& org_idir,
const sse3f &org_idir,
#else
const sse3f& org,
const sse3f &org,
#endif
const sse3f& idir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -106,26 +148,29 @@ static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
{
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
const ssef tnear_x = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
const ssef tnear_z = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
#else
const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
#endif
#ifdef __KERNEL_SSE41__
const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
const sseb vmask = cast(tnear) > cast(tfar);
int mask = (int)movemask(vmask)^0xf;
int mask = (int)movemask(vmask) ^ 0xf;
#else
const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
@@ -136,16 +181,15 @@ static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
return mask;
}
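The __KERNEL_AVX2__ branch above relies on the identity (bound - P) * idir == bound * idir - P * idir: with org_idir = P * idir precomputed once per ray, each plane distance collapses to a single multiply-subtract (msub). A scalar sketch of the two equivalent forms (equal up to floating-point rounding), with hypothetical names:

#include <cmath>

/* Reference form, as used by the non-AVX2 path. */
inline float slab_plane_t_ref(float bound, float P, float idir)
{
  return (bound - P) * idir;
}

/* FMA form, as used by the AVX2 path: P_idir = P * idir is precomputed per ray,
 * and std::fma(bound, idir, -P_idir) plays the role of msub(bound, idir, P_idir). */
inline float slab_plane_t_fma(float bound, float idir, float P_idir)
{
  return std::fma(bound, idir, -P_idir);
}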
ccl_device_inline int qbvh_aligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& P_idir,
const sse3f &P_idir,
#else
const sse3f& P,
const sse3f &P,
#endif
const sse3f& idir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -158,42 +202,41 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
{
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z);
const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x);
const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y);
const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z);
#else
const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x;
const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y;
const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z;
const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x;
const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y;
const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z;
#endif
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
const sseb vmask = round_down*tnear <= round_up*tfar;
const sseb vmask = round_down * tnear <= round_up * tfar;
*dist = tnear;
return (int)movemask(vmask);
}
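The only difference in the _robust variant above is the acceptance test: round_down * tnear <= round_up * tfar, with round_down = 1 - difl and round_up = 1 + difl, which slightly widens every node interval (presumably for the hair minimum-width path, where difl comes from the ray differential). In scalar form:

/* Sketch of the widened slab acceptance used by the *_robust node tests. */
inline bool slab_hit_robust(float tnear, float tfar, float difl)
{
  return (1.0f - difl) * tnear <= (1.0f + difl) * tfar;
}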
/* Unaligned nodes intersection */
ccl_device_inline int qbvh_unaligned_node_intersect(
KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& org_idir,
const sse3f &org_idir,
#endif
const sse3f& org,
const sse3f& dir,
const sse3f& idir,
const sse3f &org,
const sse3f &dir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -204,41 +247,38 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
ssef *ccl_restrict dist)
{
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x,
aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
const ssef nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const ssef tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const ssef tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
#ifdef __KERNEL_SSE41__
@@ -268,16 +308,15 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
#endif
}
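For unaligned nodes, the twelve rows fetched above (tfm_x_* through tfm_t_*) form a per-node affine transform into the node's local frame; after rotating the direction and transforming the origin, the oriented box reduces to the unit box, which is why tlower = aligned_P * nrdir and tupper = tlower - nrdir with nrdir = -1 / aligned_dir. A scalar sketch of the transform, with hypothetical plain types:

struct Float3 {
  float x, y, z;
};

/* Hypothetical 3x4 row-major transform standing in for the tfm_*_* node rows. */
struct NodeTransform {
  float m[3][4];
};

/* Sketch: bring the ray into node space; the slab test then runs against [0,1]^3. */
inline void ray_to_node_space(
    const NodeTransform &tfm, const Float3 &org, const Float3 &dir, Float3 *org_l, Float3 *dir_l)
{
  dir_l->x = tfm.m[0][0] * dir.x + tfm.m[0][1] * dir.y + tfm.m[0][2] * dir.z;
  dir_l->y = tfm.m[1][0] * dir.x + tfm.m[1][1] * dir.y + tfm.m[1][2] * dir.z;
  dir_l->z = tfm.m[2][0] * dir.x + tfm.m[2][1] * dir.y + tfm.m[2][2] * dir.z;
  org_l->x = tfm.m[0][0] * org.x + tfm.m[0][1] * org.y + tfm.m[0][2] * org.z + tfm.m[0][3];
  org_l->y = tfm.m[1][0] * org.x + tfm.m[1][1] * org.y + tfm.m[1][2] * org.z + tfm.m[1][3];
  org_l->z = tfm.m[2][0] * org.x + tfm.m[2][1] * org.y + tfm.m[2][2] * org.z + tfm.m[2][3];
}

/* Per axis: distances to the two unit-box planes, mirroring tlower/tupper above. */
inline void unit_box_slab(float P_local, float dir_local, float *t_lo, float *t_hi)
{
  const float nrdir = -1.0f / dir_local;
  *t_lo = P_local * nrdir; /* plane at 0: (0 - P) / dir */
  *t_hi = *t_lo - nrdir;   /* plane at 1: (1 - P) / dir */
}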
ccl_device_inline int qbvh_unaligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& P_idir,
const sse3f &P_idir,
#endif
const sse3f& P,
const sse3f& dir,
const sse3f& idir,
const sse3f &P,
const sse3f &dir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -289,41 +328,38 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
ssef *ccl_restrict dist)
{
const int offset = node_addr;
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y,
aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z;
const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x,
aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y,
aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
const ssef nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const ssef tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const ssef tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const float round_down = 1.0f - difl;
@@ -346,7 +382,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
#endif
const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
const sseb vmask = round_down*tnear <= round_up*tfar;
const sseb vmask = round_down * tnear <= round_up * tfar;
*dist = tnear;
return movemask(vmask);
}
@@ -356,16 +392,15 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
ccl_device_inline int qbvh_node_intersect(
KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& org_idir,
const sse3f &org_idir,
#endif
const sse3f& org,
const sse3f& dir,
const sse3f& idir,
const sse3f &org,
const sse3f &dir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -377,7 +412,7 @@ ccl_device_inline int qbvh_node_intersect(
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return qbvh_unaligned_node_intersect(kg,
isect_near,
isect_far,
@@ -387,8 +422,12 @@ ccl_device_inline int qbvh_node_intersect(
org,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
@@ -402,23 +441,26 @@ ccl_device_inline int qbvh_node_intersect(
org,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
}
ccl_device_inline int qbvh_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
const ssef& isect_near,
const ssef& isect_far,
ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
const sse3f& P_idir,
const sse3f &P_idir,
#endif
const sse3f& P,
const sse3f& dir,
const sse3f& idir,
const sse3f &P,
const sse3f &dir,
const sse3f &idir,
const int near_x,
const int near_y,
const int near_z,
@@ -431,7 +473,7 @@ ccl_device_inline int qbvh_node_intersect_robust(
{
const int offset = node_addr;
const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return qbvh_unaligned_node_intersect_robust(kg,
isect_near,
isect_far,
@@ -441,8 +483,12 @@ ccl_device_inline int qbvh_node_intersect_robust(
P,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);
@@ -457,8 +503,12 @@ ccl_device_inline int qbvh_node_intersect_robust(
P,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);

Some files were not shown because too many files have changed in this diff.