ClangFormat: apply to source, most of intern

Apply clang format as proposed in T53211.

For details on usage and instructions for migrating branches
without conflicts, see:

https://wiki.blender.org/wiki/Tools/ClangFormat
This commit is contained in:
2019-04-17 06:17:24 +02:00
parent b3dabc200a
commit e12c08e8d1
4481 changed files with 1230080 additions and 1155401 deletions

View File

@@ -113,7 +113,8 @@ ATOMIC_INLINE size_t atomic_sub_and_fetch_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_add_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_fetch_and_sub_z(size_t *p, size_t x);
ATOMIC_INLINE size_t atomic_cas_z(size_t *v, size_t old, size_t _new);
ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE size_t
atomic_fetch_and_update_max_z(size_t *p, size_t x); /* Uses CAS loop, see warning below. */
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x);
ATOMIC_INLINE unsigned int atomic_sub_and_fetch_u(unsigned int *p, unsigned int x);
@@ -123,7 +124,6 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig
ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new);
ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new);
/* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation,

View File

@@ -111,7 +111,8 @@ ATOMIC_INLINE size_t atomic_fetch_and_update_max_z(size_t *p, size_t x)
/******************************************************************************/
/* unsigned operations. */
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT, "sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_STATIC_ASSERT(sizeof(unsigned int) == LG_SIZEOF_INT,
"sizeof(unsigned int) != LG_SIZEOF_INT");
ATOMIC_INLINE unsigned int atomic_add_and_fetch_u(unsigned int *p, unsigned int x)
{

View File

@@ -209,7 +209,6 @@ ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
#endif
}
#if defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@@ -109,8 +109,7 @@ ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -120,8 +119,7 @@ ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
x = (uint64_t)(-(int64_t)x);
asm volatile (
"lock; xaddq %0, %1;"
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -141,19 +139,14 @@ ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
uint64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
asm volatile (
"lock; xaddq %0, %1;"
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -163,8 +156,7 @@ ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
x = -x;
asm volatile (
"lock; xaddq %0, %1;"
asm volatile("lock; xaddq %0, %1;"
: "+r"(x), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -184,11 +176,7 @@ ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
int64_t ret;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
# else
@@ -236,8 +224,7 @@ ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -247,8 +234,7 @@ ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
uint32_t ret = (uint32_t)(-(int32_t)x);
asm volatile (
"lock; xaddl %0, %1;"
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -258,11 +244,7 @@ ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
uint32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}
@@ -270,8 +252,7 @@ ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _ne
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = x;
asm volatile (
"lock; xaddl %0, %1;"
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -281,8 +262,7 @@ ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
int32_t ret = -x;
asm volatile (
"lock; xaddl %0, %1;"
asm volatile("lock; xaddl %0, %1;"
: "+r"(ret), "=m"(*p) /* Outputs. */
: "m"(*p) /* Inputs. */
);
@@ -292,11 +272,7 @@ ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
int32_t ret;
asm volatile (
"lock; cmpxchgl %2,%1"
: "=a" (ret), "+m" (*v)
: "r" (_new), "0" (old)
: "memory");
asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
return ret;
}

View File

@@ -100,8 +100,7 @@
/* Copied from BLI_utils... */
/* C++ can't use _Static_assert, expects static_assert() but c++0x only,
* Coverity also errors out. */
#if (!defined(__cplusplus)) && \
(!defined(__COVERITY__)) && \
#if (!defined(__cplusplus)) && (!defined(__COVERITY__)) && \
(defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 406)) /* gcc4.6+ only */
# define ATOMIC_STATIC_ASSERT(a, msg) __extension__ _Static_assert(a, msg);
#else
@@ -113,14 +112,16 @@
/* These can't be used after statements in c89. */
# if defined(__COUNTER__) /* MSVC */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
; \
enum { ATOMIC_ASSERT_CONCAT(static_assert_, __COUNTER__) = 1 / (int)(!!(a)) };
# else /* older gcc, clang... */
/* This can't be used twice on the same line so ensure if using in headers
* that the headers are not included twice (by wrapping in #ifndef...#endif)
* Note it doesn't cause an issue when used on same line of separate modules
* compiled with gcc -combine -fwhole-program. */
# define ATOMIC_STATIC_ASSERT(a, msg) \
; enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
; \
enum { ATOMIC_ASSERT_CONCAT(assert_line_, __LINE__) = 1 / (int)(!!(a)) };
# endif
#endif

View File

@@ -54,14 +54,15 @@ static PyObject *AUD_getSoundFromPointer(PyObject *self, PyObject *args)
}
static PyMethodDef meth_sound_from_pointer[] = {
{"_sound_from_pointer", (PyCFunction)AUD_getSoundFromPointer, METH_O,
{"_sound_from_pointer",
(PyCFunction)AUD_getSoundFromPointer,
METH_O,
"_sound_from_pointer(pointer)\n\n"
"Returns the corresponding :class:`Factory` object.\n\n"
":arg pointer: The pointer to the bSound object as long.\n"
":type pointer: long\n"
":return: The corresponding :class:`Factory` object.\n"
":rtype: :class:`Factory`"}
};
":rtype: :class:`Factory`"}};
PyObject *AUD_initPython(void)
{
@@ -71,9 +72,9 @@ PyObject *AUD_initPython(void)
return NULL;
}
PyModule_AddObject(module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyModule_AddObject(
module, "_sound_from_pointer", (PyObject *)PyCFunction_New(meth_sound_from_pointer, NULL));
PyDict_SetItemString(PyImport_GetModuleDict(), "aud", module);
return module;
}

View File

@@ -22,7 +22,6 @@
* \ingroup audaspaceintern
*/
#ifndef __AUD_PYINIT_H__
#define __AUD_PYINIT_H__

View File

@@ -79,7 +79,8 @@ extern "C" {
#endif
#ifdef __GNUC__
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) __attribute__((format(printf, format_param, dots_param)))
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param) \
__attribute__((format(printf, format_param, dots_param)))
#else
# define _CLOG_ATTR_PRINTF_FORMAT(format_param, dots_param)
#endif
@@ -119,14 +120,17 @@ typedef struct CLG_LogRef {
CLG_LogType *type;
} CLG_LogRef;
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *message)
_CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *format, ...)
_CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message) _CLOG_ATTR_NONNULL(1, 3, 4, 5);
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *format,
...) _CLOG_ATTR_NONNULL(1, 3, 4, 5) _CLOG_ATTR_PRINTF_FORMAT(5, 6);
/* Main initializer and destructor (per session, not logger). */
void CLG_init(void);
@@ -154,41 +158,53 @@ void CLG_logref_init(CLG_LogRef *clg_ref);
#define CLOG_ENSURE(clg_ref) \
((clg_ref)->type ? (clg_ref)->type : (CLG_logref_init(clg_ref), (clg_ref)->type))
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) { \
#define CLOG_AT_SEVERITY(clg_ref, severity, verbose_level, ...) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_logf(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, __VA_ARGS__); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) { \
#define CLOG_STR_AT_SEVERITY(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, str); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) { \
#define CLOG_STR_AT_SEVERITY_N(clg_ref, severity, verbose_level, str) \
{ \
CLG_LogType *_lg_ty = CLOG_ENSURE(clg_ref); \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || (severity >= CLG_SEVERITY_WARN)) { \
if (((_lg_ty->flag & CLG_FLAG_USE) && (_lg_ty->level >= verbose_level)) || \
(severity >= CLG_SEVERITY_WARN)) { \
const char *_str = str; \
CLG_log_str(_lg_ty, severity, __FILE__ ":" STRINGIFY(__LINE__), __func__, _str); \
MEM_freeN((void *)_str); \
} \
} ((void)0)
} \
((void)0)
#define CLOG_INFO(clg_ref, level, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_INFO(clg_ref, level, ...) \
CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, __VA_ARGS__)
#define CLOG_WARN(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, __VA_ARGS__)
#define CLOG_ERROR(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, __VA_ARGS__)
#define CLOG_FATAL(clg_ref, ...) CLOG_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, __VA_ARGS__)
#define CLOG_STR_INFO(clg_ref, level, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_INFO(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL(clg_ref, str) CLOG_STR_AT_SEVERITY(clg_ref, CLG_SEVERITY_FATAL, 0, str)
/* Allocated string which is immediately freed. */
#define CLOG_STR_INFO_N(clg_ref, level, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_INFO_N(clg_ref, level, str) \
CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_INFO, level, str)
#define CLOG_STR_WARN_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_WARN, 0, str)
#define CLOG_STR_ERROR_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_ERROR, 0, str)
#define CLOG_STR_FATAL_N(clg_ref, str) CLOG_STR_AT_SEVERITY_N(clg_ref, CLG_SEVERITY_FATAL, 0, str)

View File

@@ -46,7 +46,6 @@
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
/* Only other dependency (could use regular malloc too). */
#include "MEM_guardedalloc.h"
@@ -300,15 +299,12 @@ static bool clg_ctx_filter_check(CLogContext *ctx, const char *identifier)
const CLG_IDFilter *flt = ctx->filters[i];
while (flt != NULL) {
const int len = strlen(flt->match);
if (STREQ(flt->match, "*") ||
((len == identifier_len) && (STREQ(identifier, flt->match))))
{
if (STREQ(flt->match, "*") || ((len == identifier_len) && (STREQ(identifier, flt->match)))) {
return (bool)i;
}
if ((len >= 2) && (STREQLEN(".*", &flt->match[len - 2], 2))) {
if (((identifier_len == len - 2) && STREQLEN(identifier, flt->match, len - 2)) ||
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1)))
{
((identifier_len >= len - 1) && STREQLEN(identifier, flt->match, len - 1))) {
return (bool)i;
}
}
@@ -388,9 +384,11 @@ static void write_timestamp(CLogStringBuf *cstr, const uint64_t timestamp_tick_s
{
char timestamp_str[64];
const uint64_t timestamp = clg_timestamp_ticks_get() - timestamp_tick_start;
const uint timestamp_len = snprintf(
timestamp_str, sizeof(timestamp_str), "%" PRIu64 ".%03u ",
timestamp / 1000, (uint)(timestamp % 1000));
const uint timestamp_len = snprintf(timestamp_str,
sizeof(timestamp_str),
"%" PRIu64 ".%03u ",
timestamp / 1000,
(uint)(timestamp % 1000));
clg_str_append_with_len(cstr, timestamp_str, timestamp_len);
}
@@ -415,7 +413,10 @@ static void write_type(CLogStringBuf *cstr, CLG_LogType *lg)
clg_str_append(cstr, "): ");
}
static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const char *fn, const bool use_basename)
static void write_file_line_fn(CLogStringBuf *cstr,
const char *file_line,
const char *fn,
const bool use_basename)
{
uint file_line_len = strlen(file_line);
if (use_basename) {
@@ -431,14 +432,15 @@ static void write_file_line_fn(CLogStringBuf *cstr, const char *file_line, const
}
clg_str_append_with_len(cstr, file_line, file_line_len);
clg_str_append(cstr, " ");
clg_str_append(cstr, fn);
clg_str_append(cstr, ": ");
}
void CLG_log_str(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
void CLG_log_str(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *message)
{
CLogStringBuf cstr;
@@ -473,9 +475,12 @@ void CLG_log_str(
}
}
void CLG_logf(
CLG_LogType *lg, enum CLG_Severity severity, const char *file_line, const char *fn,
const char *fmt, ...)
void CLG_logf(CLG_LogType *lg,
enum CLG_Severity severity,
const char *file_line,
const char *fn,
const char *fmt,
...)
{
CLogStringBuf cstr;
char cstr_stack_buf[CLOG_BUF_LEN_INIT];
@@ -552,7 +557,9 @@ static void CLG_ctx_backtrace_fn_set(CLogContext *ctx, void (*backtrace_fn)(void
ctx->callbacks.backtrace_fn = backtrace_fn;
}
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type_match, int type_match_len)
static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list,
const char *type_match,
int type_match_len)
{
if (type_match_len == 0) {
return;
@@ -564,12 +571,16 @@ static void clg_ctx_type_filter_append(CLG_IDFilter **flt_list, const char *type
/* no need to null terminate since we calloc'd */
}
static void CLG_ctx_type_filter_exclude(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_exclude(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[0], type_match, type_match_len);
}
static void CLG_ctx_type_filter_include(CLogContext *ctx, const char *type_match, int type_match_len)
static void CLG_ctx_type_filter_include(CLogContext *ctx,
const char *type_match,
int type_match_len)
{
clg_ctx_type_filter_append(&ctx->filters[1], type_match, type_match_len);
}
@@ -679,7 +690,6 @@ void CLG_level_set(int level)
CLG_ctx_level_set(g_ctx, level);
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -33,23 +33,19 @@ using std::string;
using std::vector;
namespace std {
template<typename T>
std::string to_string(const T &n) {
template<typename T> std::string to_string(const T &n)
{
std::ostringstream s;
s << n;
return s.str();
}
}
} // namespace std
class CompilationSettings
{
class CompilationSettings {
public:
CompilationSettings()
: target_arch(0),
bits(64),
verbose(false),
fast_math(false)
{}
CompilationSettings() : target_arch(0), bits(64), verbose(false), fast_math(false)
{
}
string cuda_toolkit_dir;
string input_file;
@@ -140,7 +136,8 @@ static bool compile_cuda(CompilationSettings &settings)
}
/* Write a file in the temp folder with the ptx code. */
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + OIIO::Filesystem::unique_path();
settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" +
OIIO::Filesystem::unique_path();
FILE *f = fopen(settings.ptx_file.c_str(), "wb");
fwrite(&ptx_code[0], 1, ptx_size, f);
fclose(f);
@@ -154,10 +151,9 @@ static bool link_ptxas(CompilationSettings &settings)
if (settings.cuda_toolkit_dir.size())
cudapath = settings.cuda_toolkit_dir + "/bin/";
string ptx = "\"" +cudapath + "ptxas\" " + settings.ptx_file +
" -o " + settings.output_file +
" --gpu-name sm_" + std::to_string(settings.target_arch) +
" -m" + std::to_string(settings.bits);
string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file +
" --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" +
std::to_string(settings.bits);
if (settings.verbose) {
ptx += " --verbose";
@@ -235,15 +231,33 @@ static bool parse_parameters(int argc, const char **argv, CompilationSettings &s
{
OIIO::ArgParse ap;
ap.options("Usage: cycles_cubin_cc [options]",
"-target %d", &settings.target_arch, "target shader model",
"-m %d", &settings.bits, "Cuda architecture bits",
"-i %s", &settings.input_file, "Input source filename",
"-o %s", &settings.output_file, "Output cubin filename",
"-I %L", &settings.includes, "Add additional includepath",
"-D %L", &settings.defines, "Add additional defines",
"-v", &settings.verbose, "Use verbose logging",
"--use_fast_math", &settings.fast_math, "Use fast math",
"-cuda-toolkit-dir %s", &settings.cuda_toolkit_dir, "path to the cuda toolkit binary directory",
"-target %d",
&settings.target_arch,
"target shader model",
"-m %d",
&settings.bits,
"Cuda architecture bits",
"-i %s",
&settings.input_file,
"Input source filename",
"-o %s",
&settings.output_file,
"Output cubin filename",
"-I %L",
&settings.includes,
"Add additional includepath",
"-D %L",
&settings.defines,
"Add additional defines",
"-v",
&settings.verbose,
"Use verbose logging",
"--use_fast_math",
&settings.fast_math,
"Use fast math",
"-cuda-toolkit-dir %s",
&settings.cuda_toolkit_dir,
"path to the cuda toolkit binary directory",
NULL);
if (ap.parse(argc, argv) < 0) {

View File

@@ -52,12 +52,22 @@ int main(int argc, const char **argv)
ArgParse ap;
ap.options("Usage: cycles_server [options]",
"--device %s", &devicename, ("Devices to use: " + devicelist).c_str(),
"--list-devices", &list, "List information about all available devices",
"--threads %d", &threads, "Number of threads to use for CPU device",
"--device %s",
&devicename,
("Devices to use: " + devicelist).c_str(),
"--list-devices",
&list,
"List information about all available devices",
"--threads %d",
&threads,
"Number of threads to use for CPU device",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
"--verbose %d", &verbosity, "Set verbosity of the logger",
"--debug",
&debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif
NULL);
@@ -78,9 +88,7 @@ int main(int argc, const char **argv)
printf("Devices:\n");
foreach (DeviceInfo &info, devices) {
printf(" %s%s\n",
info.description.c_str(),
(info.display_device)? " (display)": "");
printf(" %s%s\n", info.description.c_str(), (info.display_device) ? " (display)" : "");
}
exit(EXIT_SUCCESS);

View File

@@ -104,11 +104,8 @@ static bool write_render(const uchar *pixels, int w, int h, int channels)
}
/* conversion for different top/bottom convention */
out->write_image(TypeDesc::UINT8,
pixels + (h - 1) * w * channels,
AutoStride,
-w * channels,
AutoStride);
out->write_image(
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
out->close();
@@ -210,7 +207,12 @@ static void display_info(Progress& progress)
" Progress: %05.2f"
" Average: %.4f"
" Interactive: %s",
status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
status.c_str(),
total_time,
latency,
(double)progress_val * 100,
sample_time,
interactive.c_str());
view_display_info(str.c_str());
@@ -325,11 +327,21 @@ static void keyboard(unsigned char key)
else if (options.interactive && (key == '0' || key == '1' || key == '2' || key == '3')) {
int bounce;
switch (key) {
case '0': bounce = 0; break;
case '1': bounce = 1; break;
case '2': bounce = 2; break;
case '3': bounce = 3; break;
default: bounce = 0; break;
case '0':
bounce = 0;
break;
case '1':
bounce = 1;
break;
case '2':
bounce = 2;
break;
case '3':
bounce = 3;
break;
default:
bounce = 0;
break;
}
options.session->scene->integrator->max_bounce = bounce;
@@ -381,27 +393,61 @@ static void options_parse(int argc, const char **argv)
int verbosity = 1;
ap.options("Usage: cycles [options] file.xml",
"%*", files_parse, "",
"--device %s", &devicename, ("Devices to use: " + device_names).c_str(),
"%*",
files_parse,
"",
"--device %s",
&devicename,
("Devices to use: " + device_names).c_str(),
#ifdef WITH_OSL
"--shadingsys %s", &ssname, "Shading system to use: svm, osl",
"--shadingsys %s",
&ssname,
"Shading system to use: svm, osl",
#endif
"--background", &options.session_params.background, "Render in background, without user interface",
"--quiet", &options.quiet, "In background mode, don't print progress messages",
"--samples %d", &options.session_params.samples, "Number of samples to render",
"--output %s", &options.output_path, "File path to write output image",
"--threads %d", &options.session_params.threads, "CPU Rendering Threads",
"--width %d", &options.width, "Window width in pixel",
"--height %d", &options.height, "Window height in pixel",
"--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels",
"--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels",
"--list-devices", &list, "List information about all available devices",
"--background",
&options.session_params.background,
"Render in background, without user interface",
"--quiet",
&options.quiet,
"In background mode, don't print progress messages",
"--samples %d",
&options.session_params.samples,
"Number of samples to render",
"--output %s",
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
"CPU Rendering Threads",
"--width %d",
&options.width,
"Window width in pixel",
"--height %d",
&options.height,
"Window height in pixel",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
"--verbose %d", &verbosity, "Set verbosity of the logger",
"--debug",
&debug,
"Enable debug logging",
"--verbose %d",
&verbosity,
"Set verbosity of the logger",
#endif
"--help", &help, "Print help message",
"--version", &version, "Print version number",
"--help",
&help,
"Print help message",
"--version",
&version,
"Print version number",
NULL);
if (ap.parse(argc, argv) < 0) {
@@ -469,7 +515,8 @@ static void options_parse(int argc, const char **argv)
fprintf(stderr, "Unknown shading system: %s\n", ssname.c_str());
exit(EXIT_FAILURE);
}
else if(options.scene_params.shadingsystem == SHADINGSYSTEM_OSL && options.session_params.device.type != DEVICE_CPU) {
else if (options.scene_params.shadingsystem == SHADINGSYSTEM_OSL &&
options.session_params.device.type != DEVICE_CPU) {
fprintf(stderr, "OSL shading system only works with CPU device\n");
exit(EXIT_FAILURE);
}
@@ -509,8 +556,15 @@ int main(int argc, const char **argv)
string title = "Cycles: " + path_filename(options.filepath);
/* init/exit are callback so they run while GL is initialized */
view_main_loop(title.c_str(), options.width, options.height,
session_init, session_exit, resize, display, keyboard, motion);
view_main_loop(title.c_str(),
options.width,
options.height,
session_init,
session_exit,
resize,
display,
keyboard,
motion);
}
#endif

View File

@@ -58,11 +58,7 @@ struct XMLReadState : public XMLReader {
string base; /* base path to current file*/
float dicing_rate; /* current dicing rate */
XMLReadState()
: scene(NULL),
smooth(false),
shader(NULL),
dicing_rate(1.0f)
XMLReadState() : scene(NULL), smooth(false), shader(NULL), dicing_rate(1.0f)
{
tfm = transform_identity();
}
@@ -248,7 +244,10 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
output = out;
if (!output)
fprintf(stderr, "Unknown output socket name \"%s\" on \"%s\".\n", from_node_name.c_str(), from_socket_name.c_str());
fprintf(stderr,
"Unknown output socket name \"%s\" on \"%s\".\n",
from_node_name.c_str(),
from_socket_name.c_str());
}
else
fprintf(stderr, "Unknown shader node name \"%s\".\n", from_node_name.c_str());
@@ -261,7 +260,10 @@ static void xml_read_shader_graph(XMLReadState& state, Shader *shader, xml_node
input = in;
if (!input)
fprintf(stderr, "Unknown input socket name \"%s\" on \"%s\".\n", to_socket_name.c_str(), to_node_name.c_str());
fprintf(stderr,
"Unknown input socket name \"%s\" on \"%s\".\n",
to_socket_name.c_str(),
to_node_name.c_str());
}
else
fprintf(stderr, "Unknown shader node name \"%s\".\n", to_node_name.c_str());

View File

@@ -87,8 +87,7 @@ struct BlenderCamera {
int motion_steps;
};
static void blender_camera_init(BlenderCamera *bcam,
BL::RenderSettings& b_render)
static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_render)
{
memset((void *)bcam, 0, sizeof(BlenderCamera));
@@ -149,8 +148,7 @@ static void blender_camera_from_object(BlenderCamera *bcam,
bcam->nearclip = b_camera.clip_start();
bcam->farclip = b_camera.clip_end();
switch(b_camera.type())
{
switch (b_camera.type()) {
case BL::Camera::type_ORTHO:
bcam->type = CAMERA_ORTHOGRAPHIC;
break;
@@ -166,10 +164,8 @@ static void blender_camera_from_object(BlenderCamera *bcam,
break;
}
bcam->panorama_type = (PanoramaType)get_enum(ccamera,
"panorama_type",
PANORAMA_NUM_TYPES,
PANORAMA_EQUIRECTANGULAR);
bcam->panorama_type = (PanoramaType)get_enum(
ccamera, "panorama_type", PANORAMA_NUM_TYPES, PANORAMA_EQUIRECTANGULAR);
bcam->fisheye_fov = RNA_float_get(&ccamera, "fisheye_fov");
bcam->fisheye_lens = RNA_float_get(&ccamera, "fisheye_lens");
@@ -252,20 +248,16 @@ static Transform blender_camera_matrix(const Transform& tfm,
/* Mirror ball camera is looking into the negative Y direction
* which matches texture mirror ball mapping.
*/
result = tfm *
make_transform(1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f);
result = tfm * make_transform(
1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f);
}
else {
/* Make it so environment camera needs to be pointed in the direction
* of the positive x-axis to match an environment texture, this way
* it is looking at the center of the texture
*/
result = tfm *
make_transform( 0.0f, -1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
-1.0f, 0.0f, 0.0f, 0.0f);
result = tfm * make_transform(
0.0f, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 0.0f, 0.0f);
}
}
else {
@@ -277,7 +269,8 @@ static Transform blender_camera_matrix(const Transform& tfm,
}
static void blender_camera_viewplane(BlenderCamera *bcam,
int width, int height,
int width,
int height,
BoundBox2D *viewplane,
float *aspectratio,
float *sensor_size)
@@ -365,7 +358,8 @@ static void blender_camera_viewplane(BlenderCamera *bcam,
static void blender_camera_sync(Camera *cam,
BlenderCamera *bcam,
int width, int height,
int width,
int height,
const char *viewname,
PointerRNA *cscene)
{
@@ -374,8 +368,7 @@ static void blender_camera_sync(Camera *cam,
float aspectratio, sensor_size;
/* viewplane */
blender_camera_viewplane(bcam, width, height,
&cam->viewplane, &aspectratio, &sensor_size);
blender_camera_viewplane(bcam, width, height, &cam->viewplane, &aspectratio, &sensor_size);
cam->width = bcam->full_width;
cam->height = bcam->full_height;
@@ -459,9 +452,7 @@ static void blender_camera_sync(Camera *cam,
cam->bladesrotation = bcam->aperturerotation;
/* transform */
cam->matrix = blender_camera_matrix(bcam->matrix,
bcam->type,
bcam->panorama_type);
cam->matrix = blender_camera_matrix(bcam->matrix, bcam->type, bcam->panorama_type);
cam->motion.clear();
cam->motion.resize(bcam->motion_steps, cam->matrix);
cam->use_perspective_motion = false;
@@ -491,7 +482,8 @@ static void blender_camera_sync(Camera *cam,
void BlenderSync::sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width, int height,
int width,
int height,
const char *viewname)
{
BlenderCamera bcam;
@@ -506,13 +498,12 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
curvemapping_to_array(b_shutter_curve, bcam.shutter_curve, RAMP_TABLE_SIZE);
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
bcam.motion_position =
(Camera::MotionPosition)get_enum(cscene,
bcam.motion_position = (Camera::MotionPosition)get_enum(cscene,
"motion_blur_position",
Camera::MOTION_NUM_POSITIONS,
Camera::MOTION_POSITION_CENTER);
bcam.rolling_shutter_type =
(Camera::RollingShutterType)get_enum(cscene,
bcam.rolling_shutter_type = (Camera::RollingShutterType)get_enum(
cscene,
"rolling_shutter_type",
Camera::ROLLING_SHUTTER_NUM_TYPES,
Camera::ROLLING_SHUTTER_NONE);
@@ -558,10 +549,8 @@ void BlenderSync::sync_camera(BL::RenderSettings& b_render,
}
}
void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time)
void BlenderSync::sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time)
{
if (!b_ob)
return;
@@ -593,11 +582,7 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
bcam.pixelaspect.y = b_render.pixel_aspect_y();
blender_camera_from_object(&bcam, b_engine, b_ob);
blender_camera_viewplane(&bcam,
width, height,
NULL,
&aspectratio,
&sensor_size);
blender_camera_viewplane(&bcam, width, height, NULL, &aspectratio, &sensor_size);
/* TODO(sergey): De-duplicate calculation with camera sync. */
float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio);
if (fov != cam->fov) {
@@ -625,7 +610,8 @@ static void blender_camera_view_subset(BL::RenderEngine& b_engine,
BL::Object &b_ob,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height,
int width,
int height,
BoundBox2D *view_box,
BoundBox2D *cam_box);
@@ -634,7 +620,8 @@ static void blender_camera_from_view(BlenderCamera *bcam,
BL::Scene &b_scene,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height,
int width,
int height,
bool skip_panorama = false)
{
/* 3d view parameters */
@@ -664,7 +651,8 @@ static void blender_camera_from_view(BlenderCamera *bcam,
b_ob,
b_v3d,
b_rv3d,
width, height,
width,
height,
&view_box,
&cam_box);
@@ -709,7 +697,8 @@ static void blender_camera_view_subset(BL::RenderEngine& b_engine,
BL::Object &b_ob,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height,
int width,
int height,
BoundBox2D *view_box,
BoundBox2D *cam_box)
{
@@ -721,16 +710,15 @@ static void blender_camera_view_subset(BL::RenderEngine& b_engine,
blender_camera_init(&view_bcam, b_render);
blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true);
blender_camera_viewplane(&view_bcam, width, height,
&view, &view_aspect, &sensor_size);
blender_camera_viewplane(&view_bcam, width, height, &view, &view_aspect, &sensor_size);
/* get camera viewplane */
BlenderCamera cam_bcam;
blender_camera_init(&cam_bcam, b_render);
blender_camera_from_object(&cam_bcam, b_engine, b_ob, true);
blender_camera_viewplane(&cam_bcam, cam_bcam.full_width, cam_bcam.full_height,
&cam, &cam_aspect, &sensor_size);
blender_camera_viewplane(
&cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size);
/* return */
*view_box = view * (1.0f / view_aspect);
@@ -743,14 +731,15 @@ static void blender_camera_border_subset(BL::RenderEngine& b_engine,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
BL::Object &b_ob,
int width, int height,
int width,
int height,
const BoundBox2D &border,
BoundBox2D *result)
{
/* Determine camera viewport subset. */
BoundBox2D view_box, cam_box;
blender_camera_view_subset(b_engine, b_render, b_scene, b_ob, b_v3d, b_rv3d, width, height,
&view_box, &cam_box);
blender_camera_view_subset(
b_engine, b_render, b_scene, b_ob, b_v3d, b_rv3d, width, height, &view_box, &cam_box);
/* Determine viewport subset matching given border. */
cam_box = cam_box.make_relative_to(view_box);
@@ -763,7 +752,8 @@ static void blender_camera_border(BlenderCamera *bcam,
BL::Scene &b_scene,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height)
int width,
int height)
{
bool is_camera_view;
@@ -797,7 +787,8 @@ static void blender_camera_border(BlenderCamera *bcam,
b_v3d,
b_rv3d,
b_ob,
width, height,
width,
height,
full_border,
&bcam->viewport_camera_border);
@@ -817,7 +808,8 @@ static void blender_camera_border(BlenderCamera *bcam,
b_v3d,
b_rv3d,
b_ob,
width, height,
width,
height,
bcam->border,
&bcam->border);
bcam->border = bcam->border.clamp();
@@ -825,24 +817,14 @@ static void blender_camera_border(BlenderCamera *bcam,
void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height)
int width,
int height)
{
BlenderCamera bcam;
BL::RenderSettings b_render_settings(b_scene.render());
blender_camera_init(&bcam, b_render_settings);
blender_camera_from_view(&bcam,
b_engine,
b_scene,
b_v3d,
b_rv3d,
width, height);
blender_camera_border(&bcam,
b_engine,
b_render_settings,
b_scene,
b_v3d,
b_rv3d,
width, height);
blender_camera_from_view(&bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height);
blender_camera_border(&bcam, b_engine, b_render_settings, b_scene, b_v3d, b_rv3d, width, height);
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
blender_camera_sync(scene->camera, &bcam, width, height, "", &cscene);
@@ -865,7 +847,8 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings& b_render,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width, int height)
int width,
int height)
{
BufferParams params;
bool use_border = false;

View File

@@ -51,9 +51,8 @@ static void interp_weights(float t, float data[4])
data[3] = fc * t3 - fc * t2;
}
static void curveinterp_v3_v3v3v3v3(float3 *p,
float3 *v1, float3 *v2, float3 *v3, float3 *v4,
const float w[4])
static void curveinterp_v3_v3v3v3v3(
float3 *p, float3 *v1, float3 *v2, float3 *v3, float3 *v4, const float w[4])
{
p->x = v1->x * w[0] + v2->x * w[1] + v3->x * w[2] + v4->x * w[3];
p->y = v1->y * w[0] + v2->y * w[1] + v3->y * w[2] + v4->y * w[3];
@@ -77,13 +76,8 @@ static float shaperadius(float shape, float root, float tip, float time)
/* curve functions */
static void InterpolateKeySegments(int seg,
int segno,
int key,
int curve,
float3 *keyloc,
float *time,
ParticleCurveData *CData)
static void InterpolateKeySegments(
int seg, int segno, int key, int curve, float3 *keyloc, float *time, ParticleCurveData *CData)
{
float3 ckey_loc1 = CData->curvekey_co[key];
float3 ckey_loc2 = ckey_loc1;
@@ -112,11 +106,8 @@ static void InterpolateKeySegments(int seg,
curveinterp_v3_v3v3v3v3(keyloc, &ckey_loc1, &ckey_loc2, &ckey_loc3, &ckey_loc4, t);
}
static bool ObtainCacheParticleData(Mesh *mesh,
BL::Mesh *b_mesh,
BL::Object *b_ob,
ParticleCurveData *CData,
bool background)
static bool ObtainCacheParticleData(
Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background)
{
int curvenum = 0;
int keyno = 0;
@@ -129,16 +120,20 @@ static bool ObtainCacheParticleData(Mesh *mesh,
BL::Object::modifiers_iterator b_mod;
for (b_ob->modifiers.begin(b_mod); b_mod != b_ob->modifiers.end(); ++b_mod) {
if((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) && (background ? b_mod->show_render() : b_mod->show_viewport())) {
if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) &&
(background ? b_mod->show_render() : b_mod->show_viewport())) {
BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr);
BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr);
BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);
if((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && (b_part.type() == BL::ParticleSettings::type_HAIR)) {
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
int shader = clamp(b_part.material() - 1, 0, mesh->used_shaders.size() - 1);
int display_step = background ? b_part.render_step() : b_part.display_step();
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.display_percentage() / 100.0f);
int totchild = background ? b_psys.child_particles.length() :
(int)((float)b_psys.child_particles.length() *
(float)b_part.display_percentage() / 100.0f);
int totcurves = totchild;
if (b_part.child_type() == 0 || totchild == 0)
@@ -220,14 +215,18 @@ static bool ObtainCacheParticleUV(Mesh *mesh,
BL::Object::modifiers_iterator b_mod;
for (b_ob->modifiers.begin(b_mod); b_mod != b_ob->modifiers.end(); ++b_mod) {
if((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) && (background ? b_mod->show_render() : b_mod->show_viewport())) {
if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) &&
(background ? b_mod->show_render() : b_mod->show_viewport())) {
BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr);
BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr);
BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);
if((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && (b_part.type() == BL::ParticleSettings::type_HAIR)) {
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.display_percentage() / 100.0f);
int totchild = background ? b_psys.child_particles.length() :
(int)((float)b_psys.child_particles.length() *
(float)b_part.display_percentage() / 100.0f);
int totcurves = totchild;
if (b_part.child_type() == 0 || totchild == 0)
@@ -279,14 +278,18 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
BL::Object::modifiers_iterator b_mod;
for (b_ob->modifiers.begin(b_mod); b_mod != b_ob->modifiers.end(); ++b_mod) {
if((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) && (background ? b_mod->show_render() : b_mod->show_viewport())) {
if ((b_mod->type() == b_mod->type_PARTICLE_SYSTEM) &&
(background ? b_mod->show_render() : b_mod->show_viewport())) {
BL::ParticleSystemModifier psmd((const PointerRNA)b_mod->ptr);
BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr);
BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr);
if((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && (b_part.type() == BL::ParticleSettings::type_HAIR)) {
if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_part.type() == BL::ParticleSettings::type_HAIR)) {
int totparts = b_psys.particles.length();
int totchild = background ? b_psys.child_particles.length() : (int)((float)b_psys.child_particles.length() * (float)b_part.display_percentage() / 100.0f);
int totchild = background ? b_psys.child_particles.length() :
(int)((float)b_psys.child_particles.length() *
(float)b_part.display_percentage() / 100.0f);
int totcurves = totchild;
if (b_part.child_type() == 0 || totchild == 0)
@@ -324,8 +327,10 @@ static bool ObtainCacheParticleVcol(Mesh *mesh,
return true;
}
static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
float3 RotCam, bool is_ortho)
static void ExportCurveTrianglePlanes(Mesh *mesh,
ParticleCurveData *CData,
float3 RotCam,
bool is_ortho)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
@@ -333,7 +338,9 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
/* compute and reserve size of arrays */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
numverts += 2 + (CData->curve_keynum[curve] - 1) * 2;
numtris += (CData->curve_keynum[curve] - 1) * 2;
}
@@ -343,13 +350,17 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
/* actually export */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
float3 xbasis;
float3 v1;
float time = 0.0f;
float3 ickey_loc = CData->curvekey_co[CData->curve_firstkey[curve]];
float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.0f);
v1 = CData->curvekey_co[CData->curve_firstkey[curve] + 1] - CData->curvekey_co[CData->curve_firstkey[curve]];
float radius = shaperadius(
CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.0f);
v1 = CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
CData->curvekey_co[CData->curve_firstkey[curve]];
if (is_ortho)
xbasis = normalize(cross(RotCam, v1));
else
@@ -360,21 +371,29 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
mesh->add_vertex(ickey_loc_shfr);
vertexindex += 2;
for(int curvekey = CData->curve_firstkey[curve] + 1; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) {
for (int curvekey = CData->curve_firstkey[curve] + 1;
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
curvekey++) {
ickey_loc = CData->curvekey_co[curvekey];
if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
v1 = CData->curvekey_co[curvekey] -
CData->curvekey_co[max(curvekey - 1, CData->curve_firstkey[curve])];
else
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey - 1];
time = CData->curvekey_time[curvekey] / CData->curve_length[curve];
radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
radius = shaperadius(
CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)
radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.95f);
radius = shaperadius(CData->psys_shape[sys],
CData->psys_rootradius[sys],
CData->psys_tipradius[sys],
0.95f);
if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
if (CData->psys_closetip[sys] &&
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
if (is_ortho)
@@ -385,8 +404,10 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
float3 ickey_loc_shfr = ickey_loc + radius * xbasis;
mesh->add_vertex(ickey_loc_shfl);
mesh->add_vertex(ickey_loc_shfr);
mesh->add_triangle(vertexindex-2, vertexindex, vertexindex-1, CData->psys_shader[sys], true);
mesh->add_triangle(vertexindex+1, vertexindex-1, vertexindex, CData->psys_shader[sys], true);
mesh->add_triangle(
vertexindex - 2, vertexindex, vertexindex - 1, CData->psys_shader[sys], true);
mesh->add_triangle(
vertexindex + 1, vertexindex - 1, vertexindex, CData->psys_shader[sys], true);
vertexindex += 2;
}
}
@@ -402,9 +423,7 @@ static void ExportCurveTrianglePlanes(Mesh *mesh, ParticleCurveData *CData,
/* texture coords still needed */
}
static void ExportCurveTriangleGeometry(Mesh *mesh,
ParticleCurveData *CData,
int resolution)
static void ExportCurveTriangleGeometry(Mesh *mesh, ParticleCurveData *CData, int resolution)
{
int vertexno = mesh->verts.size();
int vertexindex = vertexno;
@@ -412,7 +431,9 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
/* compute and reserve size of arrays */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
numverts += (CData->curve_keynum[curve] - 1) * resolution + resolution;
numtris += (CData->curve_keynum[curve] - 1) * 2 * resolution;
}
@@ -422,25 +443,36 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
/* actually export */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
float3 firstxbasis = cross(make_float3(1.0f,0.0f,0.0f),CData->curvekey_co[CData->curve_firstkey[curve]+1] - CData->curvekey_co[CData->curve_firstkey[curve]]);
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
float3 firstxbasis = cross(make_float3(1.0f, 0.0f, 0.0f),
CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
CData->curvekey_co[CData->curve_firstkey[curve]]);
if (!is_zero(firstxbasis))
firstxbasis = normalize(firstxbasis);
else
firstxbasis = normalize(cross(make_float3(0.0f,1.0f,0.0f),CData->curvekey_co[CData->curve_firstkey[curve]+1] - CData->curvekey_co[CData->curve_firstkey[curve]]));
firstxbasis = normalize(cross(make_float3(0.0f, 1.0f, 0.0f),
CData->curvekey_co[CData->curve_firstkey[curve] + 1] -
CData->curvekey_co[CData->curve_firstkey[curve]]));
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
curvekey++) {
float3 xbasis = firstxbasis;
float3 v1;
float3 v2;
if (curvekey == CData->curve_firstkey[curve]) {
v1 = CData->curvekey_co[min(curvekey+2,CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] - CData->curvekey_co[curvekey+1];
v1 = CData->curvekey_co[min(
curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
CData->curvekey_co[curvekey + 1];
v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
}
else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
v2 = CData->curvekey_co[curvekey-1] - CData->curvekey_co[max(curvekey-2,CData->curve_firstkey[curve])];
v2 = CData->curvekey_co[curvekey - 1] -
CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
}
else {
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
@@ -455,7 +487,9 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
}
}
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
curvekey++) {
int subv = 1;
float3 xbasis;
float3 ybasis;
@@ -464,12 +498,15 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
if (curvekey == CData->curve_firstkey[curve]) {
subv = 0;
v1 = CData->curvekey_co[min(curvekey+2,CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] - CData->curvekey_co[curvekey+1];
v1 = CData->curvekey_co[min(
curvekey + 2, CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)] -
CData->curvekey_co[curvekey + 1];
v2 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
}
else if (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1) {
v1 = CData->curvekey_co[curvekey] - CData->curvekey_co[curvekey - 1];
v2 = CData->curvekey_co[curvekey-1] - CData->curvekey_co[max(curvekey-2,CData->curve_firstkey[curve])];
v2 = CData->curvekey_co[curvekey - 1] -
CData->curvekey_co[max(curvekey - 2, CData->curve_firstkey[curve])];
}
else {
v1 = CData->curvekey_co[curvekey + 1] - CData->curvekey_co[curvekey];
@@ -493,27 +530,52 @@ static void ExportCurveTriangleGeometry(Mesh *mesh,
InterpolateKeySegments(subv, 1, curvekey, curve, &ickey_loc, &time, CData);
float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
float radius = shaperadius(CData->psys_shape[sys],
CData->psys_rootradius[sys],
CData->psys_tipradius[sys],
time);
if((curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2) && (subv == 1))
radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 0.95f);
if ((curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2) &&
(subv == 1))
radius = shaperadius(CData->psys_shape[sys],
CData->psys_rootradius[sys],
CData->psys_tipradius[sys],
0.95f);
if(CData->psys_closetip[sys] && (subv == 1) && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2))
if (CData->psys_closetip[sys] && (subv == 1) &&
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 2))
radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 0.0f, 0.95f);
float angle = M_2PI_F / (float)resolution;
for (int section = 0; section < resolution; section++) {
float3 ickey_loc_shf = ickey_loc + radius * (cosf(angle * section) * xbasis + sinf(angle * section) * ybasis);
float3 ickey_loc_shf = ickey_loc + radius * (cosf(angle * section) * xbasis +
sinf(angle * section) * ybasis);
mesh->add_vertex(ickey_loc_shf);
}
if (subv != 0) {
for (int section = 0; section < resolution - 1; section++) {
mesh->add_triangle(vertexindex - resolution + section, vertexindex + section, vertexindex - resolution + section + 1, CData->psys_shader[sys], true);
mesh->add_triangle(vertexindex + section + 1, vertexindex - resolution + section + 1, vertexindex + section, CData->psys_shader[sys], true);
mesh->add_triangle(vertexindex - resolution + section,
vertexindex + section,
vertexindex - resolution + section + 1,
CData->psys_shader[sys],
true);
mesh->add_triangle(vertexindex + section + 1,
vertexindex - resolution + section + 1,
vertexindex + section,
CData->psys_shader[sys],
true);
}
mesh->add_triangle(vertexindex-1, vertexindex + resolution - 1, vertexindex - resolution, CData->psys_shader[sys], true);
mesh->add_triangle(vertexindex, vertexindex - resolution , vertexindex + resolution - 1, CData->psys_shader[sys], true);
mesh->add_triangle(vertexindex - 1,
vertexindex + resolution - 1,
vertexindex - resolution,
CData->psys_shader[sys],
true);
mesh->add_triangle(vertexindex,
vertexindex - resolution,
vertexindex + resolution - 1,
CData->psys_shader[sys],
true);
}
vertexindex += resolution;
}
@@ -549,7 +611,9 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
/* compute and reserve size of arrays */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
num_keys += CData->curve_keynum[curve];
num_curves++;
}
@@ -566,26 +630,22 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
/* actually export */
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
size_t num_curve_keys = 0;
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
curvekey++)
{
curvekey++) {
const float3 ickey_loc = CData->curvekey_co[curvekey];
const float curve_time = CData->curvekey_time[curvekey];
const float curve_length = CData->curve_length[curve];
const float time = (curve_length > 0.0f)
? curve_time / curve_length
: 0.0f;
float radius = shaperadius(CData->psys_shape[sys],
CData->psys_rootradius[sys],
CData->psys_tipradius[sys],
time);
const float time = (curve_length > 0.0f) ? curve_time / curve_length : 0.0f;
float radius = shaperadius(
CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
if (CData->psys_closetip[sys] &&
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
{
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) {
radius = 0.0f;
}
mesh->add_curve_key(ickey_loc, radius);
@@ -618,9 +678,11 @@ static float4 CurveSegmentMotionCV(ParticleCurveData *CData, int sys, int curve,
const float curve_time = CData->curvekey_time[curvekey];
const float curve_length = CData->curve_length[curve];
float time = (curve_length > 0.0f) ? curve_time / curve_length : 0.0f;
float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
float radius = shaperadius(
CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
if (CData->psys_closetip[sys] &&
(curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
radius = 0.0f;
/* curve motion keys store both position and radius in float4 */
@@ -645,17 +707,15 @@ static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int cu
curvekey2 = (CData->curve_keynum[curve] - 1);
curvekey = curvekey2 - 1;
}
const float4 mP = CurveSegmentMotionCV(
CData, sys, curve, first_curve_key + curvekey);
const float4 mP2 = CurveSegmentMotionCV(
CData, sys, curve, first_curve_key + curvekey2);
const float4 mP = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey);
const float4 mP2 = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey2);
return lerp(mP, mP2, remainder);
}
static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int motion_step)
{
VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name
<< ", motion step " << motion_step;
VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name << ", motion step "
<< motion_step;
/* find attribute */
Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
@@ -676,14 +736,20 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
int num_curves = 0;
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
/* Curve lengths may not match! Curves can be clipped. */
int curve_key_end = (num_curves+1 < (int)mesh->curve_first_key.size() ? mesh->curve_first_key[num_curves+1] : (int)mesh->curve_keys.size());
int curve_key_end = (num_curves + 1 < (int)mesh->curve_first_key.size() ?
mesh->curve_first_key[num_curves + 1] :
(int)mesh->curve_keys.size());
const int num_center_curve_keys = curve_key_end - mesh->curve_first_key[num_curves];
const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys;
if (!is_num_keys_different) {
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve];
curvekey++) {
if (i < mesh->curve_keys.size()) {
mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey);
if (!have_motion) {
@@ -702,14 +768,9 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
else {
/* Number of keys has changed. Generate an interpolated version
* to preserve motion blur. */
const float step_size =
num_center_curve_keys > 1
? 1.0f / (num_center_curve_keys - 1)
: 0.0f;
for(int step_index = 0;
step_index < num_center_curve_keys;
++step_index)
{
const float step_size = num_center_curve_keys > 1 ? 1.0f / (num_center_curve_keys - 1) :
0.0f;
for (int step_index = 0; step_index < num_center_curve_keys; ++step_index) {
const float step = step_index * step_size;
mP[i] = LerpCurveSegmentMotionCV(CData, sys, curve, step);
i++;
@@ -733,8 +794,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
}
else if (motion_step > 0) {
VLOG(1) << "Filling in new motion vertex position for motion_step "
<< motion_step;
VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step;
/* motion, fill up previous steps that we might have skipped because
* they had no motion, but we need them anyway now */
for (int step = 0; step < motion_step; step++) {
@@ -759,8 +819,12 @@ static void ExportCurveTriangleUV(ParticleCurveData *CData,
int vertexindex = vert_offset;
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
curvekey++) {
for (int section = 0; section < resol; section++) {
uvdata[vertexindex] = CData->curve_uv[curve];
vertexindex++;
@@ -791,21 +855,31 @@ static void ExportCurveTriangleVcol(ParticleCurveData *CData,
int vertexindex = vert_offset;
for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
for(int curve = CData->psys_firstcurve[sys]; curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; curve++) {
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
for (int curve = CData->psys_firstcurve[sys];
curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys];
curve++) {
for (int curvekey = CData->curve_firstkey[curve];
curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1;
curvekey++) {
for (int section = 0; section < resol; section++) {
/* Encode vertex color using the sRGB curve. */
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
cdata[vertexindex] = color_float_to_byte(color_srgb_to_linear_v3(CData->curve_vcol[curve]));
cdata[vertexindex] = color_float_to_byte(
color_srgb_to_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
}
}
@@ -826,16 +900,10 @@ void BlenderSync::sync_curve_settings()
curve_system_manager->minimum_width = get_float(csscene, "minimum_width");
curve_system_manager->maximum_width = get_float(csscene, "maximum_width");
curve_system_manager->primitive =
(CurvePrimitiveType)get_enum(csscene,
"primitive",
CURVE_NUM_PRIMITIVE_TYPES,
CURVE_LINE_SEGMENTS);
curve_system_manager->curve_shape =
(CurveShapeType)get_enum(csscene,
"shape",
CURVE_NUM_SHAPE_TYPES,
CURVE_THICK);
curve_system_manager->primitive = (CurvePrimitiveType)get_enum(
csscene, "primitive", CURVE_NUM_PRIMITIVE_TYPES, CURVE_LINE_SEGMENTS);
curve_system_manager->curve_shape = (CurveShapeType)get_enum(
csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK);
curve_system_manager->resolution = get_int(csscene, "resolution");
curve_system_manager->subdivisions = get_int(csscene, "subdivisions");
curve_system_manager->use_backfacing = !get_boolean(csscene, "cull_backfacing");
@@ -880,8 +948,10 @@ void BlenderSync::sync_curve_settings()
for (b_data.objects.begin(b_ob); b_ob != b_data.objects.end(); ++b_ob) {
if (object_is_mesh(*b_ob)) {
BL::Object::particle_systems_iterator b_psys;
for(b_ob->particle_systems.begin(b_psys); b_psys != b_ob->particle_systems.end(); ++b_psys) {
if((b_psys->settings().render_type()==BL::ParticleSettings::render_type_PATH)&&(b_psys->settings().type()==BL::ParticleSettings::type_HAIR)) {
for (b_ob->particle_systems.begin(b_psys); b_psys != b_ob->particle_systems.end();
++b_psys) {
if ((b_psys->settings().render_type() == BL::ParticleSettings::render_type_PATH) &&
(b_psys->settings().type() == BL::ParticleSettings::type_HAIR)) {
BL::ID key = BKE_object_is_modified(*b_ob) ? *b_ob : b_ob->data();
mesh_map.set_recalc(key);
object_map.set_recalc(*b_ob);
@@ -895,11 +965,8 @@ void BlenderSync::sync_curve_settings()
curve_system_manager->tag_update(scene);
}
void BlenderSync::sync_curves(Mesh *mesh,
BL::Mesh& b_mesh,
BL::Object& b_ob,
bool motion,
int motion_step)
void BlenderSync::sync_curves(
Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step)
{
if (!motion) {
/* Clear stored curve data */
@@ -945,9 +1012,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
else {
Transform tfm = get_transform(b_ob.matrix_world());
Transform itfm = transform_quick_inverse(tfm);
RotCam = transform_point(&itfm, make_float3(ctfm.x.w,
ctfm.y.w,
ctfm.z.w));
RotCam = transform_point(&itfm, make_float3(ctfm.x.w, ctfm.y.w, ctfm.z.w));
}
bool is_ortho = camera->type == CAMERA_ORTHOGRAPHIC;
ExportCurveTrianglePlanes(mesh, &CData, RotCam, is_ortho);

View File

@@ -48,7 +48,8 @@ DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scen
PointerRNA cpreferences;
BL::Preferences::addons_iterator b_addon_iter;
for(b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end(); ++b_addon_iter) {
for (b_preferences.addons.begin(b_addon_iter); b_addon_iter != b_preferences.addons.end();
++b_addon_iter) {
if (b_addon_iter->module() == "cycles") {
cpreferences = b_addon_iter->preferences().ptr;
break;
@@ -63,10 +64,8 @@ DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scen
COMPUTE_DEVICE_NUM = 3,
};
ComputeDevice compute_device = (ComputeDevice)get_enum(cpreferences,
"compute_device_type",
COMPUTE_DEVICE_NUM,
COMPUTE_DEVICE_CPU);
ComputeDevice compute_device = (ComputeDevice)get_enum(
cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU);
if (compute_device != COMPUTE_DEVICE_CPU) {
/* Query GPU devices with matching types. */
@@ -91,13 +90,12 @@ DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scen
}
}
}
} RNA_END;
}
RNA_END;
if (!used_devices.empty()) {
int threads = blender_device_threads(b_scene);
device = Device::get_multi_device(used_devices,
threads,
background);
device = Device::get_multi_device(used_devices, threads, background);
}
/* Else keep using the CPU device that was set before. */
}

View File

@@ -30,7 +30,9 @@ CCL_NAMESPACE_BEGIN
int blender_device_threads(BL::Scene &b_scene);
/* Convert Blender settings to device specification. */
DeviceInfo blender_device_info(BL::Preferences& b_preferences, BL::Scene& b_scene, bool background);
DeviceInfo blender_device_info(BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background);
CCL_NAMESPACE_END

View File

@@ -43,14 +43,10 @@ struct MikkUserData {
const Mesh *mesh,
float3 *tangent,
float *tangent_sign)
: mesh(mesh),
texface(NULL),
orco(NULL),
tangent(tangent),
tangent_sign(tangent_sign)
: mesh(mesh), texface(NULL), orco(NULL), tangent(tangent), tangent_sign(tangent_sign)
{
const AttributeSet& attributes = (mesh->subd_faces.size()) ?
mesh->subd_attributes : mesh->attributes;
const AttributeSet &attributes = (mesh->subd_faces.size()) ? mesh->subd_attributes :
mesh->attributes;
Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL);
vertex_normal = attr_vN->data_float3();
@@ -94,8 +90,7 @@ static int mikk_get_num_faces(const SMikkTSpaceContext *context)
}
}
static int mikk_get_num_verts_of_face(const SMikkTSpaceContext *context,
const int face_num)
static int mikk_get_num_verts_of_face(const SMikkTSpaceContext *context, const int face_num)
{
const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData;
if (userdata->mesh->subd_faces.size()) {
@@ -131,7 +126,8 @@ static int mikk_corner_index(const Mesh *mesh, const int face_num, const int ver
static void mikk_get_position(const SMikkTSpaceContext *context,
float P[3],
const int face_num, const int vert_num)
const int face_num,
const int vert_num)
{
const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData;
const Mesh *mesh = userdata->mesh;
@@ -144,7 +140,8 @@ static void mikk_get_position(const SMikkTSpaceContext *context,
static void mikk_get_texture_coordinate(const SMikkTSpaceContext *context,
float uv[2],
const int face_num, const int vert_num)
const int face_num,
const int vert_num)
{
const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData;
const Mesh *mesh = userdata->mesh;
@@ -170,8 +167,10 @@ static void mikk_get_texture_coordinate(const SMikkTSpaceContext *context,
}
}
static void mikk_get_normal(const SMikkTSpaceContext *context, float N[3],
const int face_num, const int vert_num)
static void mikk_get_normal(const SMikkTSpaceContext *context,
float N[3],
const int face_num,
const int vert_num)
{
const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData;
const Mesh *mesh = userdata->mesh;
@@ -204,7 +203,8 @@ static void mikk_get_normal(const SMikkTSpaceContext *context, float N[3],
static void mikk_set_tangent_space(const SMikkTSpaceContext *context,
const float T[],
const float sign,
const int face_num, const int vert_num)
const int face_num,
const int vert_num)
{
MikkUserData *userdata = (MikkUserData *)context->m_pUserData;
const Mesh *mesh = userdata->mesh;
@@ -215,15 +215,11 @@ static void mikk_set_tangent_space(const SMikkTSpaceContext *context,
}
}
static void mikk_compute_tangents(const BL::Mesh& b_mesh,
const char *layer_name,
Mesh *mesh,
bool need_sign,
bool active_render)
static void mikk_compute_tangents(
const BL::Mesh &b_mesh, const char *layer_name, Mesh *mesh, bool need_sign, bool active_render)
{
/* Create tangent attributes. */
AttributeSet& attributes = (mesh->subd_faces.size()) ?
mesh->subd_attributes : mesh->attributes;
AttributeSet &attributes = (mesh->subd_faces.size()) ? mesh->subd_attributes : mesh->attributes;
Attribute *attr;
ustring name;
if (layer_name != NULL) {
@@ -245,8 +241,7 @@ static void mikk_compute_tangents(const BL::Mesh& b_mesh,
Attribute *attr_sign;
ustring name_sign;
if (layer_name != NULL) {
name_sign = ustring((string(layer_name) +
".tangent_sign").c_str());
name_sign = ustring((string(layer_name) + ".tangent_sign").c_str());
}
else {
name_sign = ustring("orco.tangent_sign");
@@ -256,9 +251,7 @@ static void mikk_compute_tangents(const BL::Mesh& b_mesh,
attr_sign = attributes.add(ATTR_STD_UV_TANGENT_SIGN, name_sign);
}
else {
attr_sign = attributes.add(name_sign,
TypeDesc::TypeFloat,
ATTR_ELEMENT_CORNER);
attr_sign = attributes.add(name_sign, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER);
}
tangent_sign = attr_sign->data_float();
}
@@ -284,11 +277,8 @@ static void mikk_compute_tangents(const BL::Mesh& b_mesh,
/* Create Volume Attribute */
static void create_mesh_volume_attribute(BL::Object& b_ob,
Mesh *mesh,
ImageManager *image_manager,
AttributeStandard std,
float frame)
static void create_mesh_volume_attribute(
BL::Object &b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std, float frame)
{
BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
@@ -304,8 +294,7 @@ static void create_mesh_volume_attribute(BL::Object& b_ob,
bool use_alpha = true;
volume_data->manager = image_manager;
volume_data->slot = image_manager->add_image(
Attribute::standard_name(std),
volume_data->slot = image_manager->add_image(Attribute::standard_name(std),
b_ob.ptr.data,
animated,
frame,
@@ -315,10 +304,7 @@ static void create_mesh_volume_attribute(BL::Object& b_ob,
metadata);
}
static void create_mesh_volume_attributes(Scene *scene,
BL::Object& b_ob,
Mesh *mesh,
float frame)
static void create_mesh_volume_attributes(Scene *scene, BL::Object &b_ob, Mesh *mesh, float frame)
{
/* for smoke volume rendering */
if (mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY))
@@ -330,16 +316,15 @@ static void create_mesh_volume_attributes(Scene *scene,
if (mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT))
create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT, frame);
if (mesh->need_attribute(scene, ATTR_STD_VOLUME_TEMPERATURE))
create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_TEMPERATURE, frame);
create_mesh_volume_attribute(
b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_TEMPERATURE, frame);
if (mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY))
create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY, frame);
create_mesh_volume_attribute(
b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY, frame);
}
/* Create vertex color attributes. */
static void attr_create_vertex_color(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
bool subdivision)
static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
{
if (subdivision) {
BL::Mesh::vertex_colors_iterator l;
@@ -348,9 +333,8 @@ static void attr_create_vertex_color(Scene *scene,
if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
continue;
Attribute *attr = mesh->subd_attributes.add(ustring(l->name().c_str()),
TypeDesc::TypeColor,
ATTR_ELEMENT_CORNER_BYTE);
Attribute *attr = mesh->subd_attributes.add(
ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
BL::Mesh::polygons_iterator p;
uchar4 *cdata = attr->data_uchar4();
@@ -371,9 +355,8 @@ static void attr_create_vertex_color(Scene *scene,
if (!mesh->need_attribute(scene, ustring(l->name().c_str())))
continue;
Attribute *attr = mesh->attributes.add(ustring(l->name().c_str()),
TypeDesc::TypeColor,
ATTR_ELEMENT_CORNER_BYTE);
Attribute *attr = mesh->attributes.add(
ustring(l->name().c_str()), TypeDesc::TypeColor, ATTR_ELEMENT_CORNER_BYTE);
BL::Mesh::loop_triangles_iterator t;
uchar4 *cdata = attr->data_uchar4();
@@ -395,9 +378,7 @@ static void attr_create_vertex_color(Scene *scene,
}
/* Create uv map attributes. */
static void attr_create_uv_map(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh)
static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh)
{
if (b_mesh.uv_layers.length() != 0) {
BL::Mesh::uv_layers_iterator l;
@@ -406,17 +387,14 @@ static void attr_create_uv_map(Scene *scene,
const bool active_render = l->active_render();
AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE;
ustring uv_name = ustring(l->name().c_str());
AttributeStandard tangent_std = (active_render)? ATTR_STD_UV_TANGENT
: ATTR_STD_NONE;
ustring tangent_name = ustring(
(string(l->name().c_str()) + ".tangent").c_str());
AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE;
ustring tangent_name = ustring((string(l->name().c_str()) + ".tangent").c_str());
/* Denotes whether UV map was requested directly. */
const bool need_uv = mesh->need_attribute(scene, uv_name) ||
mesh->need_attribute(scene, uv_std);
/* Denotes whether tangent was requested directly. */
const bool need_tangent =
mesh->need_attribute(scene, tangent_name) ||
const bool need_tangent = mesh->need_attribute(scene, tangent_name) ||
(active_render && mesh->need_attribute(scene, tangent_std));
/* UV map */
@@ -429,9 +407,7 @@ static void attr_create_uv_map(Scene *scene,
uv_attr = mesh->attributes.add(uv_std, uv_name);
}
else {
uv_attr = mesh->attributes.add(uv_name,
TypeFloat2,
ATTR_ELEMENT_CORNER);
uv_attr = mesh->attributes.add(uv_name, TypeFloat2, ATTR_ELEMENT_CORNER);
}
BL::Mesh::loop_triangles_iterator t;
@@ -448,18 +424,11 @@ static void attr_create_uv_map(Scene *scene,
/* UV tangent */
if (need_tangent) {
AttributeStandard sign_std =
(active_render)? ATTR_STD_UV_TANGENT_SIGN
: ATTR_STD_NONE;
ustring sign_name = ustring(
(string(l->name().c_str()) + ".tangent_sign").c_str());
AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE;
ustring sign_name = ustring((string(l->name().c_str()) + ".tangent_sign").c_str());
bool need_sign = (mesh->need_attribute(scene, sign_name) ||
mesh->need_attribute(scene, sign_std));
mikk_compute_tangents(b_mesh,
l->name().c_str(),
mesh,
need_sign,
active_render);
mikk_compute_tangents(b_mesh, l->name().c_str(), mesh, need_sign, active_render);
}
/* Remove temporarily created UV attribute. */
if (!need_uv && uv_attr != NULL) {
@@ -476,10 +445,7 @@ static void attr_create_uv_map(Scene *scene,
}
}
static void attr_create_subd_uv_map(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
bool subdivide_uvs)
static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs)
{
if (b_mesh.uv_layers.length() != 0) {
BL::Mesh::uv_layers_iterator l;
@@ -489,17 +455,14 @@ static void attr_create_subd_uv_map(Scene *scene,
bool active_render = l->active_render();
AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE;
ustring uv_name = ustring(l->name().c_str());
AttributeStandard tangent_std = (active_render)? ATTR_STD_UV_TANGENT
: ATTR_STD_NONE;
ustring tangent_name = ustring(
(string(l->name().c_str()) + ".tangent").c_str());
AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE;
ustring tangent_name = ustring((string(l->name().c_str()) + ".tangent").c_str());
/* Denotes whether UV map was requested directly. */
const bool need_uv = mesh->need_attribute(scene, uv_name) ||
mesh->need_attribute(scene, uv_std);
/* Denotes whether tangent was requested directly. */
const bool need_tangent =
mesh->need_attribute(scene, tangent_name) ||
const bool need_tangent = mesh->need_attribute(scene, tangent_name) ||
(active_render && mesh->need_attribute(scene, tangent_std));
Attribute *uv_attr = NULL;
@@ -528,18 +491,11 @@ static void attr_create_subd_uv_map(Scene *scene,
/* UV tangent */
if (need_tangent) {
AttributeStandard sign_std =
(active_render)? ATTR_STD_UV_TANGENT_SIGN
: ATTR_STD_NONE;
ustring sign_name = ustring(
(string(l->name().c_str()) + ".tangent_sign").c_str());
AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE;
ustring sign_name = ustring((string(l->name().c_str()) + ".tangent_sign").c_str());
bool need_sign = (mesh->need_attribute(scene, sign_name) ||
mesh->need_attribute(scene, sign_std));
mikk_compute_tangents(b_mesh,
l->name().c_str(),
mesh,
need_sign,
active_render);
mikk_compute_tangents(b_mesh, l->name().c_str(), mesh, need_sign, active_render);
}
/* Remove temporarily created UV attribute. */
if (!need_uv && uv_attr != NULL) {
@@ -561,8 +517,8 @@ static void attr_create_subd_uv_map(Scene *scene,
/* Compare vertices by sum of their coordinates. */
class VertexAverageComparator {
public:
VertexAverageComparator(const array<float3>& verts)
: verts_(verts) {
VertexAverageComparator(const array<float3> &verts) : verts_(verts)
{
}
bool operator()(const int &vert_idx_a, const int &vert_idx_b)
@@ -582,10 +538,7 @@ protected:
const array<float3> &verts_;
};
static void attr_create_pointiness(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
bool subdivision)
static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision)
{
if (!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) {
return;
@@ -607,24 +560,18 @@ static void attr_create_pointiness(Scene *scene,
* index.
*/
vector<int> vert_orig_index(num_verts);
for(int sorted_vert_index = 0;
sorted_vert_index < num_verts;
++sorted_vert_index)
{
for (int sorted_vert_index = 0; sorted_vert_index < num_verts; ++sorted_vert_index) {
const int vert_index = sorted_vert_indeices[sorted_vert_index];
const float3 &vert_co = mesh->verts[vert_index];
bool found = false;
for(int other_sorted_vert_index = sorted_vert_index + 1;
other_sorted_vert_index < num_verts;
++other_sorted_vert_index)
{
const int other_vert_index =
sorted_vert_indeices[other_sorted_vert_index];
for (int other_sorted_vert_index = sorted_vert_index + 1; other_sorted_vert_index < num_verts;
++other_sorted_vert_index) {
const int other_vert_index = sorted_vert_indeices[other_sorted_vert_index];
const float3 &other_vert_co = mesh->verts[other_vert_index];
/* We are too far away now, we wouldn't have duplicate. */
if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) -
(vert_co.x + vert_co.y + vert_co.z) > 3 * FLT_EPSILON)
{
(vert_co.x + vert_co.y + vert_co.z) >
3 * FLT_EPSILON) {
break;
}
/* Found duplicate. */
@@ -679,8 +626,7 @@ static void attr_create_pointiness(Scene *scene,
continue;
}
visited_edges.insert(v0, v1);
float3 co0 = get_float3(b_mesh.vertices[v0].co()),
co1 = get_float3(b_mesh.vertices[v1].co());
float3 co0 = get_float3(b_mesh.vertices[v0].co()), co1 = get_float3(b_mesh.vertices[v1].co());
float3 edge = normalize(co1 - co0);
edge_accum[v0] += edge;
edge_accum[v1] += -edge;
@@ -695,9 +641,7 @@ static void attr_create_pointiness(Scene *scene,
}
if (counter[vert_index] > 0) {
const float3 normal = vert_normal[vert_index];
const float angle =
safe_acosf(dot(normal,
edge_accum[vert_index] / counter[vert_index]));
const float angle = safe_acosf(dot(normal, edge_accum[vert_index] / counter[vert_index]));
raw_data[vert_index] = angle * M_1_PI_F;
}
else {
@@ -749,7 +693,8 @@ static void create_mesh(Scene *scene,
int numtris = 0;
int numcorners = 0;
int numngons = 0;
bool use_loop_normals = b_mesh.use_auto_smooth() && (mesh->subdivision_type != Mesh::SUBDIVISION_CATMULL_CLARK);
bool use_loop_normals = b_mesh.use_auto_smooth() &&
(mesh->subdivision_type != Mesh::SUBDIVISION_CATMULL_CLARK);
/* If no faces, create empty mesh. */
if (numfaces == 0) {
@@ -785,13 +730,9 @@ static void create_mesh(Scene *scene,
N = attr_N->data_float3();
/* create generated coordinates from undeformed coordinates */
const bool need_default_tangent =
(subdivision == false) &&
(b_mesh.uv_layers.length() == 0) &&
const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) &&
(mesh->need_attribute(scene, ATTR_STD_UV_TANGENT));
if(mesh->need_attribute(scene, ATTR_STD_GENERATED) ||
need_default_tangent)
{
if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) {
Attribute *attr = attributes.add(ATTR_STD_GENERATED);
attr->flags |= ATTR_SUBDIVIDED;
@@ -820,9 +761,8 @@ static void create_mesh(Scene *scene,
if (use_loop_normals) {
BL::Array<float, 9> loop_normals = t->split_normals();
for (int i = 0; i < 3; i++) {
N[vi[i]] = make_float3(loop_normals[i * 3],
loop_normals[i * 3 + 1],
loop_normals[i * 3 + 2]);
N[vi[i]] = make_float3(
loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
}
}
@@ -963,7 +903,9 @@ static void sync_mesh_fluid_motion(BL::Object& b_ob, Scene *scene, Mesh *mesh)
BL::DomainFluidSettings::fluid_mesh_vertices_iterator fvi;
int i = 0;
for(b_fluid_domain.fluid_mesh_vertices.begin(fvi); fvi != b_fluid_domain.fluid_mesh_vertices.end(); ++fvi, ++i) {
for (b_fluid_domain.fluid_mesh_vertices.begin(fvi);
fvi != b_fluid_domain.fluid_mesh_vertices.end();
++fvi, ++i) {
mP[i] = P[i] + get_float3(fvi->velocity()) * relative_time;
}
}
@@ -1014,11 +956,14 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph& b_depsgraph,
if (!mesh_map.sync(&mesh, key)) {
/* if transform was applied to mesh, need full update */
if(object_updated && mesh->transform_applied);
if (object_updated && mesh->transform_applied)
;
/* test if shaders changed, these can be object level so mesh
* does not get tagged for recalc */
else if(mesh->used_shaders != used_shaders);
else if(requested_geometry_flags != mesh->geometry_flags);
else if (mesh->used_shaders != used_shaders)
;
else if (requested_geometry_flags != mesh->geometry_flags)
;
else {
/* even if not tagged for recalc, we may need to sync anyway
* because the shader needs different mesh attributes */
@@ -1073,18 +1018,14 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph& b_depsgraph,
/* For some reason, meshes do not need this... */
bool need_undeformed = mesh->need_attribute(scene, ATTR_STD_GENERATED);
BL::Mesh b_mesh = object_to_mesh(b_data,
b_ob,
b_depsgraph,
need_undeformed,
mesh->subdivision_type);
BL::Mesh b_mesh = object_to_mesh(
b_data, b_ob, b_depsgraph, need_undeformed, mesh->subdivision_type);
if (b_mesh) {
/* Sync mesh itself. */
if (view_layer.use_surfaces && show_self) {
if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE)
create_subd_mesh(scene, mesh, b_ob, b_mesh, used_shaders,
dicing_rate, max_subdivisions);
create_subd_mesh(scene, mesh, b_ob, b_mesh, used_shaders, dicing_rate, max_subdivisions);
else
create_mesh(scene, mesh, b_mesh, used_shaders, false);
@@ -1092,7 +1033,8 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph& b_depsgraph,
}
/* Sync hair curves. */
if(view_layer.use_hair && show_particles && mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
if (view_layer.use_hair && show_particles &&
mesh->subdivision_type == Mesh::SUBDIVISION_NONE) {
sync_curves(mesh, b_mesh, b_ob, false);
}
@@ -1105,11 +1047,9 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph& b_depsgraph,
sync_mesh_fluid_motion(b_ob, scene, mesh);
/* tag update */
bool rebuild = (oldtriangles != mesh->triangles) ||
(oldsubd_faces != mesh->subd_faces) ||
bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) ||
(oldsubd_face_corners != mesh->subd_face_corners) ||
(oldcurve_keys != mesh->curve_keys) ||
(oldcurve_radius != mesh->curve_radius);
(oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius);
mesh->tag_update(scene, rebuild);
@@ -1158,11 +1098,7 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
if (ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
/* get derived mesh */
b_mesh = object_to_mesh(b_data,
b_ob,
b_depsgraph,
false,
Mesh::SUBDIVISION_NONE);
b_mesh = object_to_mesh(b_data, b_ob, b_depsgraph, false, Mesh::SUBDIVISION_NONE);
}
if (!b_mesh) {
@@ -1227,16 +1163,13 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
if (new_attribute) {
/* In case of new attribute, we verify if there really was any motion. */
if (b_mesh.vertices.length() != numverts ||
memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0)
{
memcmp(mP, &mesh->verts[0], sizeof(float3) * numverts) == 0) {
/* no motion, remove attributes again */
if (b_mesh.vertices.length() != numverts) {
VLOG(1) << "Topology differs, disabling motion blur for object "
<< b_ob.name();
VLOG(1) << "Topology differs, disabling motion blur for object " << b_ob.name();
}
else {
VLOG(1) << "No actual deformation motion for object "
<< b_ob.name();
VLOG(1) << "No actual deformation motion for object " << b_ob.name();
}
mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
if (attr_mN)
@@ -1257,8 +1190,8 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
}
else {
if (b_mesh.vertices.length() != numverts) {
VLOG(1) << "Topology differs, discarding motion blur for object "
<< b_ob.name() << " at time " << motion_step;
VLOG(1) << "Topology differs, discarding motion blur for object " << b_ob.name()
<< " at time " << motion_step;
memcpy(mP, &mesh->verts[0], sizeof(float3) * numverts);
if (mN != NULL) {
memcpy(mN, attr_N->data_float3(), sizeof(float3) * numverts);

View File

@@ -72,15 +72,11 @@ bool BlenderSync::object_is_mesh(BL::Object& b_ob)
* significant if there are many for path animation. */
BL::Curve b_curve(b_ob.data());
return (b_curve.bevel_object() ||
b_curve.extrude() != 0.0f ||
b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D ||
b_ob.modifiers.length());
return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f ||
b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length());
}
else {
return (b_ob_data.is_a(&RNA_Mesh) ||
b_ob_data.is_a(&RNA_Curve) ||
return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) ||
b_ob_data.is_a(&RNA_MetaBall));
}
}
@@ -244,12 +240,7 @@ void BlenderSync::sync_background_light(bool use_portal)
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
enum SamplingMethod {
SAMPLING_NONE = 0,
SAMPLING_AUTOMATIC,
SAMPLING_MANUAL,
SAMPLING_NUM
};
enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC);
bool sample_as_light = (sampling_method != SAMPLING_NONE);
@@ -258,10 +249,8 @@ void BlenderSync::sync_background_light(bool use_portal)
Light *light;
ObjectKey key(b_world, 0, b_world);
if(light_map.sync(&light, b_world, b_world, key) ||
world_recalc ||
b_world.ptr.data != world_map)
{
if (light_map.sync(&light, b_world, b_world, key) || world_recalc ||
b_world.ptr.data != world_map) {
light->type = LIGHT_BACKGROUND;
if (sampling_method == SAMPLING_MANUAL) {
light->map_resolution = get_int(cworld, "sample_map_resolution");
@@ -302,10 +291,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
{
const bool is_instance = b_instance.is_instance();
BL::Object b_ob = b_instance.object();
BL::Object b_parent = is_instance ? b_instance.parent()
: b_instance.object();
BL::Object b_ob_instance = is_instance ? b_instance.instance_object()
: b_ob;
BL::Object b_parent = is_instance ? b_instance.parent() : b_instance.object();
BL::Object b_ob_instance = is_instance ? b_instance.instance_object() : b_ob;
const bool motion = motion_time != 0.0f;
/*const*/ Transform tfm = get_transform(b_ob.matrix_world());
int *persistent_id = NULL;
@@ -404,7 +391,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
object_updated = true;
/* mesh sync */
object->mesh = sync_mesh(b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
object->mesh = sync_mesh(
b_depsgraph, b_ob, b_ob_instance, object_updated, show_self, show_particles);
/* special case not tracked by object update flags */
@@ -487,7 +475,8 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
/* dupli texture coordinates and random_id */
if (is_instance) {
object->dupli_generated = 0.5f*get_float3(b_instance.orco()) - make_float3(0.5f, 0.5f, 0.5f);
object->dupli_generated = 0.5f * get_float3(b_instance.orco()) -
make_float3(0.5f, 0.5f, 0.5f);
object->dupli_uv = get_float2(b_instance.uv());
object->random_id = b_instance.random_id();
}
@@ -539,8 +528,7 @@ void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
BL::Depsgraph::object_instances_iterator b_instance_iter;
for (b_depsgraph.object_instances.begin(b_instance_iter);
b_instance_iter != b_depsgraph.object_instances.end() && !cancel;
++b_instance_iter)
{
++b_instance_iter) {
BL::DepsgraphObjectInstance b_instance = *b_instance_iter;
BL::Object b_ob = b_instance.object();
@@ -589,7 +577,8 @@ void BlenderSync::sync_objects(BL::Depsgraph& b_depsgraph, float motion_time)
void BlenderSync::sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::Object &b_override,
int width, int height,
int width,
int height,
void **python_thread_state)
{
if (scene->need_motion() == Scene::MOTION_NONE)
@@ -607,8 +596,7 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
float frame_center_delta = 0.0f;
if (scene->need_motion() != Scene::MOTION_PASS &&
scene->camera->motion_position != Camera::MOTION_POSITION_CENTER)
{
scene->camera->motion_position != Camera::MOTION_POSITION_CENTER) {
float shuttertime = scene->camera->shuttertime;
if (scene->camera->motion_position == Camera::MOTION_POSITION_END) {
frame_center_delta = -shuttertime * 0.5f;
@@ -639,14 +627,14 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
continue;
}
VLOG(1) << "Synchronizing motion for the relative time "
<< relative_time << ".";
VLOG(1) << "Synchronizing motion for the relative time " << relative_time << ".";
/* fixed shutter time to get previous and next frame for motion pass */
float shuttertime = scene->motion_shutter_time();
/* compute frame and subframe time */
float time = frame_center + subframe_center + frame_center_delta + relative_time * shuttertime * 0.5f;
float time = frame_center + subframe_center + frame_center_delta +
relative_time * shuttertime * 0.5f;
int frame = (int)floorf(time);
float subframe = time - frame;
@@ -657,10 +645,7 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
/* sync camera, only supports two times at the moment */
if (relative_time == -1.0f || relative_time == 1.0f) {
sync_camera_motion(b_render,
b_cam,
width, height,
relative_time);
sync_camera_motion(b_render, b_cam, width, height, relative_time);
}
/* sync object */

View File

@@ -76,17 +76,14 @@ bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
float3 bb[8];
BL::Array<float, 24> boundbox = b_ob.bound_box();
for (int i = 0; i < 8; ++i) {
float3 p = make_float3(boundbox[3 * i + 0],
boundbox[3 * i + 1],
boundbox[3 * i + 2]);
float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]);
bb[i] = transform_point(&tfm, p);
}
bool camera_culled = use_camera_cull_ && test_camera(scene, bb);
bool distance_culled = use_distance_cull_ && test_distance(scene, bb);
return ((camera_culled && distance_culled) ||
(camera_culled && !use_distance_cull_) ||
return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) ||
(distance_culled && !use_camera_cull_));
}
@@ -103,10 +100,8 @@ bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
for (int i = 0; i < 8; ++i) {
float3 p = bb[i];
float4 b = make_float4(p.x, p.y, p.z, 1.0f);
float4 c = make_float4(dot(worldtondc.x, b),
dot(worldtondc.y, b),
dot(worldtondc.z, b),
dot(worldtondc.w, b));
float4 c = make_float4(
dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b));
p = float4_to_float3(c / c.w);
if (c.z < 0.0f) {
p.x = 1.0f - p.x;
@@ -121,10 +116,8 @@ bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
if (all_behind) {
return true;
}
return (bb_min.x >= 1.0f + camera_cull_margin_ ||
bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ ||
bb_max.y <= -camera_cull_margin_);
return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ ||
bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_);
}
bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8])

View File

@@ -24,8 +24,7 @@ CCL_NAMESPACE_BEGIN
class Scene;
class BlenderObjectCulling
{
class BlenderObjectCulling {
public:
BlenderObjectCulling(Scene *scene, BL::Scene &b_scene);

View File

@@ -177,13 +177,11 @@ static PyObject *init_func(PyObject * /*self*/, PyObject *args)
BlenderSession::headless = headless;
VLOG(2) << "Debug flags initialized to:\n"
<< DebugFlags();
VLOG(2) << "Debug flags initialized to:\n" << DebugFlags();
Py_RETURN_NONE;
}
static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/)
{
ShaderManager::free_memory();
@@ -197,9 +195,15 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
PyObject *pyengine, *pypreferences, *pydata, *pyregion, *pyv3d, *pyrv3d;
int preview_osl;
if(!PyArg_ParseTuple(args, "OOOOOOi", &pyengine, &pypreferences, &pydata,
&pyregion, &pyv3d, &pyrv3d, &preview_osl))
{
if (!PyArg_ParseTuple(args,
"OOOOOOi",
&pyengine,
&pypreferences,
&pydata,
&pyregion,
&pyv3d,
&pyrv3d,
&preview_osl)) {
return NULL;
}
@@ -209,7 +213,8 @@ static PyObject *create_func(PyObject * /*self*/, PyObject *args)
BL::RenderEngine engine(engineptr);
PointerRNA preferencesptr;
RNA_pointer_create(NULL, &RNA_Preferences, (void*)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
BL::Preferences preferences(preferencesptr);
PointerRNA dataptr;
@@ -283,7 +288,18 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
const char *pass_type;
int num_pixels, depth, object_id, pass_filter;
if(!PyArg_ParseTuple(args, "OOOsiiOiiO", &pysession, &pydepsgraph, &pyobject, &pass_type, &pass_filter, &object_id, &pypixel_array, &num_pixels, &depth, &pyresult))
if (!PyArg_ParseTuple(args,
"OOOsiiOiiO",
&pysession,
&pydepsgraph,
&pyobject,
&pass_type,
&pass_filter,
&object_id,
&pypixel_array,
&num_pixels,
&depth,
&pyresult))
return NULL;
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
@@ -304,7 +320,15 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
python_thread_state_save(&session->python_thread_state);
session->bake(b_depsgraph, b_object, pass_type, pass_filter, object_id, b_bake_pixel, (size_t)num_pixels, depth, (float *)b_result);
session->bake(b_depsgraph,
b_object,
pass_type,
pass_filter,
object_id,
b_bake_pixel,
(size_t)num_pixels,
depth,
(float *)b_result);
python_thread_state_restore(&session->python_thread_state);
@@ -422,7 +446,10 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
BL::BlendData b_data(dataptr);
PointerRNA nodeptr;
RNA_pointer_create((ID*)PyLong_AsVoidPtr(pynodegroup), &RNA_ShaderNodeScript, (void*)PyLong_AsVoidPtr(pynode), &nodeptr);
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup),
&RNA_ShaderNodeScript,
(void *)PyLong_AsVoidPtr(pynode),
&nodeptr);
BL::ShaderNodeScript b_node(nodeptr);
/* update bytecode hash */
@@ -476,8 +503,7 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
}
else if (param->type.vecsemantics == TypeDesc::POINT ||
param->type.vecsemantics == TypeDesc::VECTOR ||
param->type.vecsemantics == TypeDesc::NORMAL)
{
param->type.vecsemantics == TypeDesc::NORMAL) {
socket_type = "NodeSocketVector";
data_type = BL::NodeSocket::type_VECTOR;
@@ -534,9 +560,11 @@ static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args)
if (!b_sock) {
/* create new socket */
if (param->isoutput)
b_sock = b_node.outputs.create(b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
b_sock = b_node.outputs.create(
b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
else
b_sock = b_node.inputs.create(b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
b_sock = b_node.inputs.create(
b_data, socket_type.c_str(), param->name.c_str(), param->name.c_str());
/* set default value */
if (data_type == BL::NodeSocket::type_VALUE) {
@@ -651,7 +679,8 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepat
return true;
}
PyObject *sequence = PySequence_Fast(pyfilepaths, "File paths must be a string or sequence of strings");
PyObject *sequence = PySequence_Fast(pyfilepaths,
"File paths must be a string or sequence of strings");
if (sequence == NULL) {
return false;
}
@@ -673,23 +702,30 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string>& filepat
static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
{
static const char *keyword_list[] = {"preferences", "scene", "view_layer",
"input", "output",
"tile_size", "samples", NULL};
static const char *keyword_list[] = {
"preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
PyObject *pypreferences, *pyscene, *pyviewlayer;
PyObject *pyinput, *pyoutput = NULL;
int tile_size = 0, samples = 0;
if (!PyArg_ParseTupleAndKeywords(args, keywords, "OOOO|Oii", (char**)keyword_list,
&pypreferences, &pyscene, &pyviewlayer,
&pyinput, &pyoutput,
&tile_size, &samples)) {
if (!PyArg_ParseTupleAndKeywords(args,
keywords,
"OOOO|Oii",
(char **)keyword_list,
&pypreferences,
&pyscene,
&pyviewlayer,
&pyinput,
&pyoutput,
&tile_size,
&samples)) {
return NULL;
}
/* Get device specification from preferences and scene. */
PointerRNA preferencesptr;
RNA_pointer_create(NULL, &RNA_Preferences, (void*)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
RNA_pointer_create(
NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
BL::Preferences b_preferences(preferencesptr);
PointerRNA sceneptr;
@@ -700,7 +736,10 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
/* Get denoising parameters from view layer. */
PointerRNA viewlayerptr;
RNA_pointer_create((ID*)PyLong_AsVoidPtr(pyscene), &RNA_ViewLayer, PyLong_AsVoidPtr(pyviewlayer), &viewlayerptr);
RNA_pointer_create((ID *)PyLong_AsVoidPtr(pyscene),
&RNA_ViewLayer,
PyLong_AsVoidPtr(pyviewlayer),
&viewlayerptr);
PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
DenoiseParams params;
@@ -762,7 +801,8 @@ static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywo
static const char *keyword_list[] = {"input", "output", NULL};
PyObject *pyinput, *pyoutput = NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywords, "OO", (char**)keyword_list, &pyinput, &pyoutput)) {
if (!PyArg_ParseTupleAndKeywords(
args, keywords, "OO", (char **)keyword_list, &pyinput, &pyoutput)) {
return NULL;
}
@@ -792,7 +832,6 @@ static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywo
Py_RETURN_NONE;
}
static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pyscene;
@@ -809,8 +848,7 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
Device::tag_update();
}
VLOG(2) << "Debug flags set to:\n"
<< DebugFlags();
VLOG(2) << "Debug flags set to:\n" << DebugFlags();
debug_flags_set = true;
@@ -824,8 +862,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
Device::tag_update();
}
if (debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n"
<< DebugFlags();
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
debug_flags_set = false;
}
Py_RETURN_NONE;
@@ -834,9 +871,7 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
{
int num_resumable_chunks, current_resumable_chunk;
if(!PyArg_ParseTuple(args, "ii",
&num_resumable_chunks,
&current_resumable_chunk)) {
if (!PyArg_ParseTuple(args, "ii", &num_resumable_chunks, &current_resumable_chunk)) {
Py_RETURN_NONE;
}
@@ -845,9 +880,7 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
abort();
Py_RETURN_NONE;
}
if(current_resumable_chunk < 1 ||
current_resumable_chunk > num_resumable_chunks)
{
if (current_resumable_chunk < 1 || current_resumable_chunk > num_resumable_chunks) {
fprintf(stderr, "Cycles: Bad value for current resumable chunk number.\n");
abort();
Py_RETURN_NONE;
@@ -859,9 +892,7 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
BlenderSession::num_resumable_chunks = num_resumable_chunks;
BlenderSession::current_resumable_chunk = current_resumable_chunk;
printf("Cycles: Will render chunk %d of %d\n",
current_resumable_chunk,
num_resumable_chunks);
printf("Cycles: Will render chunk %d of %d\n", current_resumable_chunk, num_resumable_chunks);
Py_RETURN_NONE;
}
@@ -869,10 +900,7 @@ static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
{
int num_chunks, start_chunk, end_chunk;
if(!PyArg_ParseTuple(args, "iii",
&num_chunks,
&start_chunk,
&end_chunk)) {
if (!PyArg_ParseTuple(args, "iii", &num_chunks, &start_chunk, &end_chunk)) {
Py_RETURN_NONE;
}
@@ -899,16 +927,12 @@ static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *a
VLOG(1) << "Initialized resumable render: "
<< "num_resumable_chunks=" << num_chunks << ", "
<< "start_resumable_chunk=" << start_chunk
<< "end_resumable_chunk=" << end_chunk;
<< "start_resumable_chunk=" << start_chunk << "end_resumable_chunk=" << end_chunk;
BlenderSession::num_resumable_chunks = num_chunks;
BlenderSession::start_resumable_chunk = start_chunk;
BlenderSession::end_resumable_chunk = end_chunk;
printf("Cycles: Will render chunks %d to %d of %d\n",
start_chunk,
end_chunk,
num_chunks);
printf("Cycles: Will render chunks %d to %d of %d\n", start_chunk, end_chunk, num_chunks);
Py_RETURN_NONE;
}
@@ -981,7 +1005,10 @@ static struct PyModuleDef module = {
"Blender cycles render integration",
-1,
methods,
NULL, NULL, NULL, NULL,
NULL,
NULL,
NULL,
NULL,
};
CCL_NAMESPACE_END
@@ -999,12 +1026,15 @@ void *CCL_python_module_init()
int curversion = OSL_LIBRARY_VERSION_CODE;
PyModule_AddObject(mod, "with_osl", Py_True);
Py_INCREF(Py_True);
PyModule_AddObject(mod, "osl_version",
Py_BuildValue("(iii)",
curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(mod, "osl_version_string",
PyUnicode_FromFormat("%2d, %2d, %2d",
curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(
mod,
"osl_version",
Py_BuildValue("(iii)", curversion / 10000, (curversion / 100) % 100, curversion % 100));
PyModule_AddObject(
mod,
"osl_version_string",
PyUnicode_FromFormat(
"%2d, %2d, %2d", curversion / 10000, (curversion / 100) % 100, curversion % 100));
#else
PyModule_AddObject(mod, "with_osl", Py_False);
Py_INCREF(Py_False);

View File

@@ -84,7 +84,8 @@ BlenderSession::BlenderSession(BL::RenderEngine& b_engine,
BL::BlendData &b_data,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height)
int width,
int height)
: session(NULL),
sync(NULL),
b_engine(b_engine),
@@ -119,7 +120,8 @@ void BlenderSession::create()
void BlenderSession::create_session()
{
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
@@ -141,9 +143,12 @@ void BlenderSession::create_session()
scene->name = b_scene.name();
/* setup callbacks for builtin image support */
scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3);
scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5);
scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5);
scene->image_manager->builtin_image_info_cb = function_bind(
&BlenderSession::builtin_image_info, this, _1, _2, _3);
scene->image_manager->builtin_image_pixels_cb = function_bind(
&BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5);
scene->image_manager->builtin_image_float_pixels_cb = function_bind(
&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5);
session->scene = scene;
@@ -164,7 +169,8 @@ void BlenderSession::create_session()
}
/* set buffer parameters */
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -203,13 +209,12 @@ void BlenderSession::reset_session(BL::BlendData& b_data, BL::Depsgraph& b_depsg
return;
}
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
if(scene->params.modified(scene_params) ||
session->params.modified(session_params) ||
!scene_params.persistent_data)
{
if (scene->params.modified(scene_params) || session->params.modified(session_params) ||
!scene_params.persistent_data) {
/* if scene or session parameters changed, it's easier to simply re-create
* them rather than trying to distinguish which settings need to be updated
*/
@@ -236,11 +241,8 @@ void BlenderSession::reset_session(BL::BlendData& b_data, BL::Depsgraph& b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render,
b_null_space_view3d,
b_null_region_view3d,
scene->camera,
width, height);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
@@ -304,8 +306,10 @@ static ShaderEvalType get_shader_type(const string& pass_type)
}
static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine,
int x, int y,
int w, int h,
int x,
int y,
int w,
int h,
const char *layername,
const char *viewname)
{
@@ -321,7 +325,9 @@ static void end_render_result(BL::RenderEngine& b_engine,
b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results);
}
void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight)
void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
bool highlight)
{
int x = rtile.x - session->tile_manager.params.full_x;
int y = rtile.y - session->tile_manager.params.full_y;
@@ -329,7 +335,8 @@ void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_upda
int h = rtile.h;
/* get render result */
BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
BL::RenderResult b_rr = begin_render_result(
b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
@@ -399,31 +406,31 @@ void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string& view_
string prefix = "cycles." + view_layer_name + ".";
/* Configured number of samples for the view layer. */
b_rr.stamp_data_add_field(
(prefix + "samples").c_str(),
b_rr.stamp_data_add_field((prefix + "samples").c_str(),
to_string(session->params.samples).c_str());
/* Store ranged samples information. */
if (session->tile_manager.range_num_samples != -1) {
b_rr.stamp_data_add_field(
(prefix + "range_start_sample").c_str(),
b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(),
to_string(session->tile_manager.range_start_sample).c_str());
b_rr.stamp_data_add_field(
(prefix + "range_num_samples").c_str(),
b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(),
to_string(session->tile_manager.range_num_samples).c_str());
}
/* Write cryptomatte metadata. */
if (scene->film->cryptomatte_passes & CRYPT_OBJECT) {
add_cryptomatte_layer(b_rr, view_layer_name + ".CryptoObject",
add_cryptomatte_layer(b_rr,
view_layer_name + ".CryptoObject",
scene->object_manager->get_cryptomatte_objects(scene));
}
if (scene->film->cryptomatte_passes & CRYPT_MATERIAL) {
add_cryptomatte_layer(b_rr, view_layer_name + ".CryptoMaterial",
add_cryptomatte_layer(b_rr,
view_layer_name + ".CryptoMaterial",
scene->shader_manager->get_cryptomatte_materials(scene));
}
if (scene->film->cryptomatte_passes & CRYPT_ASSET) {
add_cryptomatte_layer(b_rr, view_layer_name + ".CryptoAsset",
add_cryptomatte_layer(b_rr,
view_layer_name + ".CryptoAsset",
scene->object_manager->get_cryptomatte_assets(scene));
}
@@ -444,17 +451,21 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
/* set callback to write out render results */
session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
session->update_render_tile_cb = function_bind(&BlenderSession::update_render_tile, this, _1, _2);
session->update_render_tile_cb = function_bind(
&BlenderSession::update_render_tile, this, _1, _2);
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
/* render each layer */
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
/* temporary render result to find needed passes and views */
BL::RenderResult b_rr = begin_render_result(b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
BL::RenderResult b_rr = begin_render_result(
b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
BL::RenderLayer b_rlay = *b_single_rlay;
@@ -500,7 +511,8 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
}
int view_index = 0;
for(b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter, ++view_index) {
for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end();
++b_view_iter, ++view_index) {
b_rview_name = b_view_iter->name();
/* set the current view */
@@ -509,12 +521,8 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
/* update scene */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
sync->sync_data(b_render,
b_depsgraph,
b_v3d,
b_camera_override,
width, height,
&python_thread_state);
sync->sync_data(
b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
builtin_images_load();
/* Attempt to free all data which is held by Blender side, since at this
@@ -529,7 +537,8 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
/* Make sure all views have different noise patterns. - hardcoded value just to make it random */
if (view_index != 0) {
scene->integrator->seed += hash_int_2d(scene->integrator->seed, hash_int(view_index * 0xdeadbeef));
scene->integrator->seed += hash_int_2d(scene->integrator->seed,
hash_int(view_index * 0xdeadbeef));
scene->integrator->tag_update(scene);
}
@@ -591,8 +600,8 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_)
#endif
}
static void populate_bake_data(BakeData *data, const
int object_id,
static void populate_bake_data(BakeData *data,
const int object_id,
BL::BakePixel &pixel_array,
const int num_pixels)
{
@@ -602,7 +611,8 @@ static void populate_bake_data(BakeData *data, const
for (i = 0; i < num_pixels; i++) {
if (bp.object_id() == object_id) {
data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
} else {
}
else {
data->set_null(i);
}
bp = bp.next();
@@ -680,12 +690,8 @@ void BlenderSession::bake(BL::Depsgraph& b_depsgraph_,
/* update scene */
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_camera(b_render, b_camera_override, width, height, "");
sync->sync_data(b_render,
b_depsgraph,
b_v3d,
b_camera_override,
width, height,
&python_thread_state);
sync->sync_data(
b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
builtin_images_load();
}
@@ -693,8 +699,10 @@ void BlenderSession::bake(BL::Depsgraph& b_depsgraph_,
if (!session->progress.get_cancel()) {
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
@@ -729,12 +737,20 @@ void BlenderSession::bake(BL::Depsgraph& b_depsgraph_,
session->reset(buffer_params, session_params.samples);
session->update_scene();
session->progress.set_update_callback(function_bind(&BlenderSession::update_bake_progress, this));
session->progress.set_update_callback(
function_bind(&BlenderSession::update_bake_progress, this));
}
/* Perform bake. Check cancel to avoid crash with incomplete scene data. */
if (!session->progress.get_cancel() && bake_data) {
scene->bake_manager->bake(scene->device, &scene->dscene, scene, session->progress, shader_type, bake_pass_filter, bake_data, result);
scene->bake_manager->bake(scene->device,
&scene->dscene,
scene,
session->progress,
shader_type,
bake_pass_filter,
bake_data,
result);
}
/* free all memory used (host and device), so we wouldn't leave render
@@ -783,12 +799,14 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
bool read = false;
if (pass_type != PASS_NONE) {
/* copy pixels */
read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0], b_pass.name());
read = buffers->get_pass_rect(
pass_type, exposure, sample, components, &pixels[0], b_pass.name());
}
else {
int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
if (denoising_offset >= 0) {
read = buffers->get_denoising_pass_rect(denoising_offset, exposure, sample, components, &pixels[0]);
read = buffers->get_denoising_pass_rect(
denoising_offset, exposure, sample, components, &pixels[0]);
}
}
@@ -831,13 +849,12 @@ void BlenderSession::synchronize(BL::Depsgraph& b_depsgraph_)
return;
/* on session/scene parameter changes, we recreate session entirely */
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if(session->params.modified(session_params) ||
scene->params.modified(scene_params))
{
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
return;
@@ -867,12 +884,8 @@ void BlenderSession::synchronize(BL::Depsgraph& b_depsgraph_)
b_depsgraph = b_depsgraph_;
BL::Object b_camera_override(b_engine.camera_override());
sync->sync_data(b_render,
b_depsgraph,
b_v3d,
b_camera_override,
width, height,
&python_thread_state);
sync->sync_data(
b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
if (b_rv3d)
sync->sync_view(b_v3d, b_rv3d, width, height);
@@ -886,7 +899,8 @@ void BlenderSession::synchronize(BL::Depsgraph& b_depsgraph_)
/* reset if needed */
if (scene->need_reset()) {
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
session->reset(buffer_params, session_params.samples);
/* reset time */
@@ -944,8 +958,10 @@ bool BlenderSession::draw(int w, int h)
/* reset if requested */
if (reset) {
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
@@ -962,12 +978,15 @@ bool BlenderSession::draw(int w, int h)
update_status_progress();
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_render, b_v3d, b_rv3d, scene->camera, width, height);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
draw_params.bind_display_space_shader_cb = function_bind(&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
draw_params.unbind_display_space_shader_cb = function_bind(&BL::RenderEngine::unbind_display_space_shader, &b_engine);
draw_params.bind_display_space_shader_cb = function_bind(
&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
draw_params.unbind_display_space_shader_cb = function_bind(
&BL::RenderEngine::unbind_display_space_shader, &b_engine);
}
return !session->draw(buffer_params, draw_params);
@@ -1167,8 +1186,7 @@ void BlenderSession::builtin_image_info(const string &builtin_name,
/* Velocity and heat data is always low-resolution. */
if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT))
{
builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
amplify = 1;
}
@@ -1328,8 +1346,7 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
/* Velocity and heat data is always low-resolution. */
if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT))
{
builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
amplify = 1;
}
@@ -1384,9 +1401,8 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
}
}
else {
fprintf(stderr,
"Cycles error: unknown volume attribute %s, skipping\n",
builtin_name.c_str());
fprintf(
stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str());
pixels[0] = 0.0f;
return false;
}
@@ -1430,9 +1446,11 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
}
if (num_resumable_chunks > num_samples) {
fprintf(stderr, "Cycles warning: more sample chunks (%d) than samples (%d), "
fprintf(stderr,
"Cycles warning: more sample chunks (%d) than samples (%d), "
"this will cause some samples to be included in multiple chunks.\n",
num_resumable_chunks, num_samples);
num_resumable_chunks,
num_samples);
}
const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks;

View File

@@ -44,7 +44,8 @@ public:
BL::BlendData &b_data,
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
int width, int height);
int width,
int height);
~BlenderSession();
@@ -54,8 +55,7 @@ public:
void create_session();
void free_session();
void reset_session(BL::BlendData& b_data,
BL::Depsgraph& b_depsgraph);
void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph);
/* offline render */
void render(BL::Depsgraph &b_depsgraph);
@@ -70,16 +70,12 @@ public:
const int depth,
float pixels[]);
void write_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void write_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
/* update functions are used to update the display buffer only after a sample was rendered;
* they are only needed for better visual feedback */
void update_render_result(BL::RenderResult& b_rr,
BL::RenderLayer& b_rlay,
RenderTile& rtile);
void update_render_result(BL::RenderResult &b_rr, BL::RenderLayer &b_rlay, RenderTile &rtile);
void update_render_tile(RenderTile &rtile, bool highlight);
/* interactive updates */
@@ -161,9 +157,7 @@ protected:
void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
int builtin_image_frame(const string &builtin_name);
void builtin_image_info(const string &builtin_name,
void *builtin_data,
ImageMetaData& metadata);
void builtin_image_info(const string &builtin_name, void *builtin_data, ImageMetaData &metadata);
bool builtin_image_pixels(const string &builtin_name,
void *builtin_data,
unsigned char *pixels,

View File

@@ -40,9 +40,7 @@ typedef map<string, ConvertNode*> ProxyMap;
/* Find */
void BlenderSync::find_shader(BL::ID& id,
vector<Shader*>& used_shaders,
Shader *default_shader)
void BlenderSync::find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader)
{
Shader *shader = (id) ? shader_map.find(id) : default_shader;
@@ -54,26 +52,20 @@ void BlenderSync::find_shader(BL::ID& id,
/* Read the "volume_sampling" enum property from `ptr` through get_enum() and
 * return it as a VolumeSampling.
 *
 * VOLUME_NUM_SAMPLING and VOLUME_SAMPLING_DISTANCE are forwarded as the last
 * two get_enum() arguments (presumably the number of valid values and the
 * fallback value; confirm against get_enum()). */
static VolumeSampling get_volume_sampling(PointerRNA &ptr)
{
  return (VolumeSampling)get_enum(
      ptr, "volume_sampling", VOLUME_NUM_SAMPLING, VOLUME_SAMPLING_DISTANCE);
}
/* Read the "volume_interpolation" enum property from `ptr` through get_enum()
 * and return it as a VolumeInterpolation.
 *
 * VOLUME_NUM_INTERPOLATION and VOLUME_INTERPOLATION_LINEAR are forwarded as
 * the last two get_enum() arguments (presumably the number of valid values
 * and the fallback value; confirm against get_enum()). */
static VolumeInterpolation get_volume_interpolation(PointerRNA &ptr)
{
  return (VolumeInterpolation)get_enum(
      ptr, "volume_interpolation", VOLUME_NUM_INTERPOLATION, VOLUME_INTERPOLATION_LINEAR);
}
/* Read the "displacement_method" enum property from `ptr` through get_enum()
 * and return it as a DisplacementMethod.
 *
 * DISPLACE_NUM_METHODS and DISPLACE_BUMP are forwarded as the last two
 * get_enum() arguments (presumably the number of valid values and the
 * fallback value; confirm against get_enum()). */
static DisplacementMethod get_displacement_method(PointerRNA &ptr)
{
  return (DisplacementMethod)get_enum(
      ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP);
}
static int validate_enum_value(int value, int num_values, int default_value)
@@ -84,22 +76,17 @@ static int validate_enum_value(int value, int num_values, int default_value)
return value;
}
/* Return the interpolation setting of `b_node` as an InterpolationType.
 *
 * NodeType must provide an interpolation() accessor whose result converts to
 * int. The raw value is passed through validate_enum_value() with
 * INTERPOLATION_NUM_TYPES as `num_values` and INTERPOLATION_LINEAR as
 * `default_value` before being cast. */
template<typename NodeType> static InterpolationType get_image_interpolation(NodeType &b_node)
{
  int value = b_node.interpolation();
  return (InterpolationType)validate_enum_value(
      value, INTERPOLATION_NUM_TYPES, INTERPOLATION_LINEAR);
}
/* Return the extension setting of `b_node` as an ExtensionType.
 *
 * NodeType must provide an extension() accessor whose result converts to int.
 * The raw value is passed through validate_enum_value() with
 * EXTENSION_NUM_TYPES as `num_values` and EXTENSION_REPEAT as `default_value`
 * before being cast. */
template<typename NodeType> static ExtensionType get_image_extension(NodeType &b_node)
{
  int value = b_node.extension();
  return (ExtensionType)validate_enum_value(value, EXTENSION_NUM_TYPES, EXTENSION_REPEAT);
}
/* Graph */
@@ -189,7 +176,9 @@ static void set_default_value(ShaderInput *input,
break;
}
case SocketType::STRING: {
node->set(socket, (ustring)blender_absolute_path(b_data, b_id, get_string(b_sock.ptr, "default_value")));
node->set(
socket,
(ustring)blender_absolute_path(b_data, b_id, get_string(b_sock.ptr, "default_value")));
break;
}
default:
@@ -212,8 +201,7 @@ static void get_tex_mapping(TextureMapping *mapping, BL::TexMapping& b_mapping)
mapping->z_mapping = (TextureMapping::Mapping)b_mapping.mapping_z();
}
static void get_tex_mapping(TextureMapping *mapping,
BL::ShaderNodeMapping& b_mapping)
static void get_tex_mapping(TextureMapping *mapping, BL::ShaderNodeMapping &b_mapping)
{
if (!b_mapping)
return;
@@ -247,10 +235,7 @@ static ShaderNode *add_node(Scene *scene,
BL::ShaderNodeRGBCurve b_curve_node(b_node);
BL::CurveMapping mapping(b_curve_node.mapping());
RGBCurvesNode *curves = new RGBCurvesNode();
curvemapping_color_to_array(mapping,
curves->curves,
RAMP_TABLE_SIZE,
true);
curvemapping_color_to_array(mapping, curves->curves, RAMP_TABLE_SIZE, true);
curvemapping_minmax(mapping, true, &curves->min_x, &curves->max_x);
node = curves;
}
@@ -258,10 +243,7 @@ static ShaderNode *add_node(Scene *scene,
BL::ShaderNodeVectorCurve b_curve_node(b_node);
BL::CurveMapping mapping(b_curve_node.mapping());
VectorCurvesNode *curves = new VectorCurvesNode();
curvemapping_color_to_array(mapping,
curves->curves,
RAMP_TABLE_SIZE,
false);
curvemapping_color_to_array(mapping, curves->curves, RAMP_TABLE_SIZE, false);
curvemapping_minmax(mapping, false, &curves->min_x, &curves->max_x);
node = curves;
}
@@ -343,7 +325,8 @@ static ShaderNode *add_node(Scene *scene,
BL::ShaderNodeVectorTransform b_vector_transform_node(b_node);
VectorTransformNode *vtransform = new VectorTransformNode();
vtransform->type = (NodeVectorTransformType)b_vector_transform_node.vector_type();
vtransform->convert_from = (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_from();
vtransform->convert_from = (NodeVectorTransformConvertSpace)
b_vector_transform_node.convert_from();
vtransform->convert_to = (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_to();
node = vtransform;
}
@@ -520,7 +503,11 @@ static ShaderNode *add_node(Scene *scene,
else if (b_node.is_a(&RNA_ShaderNodeBsdfHairPrincipled)) {
BL::ShaderNodeBsdfHairPrincipled b_principled_hair_node(b_node);
PrincipledHairBsdfNode *principled_hair = new PrincipledHairBsdfNode();
principled_hair->parametrization = (NodePrincipledHairParametrization) get_enum(b_principled_hair_node.ptr, "parametrization", NODE_PRINCIPLED_HAIR_NUM, NODE_PRINCIPLED_HAIR_REFLECTANCE);
principled_hair->parametrization = (NodePrincipledHairParametrization)get_enum(
b_principled_hair_node.ptr,
"parametrization",
NODE_PRINCIPLED_HAIR_NUM,
NODE_PRINCIPLED_HAIR_REFLECTANCE);
node = principled_hair;
}
else if (b_node.is_a(&RNA_ShaderNodeBsdfPrincipled)) {
@@ -623,7 +610,8 @@ static ShaderNode *add_node(Scene *scene,
node = manager->osl_node("", bytecode_hash, b_script_node.bytecode());
}
else {
string absolute_filepath = blender_absolute_path(b_data, b_ntree, b_script_node.filepath());
string absolute_filepath = blender_absolute_path(
b_data, b_ntree, b_script_node.filepath());
node = manager->osl_node(absolute_filepath, "");
}
}
@@ -641,11 +629,9 @@ static ShaderNode *add_node(Scene *scene,
/* builtin images will use callback-based reading because
* they could only be loaded correct from blender side
*/
bool is_builtin = b_image.packed_file() ||
b_image.source() == BL::Image::source_GENERATED ||
bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() &&
b_image.source() != BL::Image::source_SEQUENCE);
(b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
if (is_builtin) {
/* for builtin images we're using image datablock name to find an image to
@@ -655,15 +641,12 @@ static ShaderNode *add_node(Scene *scene,
* builtin names for packed images and movies
*/
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user,
scene_frame);
int image_frame = image_user_frame_number(b_image_user, scene_frame);
image->filename = b_image.name() + "@" + string_printf("%d", image_frame);
image->builtin_data = b_image.ptr.data;
}
else {
image->filename = image_user_file_path(b_image_user,
b_image,
b_scene.frame_current());
image->filename = image_user_file_path(b_image_user, b_image, b_scene.frame_current());
image->builtin_data = NULL;
}
@@ -698,23 +681,18 @@ static ShaderNode *add_node(Scene *scene,
BL::ImageUser b_image_user(b_env_node.image_user());
EnvironmentTextureNode *env = new EnvironmentTextureNode();
if (b_image) {
bool is_builtin = b_image.packed_file() ||
b_image.source() == BL::Image::source_GENERATED ||
bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
b_image.source() == BL::Image::source_MOVIE ||
(b_engine.is_preview() &&
b_image.source() != BL::Image::source_SEQUENCE);
(b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
if (is_builtin) {
int scene_frame = b_scene.frame_current();
int image_frame = image_user_frame_number(b_image_user,
scene_frame);
int image_frame = image_user_frame_number(b_image_user, scene_frame);
env->filename = b_image.name() + "@" + string_printf("%d", image_frame);
env->builtin_data = b_image.ptr.data;
}
else {
env->filename = image_user_file_path(b_image_user,
b_image,
b_scene.frame_current());
env->filename = image_user_file_path(b_image_user, b_image, b_scene.frame_current());
env->builtin_data = NULL;
}
@@ -881,8 +859,7 @@ static ShaderNode *add_node(Scene *scene,
if (true) {
point_density->add_image();
b_point_density_node.cache_point_density(b_depsgraph);
scene->image_manager->tag_reload_image(
point_density->filename.string(),
scene->image_manager->tag_reload_image(point_density->filename.string(),
point_density->builtin_data,
point_density->interpolation,
EXTENSION_CLIP,
@@ -898,12 +875,8 @@ static ShaderNode *add_node(Scene *scene,
BL::Object b_ob(b_point_density_node.object());
if (b_ob) {
float3 loc, size;
point_density_texture_space(b_depsgraph,
b_point_density_node,
loc,
size);
point_density->tfm =
transform_translate(-loc) * transform_scale(size) *
point_density_texture_space(b_depsgraph, b_point_density_node, loc, size);
point_density->tfm = transform_translate(-loc) * transform_scale(size) *
transform_inverse(get_transform(b_ob.matrix_world()));
}
}
@@ -1028,14 +1001,16 @@ static void add_nodes(Scene *scene,
BL::Node::outputs_iterator b_output;
/* find the node to use for output if there are multiple */
BL::ShaderNode output_node = b_ntree.get_output_node(BL::ShaderNodeOutputMaterial::target_CYCLES);
BL::ShaderNode output_node = b_ntree.get_output_node(
BL::ShaderNodeOutputMaterial::target_CYCLES);
/* add nodes */
for (b_ntree.nodes.begin(b_node); b_node != b_ntree.nodes.end(); ++b_node) {
if (b_node->mute() || b_node->is_a(&RNA_NodeReroute)) {
/* replace muted node with internal links */
BL::Node::internal_links_iterator b_link;
for(b_node->internal_links.begin(b_link); b_link != b_node->internal_links.end(); ++b_link) {
for (b_node->internal_links.begin(b_link); b_link != b_node->internal_links.end();
++b_link) {
BL::NodeSocket to_socket(b_link->to_socket());
SocketType::Type to_socket_type = convert_socket_type(to_socket);
if (to_socket_type == SocketType::UNDEFINED) {
@@ -1050,8 +1025,7 @@ static void add_nodes(Scene *scene,
graph->add(proxy);
}
}
else if(b_node->is_a(&RNA_ShaderNodeGroup) ||
b_node->is_a(&RNA_NodeCustomGroup) ||
else if (b_node->is_a(&RNA_ShaderNodeGroup) || b_node->is_a(&RNA_NodeCustomGroup) ||
b_node->is_a(&RNA_ShaderNodeCustomGroup)) {
BL::ShaderNodeTree b_group_ntree(PointerRNA_NULL);
@@ -1147,14 +1121,8 @@ static void add_nodes(Scene *scene,
}
else {
BL::ShaderNode b_shader_node(*b_node);
node = add_node(scene,
b_engine,
b_data,
b_depsgraph,
b_scene,
graph,
b_ntree,
b_shader_node);
node = add_node(
scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree, b_shader_node);
}
if (node) {
@@ -1388,7 +1356,8 @@ void BlenderSync::sync_world(BL::Depsgraph& b_depsgraph, bool update_all)
if (b_engine.is_preview() == false)
background->transparent = get_boolean(cscene, "film_transparent");
else
background->transparent = b_scene.render().alpha_mode() == BL::RenderSettings::alpha_mode_TRANSPARENT;
background->transparent = b_scene.render().alpha_mode() ==
BL::RenderSettings::alpha_mode_TRANSPARENT;
if (background->transparent) {
background->transparent_glass = get_boolean(cscene, "film_transparent_glass");
@@ -1436,10 +1405,8 @@ void BlenderSync::sync_lights(BL::Depsgraph& b_depsgraph, bool update_all)
else {
float strength = 1.0f;
if(b_light.type() == BL::Light::type_POINT ||
b_light.type() == BL::Light::type_SPOT ||
b_light.type() == BL::Light::type_AREA)
{
if (b_light.type() == BL::Light::type_POINT || b_light.type() == BL::Light::type_SPOT ||
b_light.type() == BL::Light::type_AREA) {
strength = 100.0f;
}

View File

@@ -69,7 +69,8 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
progress(progress)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : RNA_float_get(&cscene, "dicing_rate");
dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
RNA_float_get(&cscene, "dicing_rate");
max_subdivisions = RNA_int_get(&cscene, "max_subdivisions");
}
@@ -91,8 +92,8 @@ void BlenderSync::sync_recalc(BL::Depsgraph& b_depsgraph)
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
bool dicing_prop_changed = false;
float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate")
: RNA_float_get(&cscene, "dicing_rate");
float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
RNA_float_get(&cscene, "dicing_rate");
if (dicing_rate != updated_dicing_rate) {
dicing_rate = updated_dicing_rate;
@@ -143,8 +144,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph& b_depsgraph)
if (object_is_mesh(b_ob)) {
if (updated_geometry ||
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE))
{
(object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) {
BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data();
mesh_map.set_recalc(key);
}
@@ -193,7 +193,8 @@ void BlenderSync::sync_data(BL::RenderSettings& b_render,
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width, int height,
int width,
int height,
void **python_thread_state)
{
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
@@ -207,17 +208,11 @@ void BlenderSync::sync_data(BL::RenderSettings& b_render,
mesh_synced.clear(); /* use for objects and motion sync */
if(scene->need_motion() == Scene::MOTION_PASS ||
scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER)
{
if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE ||
scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) {
sync_objects(b_depsgraph);
}
sync_motion(b_render,
b_depsgraph,
b_override,
width, height,
python_thread_state);
sync_motion(b_render, b_depsgraph, b_override, width, height, python_thread_state);
mesh_synced.clear();
@@ -254,8 +249,7 @@ void BlenderSync::sync_integrator()
integrator->seed = get_int(cscene, "seed");
if (get_boolean(cscene, "use_animated_seed")) {
integrator->seed = hash_int_2d(b_scene.frame_current(),
get_int(cscene, "seed"));
integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed"));
if (b_scene.frame_subframe() != 0.0f) {
/* TODO(sergey): Ideally should be some sort of hash_merge,
* but this is good enough for now.
@@ -266,10 +260,7 @@ void BlenderSync::sync_integrator()
}
integrator->sampling_pattern = (SamplingPattern)get_enum(
cscene,
"sampling_pattern",
SAMPLING_NUM_PATTERNS,
SAMPLING_PATTERN_SOBOL);
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL);
integrator->sample_clamp_direct = get_float(cscene, "sample_clamp_direct");
integrator->sample_clamp_indirect = get_float(cscene, "sample_clamp_indirect");
@@ -282,10 +273,8 @@ void BlenderSync::sync_integrator()
integrator->motion_blur = r.use_motion_blur();
}
integrator->method = (Integrator::Method)get_enum(cscene,
"progressive",
Integrator::NUM_METHODS,
Integrator::PATH);
integrator->method = (Integrator::Method)get_enum(
cscene, "progressive", Integrator::NUM_METHODS, Integrator::PATH);
integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct");
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
@@ -344,11 +333,10 @@ void BlenderSync::sync_film()
Film prevfilm = *film;
film->exposure = get_float(cscene, "film_exposure");
film->filter_type = (FilterType)get_enum(cscene,
"pixel_filter_type",
FILTER_NUM_TYPES,
FILTER_BLACKMAN_HARRIS);
film->filter_width = (film->filter_type == FILTER_BOX)? 1.0f: get_float(cscene, "filter_width");
film->filter_type = (FilterType)get_enum(
cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS);
film->filter_width = (film->filter_type == FILTER_BOX) ? 1.0f :
get_float(cscene, "filter_width");
if (b_scene.world()) {
BL::WorldMistSettings b_mist = b_scene.world().mist_settings();
@@ -401,7 +389,6 @@ void BlenderSync::sync_view_layer(BL::SpaceView3D& /*b_v3d*/, BL::ViewLayer& b_v
else
view_layer.samples = samples;
}
}
/* Images */
@@ -409,8 +396,7 @@ void BlenderSync::sync_images()
{
/* Sync is a convention for this API, but currently it frees unused buffers. */
const bool is_interface_locked = b_engine.render() &&
b_engine.render().use_lock_interface();
const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface();
if (is_interface_locked == false && BlenderSession::headless == false) {
/* If interface is not locked, it's possible image is needed for
* the display.
@@ -419,17 +405,13 @@ void BlenderSync::sync_images()
}
/* Free buffers used by images which are not needed for render. */
BL::BlendData::images_iterator b_image;
for(b_data.images.begin(b_image);
b_image != b_data.images.end();
++b_image)
{
for (b_data.images.begin(b_image); b_image != b_data.images.end(); ++b_image) {
/* TODO(sergey): Consider making it an utility function to check
* whether image is considered builtin.
*/
const bool is_builtin = b_image->packed_file() ||
b_image->source() == BL::Image::source_GENERATED ||
b_image->source() == BL::Image::source_MOVIE ||
b_engine.is_preview();
b_image->source() == BL::Image::source_MOVIE || b_engine.is_preview();
if (is_builtin == false) {
b_image->buffers_free();
}
@@ -441,7 +423,9 @@ void BlenderSync::sync_images()
PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
{
string name = b_pass.name();
#define MAP_PASS(passname, passtype) if(name == passname) return passtype;
#define MAP_PASS(passname, passtype) \
if (name == passname) \
return passtype;
/* NOTE: Keep in sync with defined names from DNA_scene_types.h */
MAP_PASS("Combined", PASS_COMBINED);
MAP_PASS("Depth", PASS_DEPTH);
@@ -493,14 +477,17 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
{
string name = b_pass.name();
if(name == "Noisy Image") return DENOISING_PASS_PREFILTERED_COLOR;
if (name == "Noisy Image")
return DENOISING_PASS_PREFILTERED_COLOR;
if (name.substr(0, 10) != "Denoising ") {
return -1;
}
name = name.substr(10);
#define MAP_PASS(passname, offset) if(name == passname) return offset;
#define MAP_PASS(passname, offset) \
if (name == passname) \
return offset;
MAP_PASS("Normal", DENOISING_PASS_PREFILTERED_NORMAL);
MAP_PASS("Albedo", DENOISING_PASS_PREFILTERED_ALBEDO);
MAP_PASS("Depth", DENOISING_PASS_PREFILTERED_DEPTH);
@@ -513,8 +500,7 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass)
return -1;
}
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
BL::ViewLayer& b_view_layer)
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
{
vector<Pass> passes;
Pass::add(PASS_COMBINED, passes);
@@ -538,7 +524,9 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
scene->film->denoising_flags = 0;
if (full_denoising || write_denoising_passes) {
#define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag;
#define MAP_OPTION(name, flag) \
if (!get_boolean(crp, name)) \
scene->film->denoising_flags |= flag;
MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
@@ -605,7 +593,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_OBJECT);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_OBJECT);
}
if (get_boolean(crp, "use_pass_crypto_material")) {
for (int i = 0; i < crypto_depth; ++i) {
@@ -613,7 +602,8 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_MATERIAL);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_MATERIAL);
}
if (get_boolean(crp, "use_pass_crypto_asset")) {
for (int i = 0; i < crypto_depth; ++i) {
@@ -621,10 +611,12 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ASSET);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ASSET);
}
if (get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) {
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | CRYPT_ACCURATE);
scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes |
CRYPT_ACCURATE);
}
return passes;
@@ -636,8 +628,7 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph& b_depsgraph)
* caches to be releases from blender side in order to reduce peak memory
* footprint during synchronization process.
*/
const bool is_interface_locked = b_engine.render() &&
b_engine.render().use_lock_interface();
const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface();
const bool can_free_caches = BlenderSession::headless || is_interface_locked;
if (!can_free_caches) {
return;
@@ -646,18 +637,14 @@ void BlenderSync::free_data_after_sync(BL::Depsgraph& b_depsgraph)
* but that will need some API support first.
*/
BL::Depsgraph::objects_iterator b_ob;
for(b_depsgraph.objects.begin(b_ob);
b_ob != b_depsgraph.objects.end();
++b_ob)
{
for (b_depsgraph.objects.begin(b_ob); b_ob != b_depsgraph.objects.end(); ++b_ob) {
b_ob->cache_release();
}
}
/* Scene Parameters */
SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
bool background)
SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
{
BL::RenderSettings r = b_scene.render();
SceneParams params;
@@ -708,7 +695,8 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
}
#ifdef WITH_EMBREE
params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : params.bvh_layout;
params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
params.bvh_layout;
#endif
return params;
}
@@ -819,7 +807,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
if (params.progressive_refine) {
BL::Scene::view_layers_iterator b_view_layer;
for(b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); ++b_view_layer) {
for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end();
++b_view_layer) {
PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles");
if (get_boolean(crl, "use_denoising")) {
params.progressive_refine = false;
@@ -857,8 +846,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
params.progressive_update_timeout = 0.1;
}
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() &&
background && BlenderSession::print_render_stats;
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
return params;
}

View File

@@ -64,25 +64,29 @@ public:
BL::Depsgraph &b_depsgraph,
BL::SpaceView3D &b_v3d,
BL::Object &b_override,
int width, int height,
int width,
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer& b_render_layer,
BL::ViewLayer& b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width, int height,
int width,
int height,
const char *viewname);
void sync_view(BL::SpaceView3D& b_v3d,
BL::RegionView3D& b_rv3d,
int width, int height);
inline int get_layer_samples() { return view_layer.samples; }
inline int get_layer_bound_samples() { return view_layer.bound_samples; }
void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height);
inline int get_layer_samples()
{
return view_layer.samples;
}
inline int get_layer_bound_samples()
{
return view_layer.bound_samples;
}
/* get parameters */
static SceneParams get_scene_params(BL::Scene& b_scene,
bool background);
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
static SessionParams get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::Scene &b_scene,
@@ -92,7 +96,8 @@ public:
BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width, int height);
int width,
int height);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
@@ -105,7 +110,8 @@ private:
void sync_motion(BL::RenderSettings &b_render,
BL::Depsgraph &b_depsgraph,
BL::Object &b_override,
int width, int height,
int width,
int height,
void **python_thread_state);
void sync_film();
void sync_view();
@@ -120,11 +126,8 @@ private:
bool object_updated,
bool show_self,
bool show_particles);
void sync_curves(Mesh *mesh,
BL::Mesh& b_mesh,
BL::Object& b_ob,
bool motion,
int motion_step = 0);
void sync_curves(
Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0);
Object *sync_object(BL::Depsgraph &b_depsgraph,
BL::ViewLayer &b_view_layer,
BL::DepsgraphObjectInstance &b_instance,
@@ -145,10 +148,8 @@ private:
BL::Object &b_ob,
Object *object,
float motion_time);
void sync_camera_motion(BL::RenderSettings& b_render,
BL::Object& b_ob,
int width, int height,
float motion_time);
void sync_camera_motion(
BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
/* particles */
bool sync_dupli_particle(BL::Object &b_ob,
@@ -199,7 +200,8 @@ private:
use_hair(true),
samples(0),
bound_samples(false)
{}
{
}
string name;
BL::Material material_override;

View File

@@ -22,12 +22,14 @@ namespace {
/* Point density helpers. */
void density_texture_space_invert(float3& loc,
float3& size)
void density_texture_space_invert(float3 &loc, float3 &size)
{
if(size.x != 0.0f) size.x = 0.5f/size.x;
if(size.y != 0.0f) size.y = 0.5f/size.y;
if(size.z != 0.0f) size.z = 0.5f/size.z;
if (size.x != 0.0f)
size.x = 0.5f / size.x;
if (size.y != 0.0f)
size.y = 0.5f / size.y;
if (size.z != 0.0f)
size.z = 0.5f / size.z;
loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
}
@@ -46,9 +48,7 @@ void point_density_texture_space(BL::Depsgraph& b_depsgraph,
return;
}
float3 min, max;
b_point_density_node.calc_point_density_minmax(b_depsgraph,
&min[0],
&max[0]);
b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]);
loc = (min + max) * 0.5f;
size = (max - min) * 0.5f;
density_texture_space_invert(loc, size);

View File

@@ -74,8 +74,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE))
{
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
mesh = data.meshes.new_from_object(depsgraph, object, false, false);
}
}
@@ -103,9 +102,7 @@ static inline BL::Mesh object_to_mesh(BL::BlendData& data,
return mesh;
}
static inline void free_object_to_mesh(BL::BlendData& data,
BL::Object& object,
BL::Mesh& mesh)
static inline void free_object_to_mesh(BL::BlendData &data, BL::Object &object, BL::Mesh &mesh)
{
/* Free mesh if we didn't just use the existing one. */
if (object.data().ptr.data != mesh.ptr.data) {
@@ -130,9 +127,7 @@ static inline void colorramp_to_array(BL::ColorRamp& ramp,
}
}
static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap& curve,
float *min_x,
float *max_x)
static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *min_x, float *max_x)
{
*min_x = min(*min_x, curve.points[0].location()[0]);
*max_x = max(*max_x, curve.points[curve.points.length() - 1].location()[0]);
@@ -153,9 +148,7 @@ static inline void curvemapping_minmax(/*const*/ BL::CurveMapping& cumap,
}
}
static inline void curvemapping_to_array(BL::CurveMapping& cumap,
array<float>& data,
int size)
static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &data, int size)
{
cumap.update();
BL::CurveMap curve = cumap.curves[0];
@@ -208,23 +201,17 @@ static inline void curvemapping_color_to_array(BL::CurveMapping& cumap,
else {
for (int i = 0; i < size; i++) {
float t = min_x + (float)i / (float)(size - 1) * range_x;
data[i] = make_float3(mapR.evaluate(t),
mapG.evaluate(t),
mapB.evaluate(t));
data[i] = make_float3(mapR.evaluate(t), mapG.evaluate(t), mapB.evaluate(t));
}
}
}
static inline bool BKE_object_is_modified(BL::Object& self,
BL::Scene& scene,
bool preview)
static inline bool BKE_object_is_modified(BL::Object &self, BL::Scene &scene, bool preview)
{
return self.is_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false;
}
static inline bool BKE_object_is_deform_modified(BL::Object& self,
BL::Scene& scene,
bool preview)
static inline bool BKE_object_is_deform_modified(BL::Object &self, BL::Scene &scene, bool preview)
{
return self.is_deform_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false;
}
@@ -239,9 +226,7 @@ static inline int render_resolution_y(BL::RenderSettings& b_render)
return b_render.resolution_y() * b_render.resolution_percentage() / 100;
}
static inline string image_user_file_path(BL::ImageUser& iuser,
BL::Image& ima,
int cfra)
static inline string image_user_file_path(BL::ImageUser &iuser, BL::Image &ima, int cfra)
{
char filepath[1024];
BKE_image_user_frame_calc(iuser.ptr.data, cfra);
@@ -255,14 +240,12 @@ static inline int image_user_frame_number(BL::ImageUser& iuser, int cfra)
return iuser.frame_current();
}
static inline unsigned char *image_get_pixels_for_frame(BL::Image& image,
int frame)
static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame)
{
return BKE_image_get_pixels_for_frame(image.ptr.data, frame);
}
static inline float *image_get_float_pixels_for_frame(BL::Image& image,
int frame)
static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame)
{
return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame);
}
@@ -272,7 +255,6 @@ static inline void render_add_metadata(BL::RenderResult& b_rr, string name, stri
b_rr.stamp_data_add_field(name.c_str(), value.c_str());
}
/* Utilities */
static inline Transform get_transform(const BL::Array<float, 16> &array)
@@ -436,18 +418,14 @@ static inline void set_string(PointerRNA& ptr, const char *name, const string &v
/* Relative Paths */
static inline string blender_absolute_path(BL::BlendData& b_data,
BL::ID& b_id,
const string& path)
static inline string blender_absolute_path(BL::BlendData &b_data, BL::ID &b_id, const string &path)
{
if (path.size() >= 2 && path[0] == '/' && path[1] == '/') {
string dirname;
if (b_id.library()) {
BL::ID b_library_id(b_id.library());
dirname = blender_absolute_path(b_data,
b_library_id,
b_id.library().filepath());
dirname = blender_absolute_path(b_data, b_library_id, b_id.library().filepath());
}
else
dirname = b_data.filepath();
@@ -475,16 +453,17 @@ static inline string get_text_datablock_content(const PointerRNA& ptr)
/* Texture Space */
static inline void mesh_texture_space(BL::Mesh& b_mesh,
float3& loc,
float3& size)
static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &size)
{
loc = get_float3(b_mesh.texspace_location());
size = get_float3(b_mesh.texspace_size());
if(size.x != 0.0f) size.x = 0.5f/size.x;
if(size.y != 0.0f) size.y = 0.5f/size.y;
if(size.z != 0.0f) size.z = 0.5f/size.z;
if (size.x != 0.0f)
size.x = 0.5f / size.x;
if (size.y != 0.0f)
size.y = 0.5f / size.y;
if (size.z != 0.0f)
size.z = 0.5f / size.z;
loc = loc * size - make_float3(0.5f, 0.5f, 0.5f);
}
@@ -521,8 +500,7 @@ static inline uint object_motion_steps(BL::Object& b_parent, BL::Object& b_ob)
}
/* object uses deformation motion blur */
static inline bool object_use_deform_motion(BL::Object& b_parent,
BL::Object& b_ob)
static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_ob)
{
PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
bool use_deform_motion = get_boolean(cobject, "use_deform_motion");
@@ -572,7 +550,9 @@ static inline BL::DomainFluidSettings object_fluid_domain_find(BL::Object b_ob)
return BL::DomainFluidSettings(PointerRNA_NULL);
}
static inline Mesh::SubdivisionType object_subdivision_type(BL::Object& b_ob, bool preview, bool experimental)
static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob,
bool preview,
bool experimental)
{
PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
@@ -580,7 +560,8 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object& b_ob, bo
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1];
bool enabled = preview ? mod.show_viewport() : mod.show_render();
if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_boolean_get(&cobj, "use_adaptive_subdivision")) {
if (enabled && mod.type() == BL::Modifier::type_SUBSURF &&
RNA_boolean_get(&cobj, "use_adaptive_subdivision")) {
BL::SubsurfModifier subsurf(mod);
if (subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) {
@@ -600,8 +581,7 @@ static inline Mesh::SubdivisionType object_subdivision_type(BL::Object& b_ob, bo
* Utility class to keep in sync with blender data.
* Used for objects, meshes, lights and shaders. */
template<typename K, typename T>
class id_map {
template<typename K, typename T> class id_map {
public:
id_map(vector<T *> *scene_data_)
{
@@ -780,8 +760,7 @@ struct ParticleSystemKey {
void *ob;
int id[OBJECT_PERSISTENT_ID_SIZE];
ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE])
: ob(ob_)
ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_)
{
if (id_)
memcpy(id, id_, sizeof(id));
@@ -803,25 +782,30 @@ struct ParticleSystemKey {
class EdgeMap {
public:
EdgeMap() {
EdgeMap()
{
}
void clear() {
void clear()
{
edges_.clear();
}
void insert(int v0, int v1) {
void insert(int v0, int v1)
{
get_sorted_verts(v0, v1);
edges_.insert(std::pair<int, int>(v0, v1));
}
bool exists(int v0, int v1) {
bool exists(int v0, int v1)
{
get_sorted_verts(v0, v1);
return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
}
protected:
void get_sorted_verts(int& v0, int& v1) {
void get_sorted_verts(int &v0, int &v1)
{
if (v0 > v1) {
swap(v0, v1);
}

View File

@@ -41,19 +41,24 @@ CCL_NAMESPACE_BEGIN
const char *bvh_layout_name(BVHLayout layout)
{
switch (layout) {
case BVH_LAYOUT_BVH2: return "BVH2";
case BVH_LAYOUT_BVH4: return "BVH4";
case BVH_LAYOUT_BVH8: return "BVH8";
case BVH_LAYOUT_NONE: return "NONE";
case BVH_LAYOUT_EMBREE: return "EMBREE";
case BVH_LAYOUT_ALL: return "ALL";
case BVH_LAYOUT_BVH2:
return "BVH2";
case BVH_LAYOUT_BVH4:
return "BVH4";
case BVH_LAYOUT_BVH8:
return "BVH8";
case BVH_LAYOUT_NONE:
return "NONE";
case BVH_LAYOUT_EMBREE:
return "EMBREE";
case BVH_LAYOUT_ALL:
return "ALL";
}
LOG(DFATAL) << "Unsupported BVH layout was passed.";
return "";
}
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts)
BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts)
{
const BVHLayoutMask requested_layout_mask = (BVHLayoutMask)requested_layout;
/* Check whether requested layout is supported, if so -- no need to do
@@ -66,8 +71,7 @@ BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout,
/* This is a mask of supported BVH layouts which are narrower than the
* requested one.
*/
const BVHLayoutMask allowed_layouts_mask =
(supported_layouts & (requested_layout_mask - 1));
const BVHLayoutMask allowed_layouts_mask = (supported_layouts & (requested_layout_mask - 1));
/* We get widest from allowed ones and convert mask to actual layout. */
const BVHLayoutMask widest_allowed_layout_mask = __bsr(allowed_layouts_mask);
return (BVHLayout)(1 << widest_allowed_layout_mask);
@@ -75,8 +79,7 @@ BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout,
/* Pack Utility */
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i)
: node(n), idx(i)
BVHStackEntry::BVHStackEntry(const BVHNode *n, int i) : node(n), idx(i)
{
}
@@ -174,7 +177,8 @@ void BVH::refit(Progress& progress)
progress.set_substatus("Packing BVH primitives");
pack_primitives();
if(progress.get_cancel()) return;
if (progress.get_cancel())
return;
progress.set_substatus("Refitting BVH nodes");
refit_nodes();
@@ -244,7 +248,6 @@ void BVH::refit_primitives(int start, int end, BoundBox& bbox, uint& visibility)
}
}
visibility |= ob->visibility_for_tracing();
}
}
@@ -450,8 +453,8 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] =
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
@@ -477,10 +480,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
if (bvh->pack.leaf_nodes.size()) {
int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0];
size_t leaf_nodes_offset_size = bvh->pack.leaf_nodes.size();
for(size_t i = 0, j = 0;
i < leaf_nodes_offset_size;
i += BVH_NODE_LEAF_SIZE, j++)
{
for (size_t i = 0, j = 0; i < leaf_nodes_offset_size; i += BVH_NODE_LEAF_SIZE, j++) {
int4 data = leaf_nodes_offset[i];
data.x += prim_offset;
data.y += prim_offset;
@@ -504,9 +504,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
}
else {
nsize = use_qbvh
? BVH_UNALIGNED_QNODE_SIZE
: BVH_UNALIGNED_NODE_SIZE;
nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
}
}
@@ -521,9 +519,7 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
}
}
memcpy(pack_nodes + pack_nodes_offset,
bvh_nodes + i,
nsize_bbox*sizeof(int4));
memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
/* Modify offsets into arrays */
int4 data = bvh_nodes[i + nsize_bbox];

View File

@@ -73,23 +73,20 @@ struct PackedBVH {
}
};
enum BVH_TYPE {
bvh2,
bvh4,
bvh8
};
enum BVH_TYPE { bvh2, bvh4, bvh8 };
/* BVH */
class BVH
{
class BVH {
public:
PackedBVH pack;
BVHParams params;
vector<Object *> objects;
static BVH *create(const BVHParams &params, const vector<Object *> &objects);
virtual ~BVH() {}
virtual ~BVH()
{
}
virtual void build(Progress &progress, Stats *stats = NULL);
void refit(Progress &progress);
@@ -115,8 +112,7 @@ protected:
};
/* Pack Utility */
struct BVHStackEntry
{
struct BVHStackEntry {
const BVHNode *node;
int idx;

View File

@@ -25,8 +25,7 @@
CCL_NAMESPACE_BEGIN
BVH2::BVH2(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH2::BVH2(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
@@ -35,8 +34,7 @@ BVHNode *BVH2::widen_children_nodes(const BVHNode *root)
return const_cast<BVHNode *>(root);
}
void BVH2::pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf)
void BVH2::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
{
assert(e.idx + BVH_NODE_LEAF_SIZE <= pack.leaf_nodes.size());
float4 data[BVH_NODE_LEAF_SIZE];
@@ -59,13 +57,12 @@ void BVH2::pack_leaf(const BVHStackEntry& e,
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_NODE_LEAF_SIZE);
}
void BVH2::pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1)
void BVH2::pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1)
{
if (e0.node->is_unaligned || e1.node->is_unaligned) {
pack_unaligned_inner(e, e0, e1);
} else {
}
else {
pack_aligned_inner(e, e0, e1);
}
}
@@ -75,25 +72,29 @@ void BVH2::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry &e1)
{
pack_aligned_node(e.idx,
e0.node->bounds, e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_aligned_node(int idx,
const BoundBox &b0,
const BoundBox &b1,
int c0, int c1,
uint visibility0, uint visibility1)
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
int4 data[BVH_NODE_SIZE] = {
make_int4(visibility0 & ~PATH_RAY_NODE_UNALIGNED,
visibility1 & ~PATH_RAY_NODE_UNALIGNED,
c0, c1),
make_int4(
visibility0 & ~PATH_RAY_NODE_UNALIGNED, visibility1 & ~PATH_RAY_NODE_UNALIGNED, c0, c1),
make_int4(__float_as_int(b0.min.x),
__float_as_int(b1.min.x),
__float_as_int(b0.max.x),
@@ -120,8 +121,10 @@ void BVH2::pack_unaligned_inner(const BVHStackEntry& e,
e1.node->get_aligned_space(),
e0.node->bounds,
e1.node->bounds,
e0.encodeIdx(), e1.encodeIdx(),
e0.node->visibility, e1.node->visibility);
e0.encodeIdx(),
e1.encodeIdx(),
e0.node->visibility,
e1.node->visibility);
}
void BVH2::pack_unaligned_node(int idx,
@@ -129,18 +132,18 @@ void BVH2::pack_unaligned_node(int idx,
const Transform &aligned_space1,
const BoundBox &bounds0,
const BoundBox &bounds1,
int c0, int c1,
uint visibility0, uint visibility1)
int c0,
int c1,
uint visibility0,
uint visibility1)
{
assert(idx + BVH_UNALIGNED_NODE_SIZE <= pack.nodes.size());
assert(c0 < 0 || c0 < pack.nodes.size());
assert(c1 < 0 || c1 < pack.nodes.size());
float4 data[BVH_UNALIGNED_NODE_SIZE];
Transform space0 = BVHUnaligned::compute_node_transform(bounds0,
aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1,
aligned_space1);
Transform space0 = BVHUnaligned::compute_node_transform(bounds0, aligned_space0);
Transform space1 = BVHUnaligned::compute_node_transform(bounds1, aligned_space1);
data[0] = make_float4(__int_as_float(visibility0 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(visibility1 | PATH_RAY_NODE_UNALIGNED),
__int_as_float(c0),
@@ -164,8 +167,7 @@ void BVH2::pack_nodes(const BVHNode *root)
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_NODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_NODE_SIZE;
}
@@ -193,8 +195,7 @@ void BVH2::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE : BVH_NODE_SIZE;
}
while (stack.size()) {
@@ -215,9 +216,8 @@ void BVH2::pack_nodes(const BVHNode *root)
}
else {
idx[i] = nextNodeIdx;
nextNodeIdx += e.node->get_child(i)->has_unaligned()
? BVH_UNALIGNED_NODE_SIZE
: BVH_NODE_SIZE;
nextNodeIdx += e.node->get_child(i)->has_unaligned() ? BVH_UNALIGNED_NODE_SIZE :
BVH_NODE_SIZE;
}
}
@@ -276,19 +276,11 @@ void BVH2::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
if (is_unaligned) {
Transform aligned_space = transform_identity();
pack_unaligned_node(idx,
aligned_space, aligned_space,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
pack_unaligned_node(
idx, aligned_space, aligned_space, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
else {
pack_aligned_node(idx,
bbox0, bbox1,
c0, c1,
visibility0,
visibility1);
pack_aligned_node(idx, bbox0, bbox1, c0, c1, visibility0, visibility1);
}
bbox.grow(bbox0);

View File

@@ -54,11 +54,8 @@ protected:
/* pack */
void pack_nodes(const BVHNode *root) override;
void pack_leaf(const BVHStackEntry& e,
const LeafNode *leaf);
void pack_inner(const BVHStackEntry& e,
const BVHStackEntry& e0,
const BVHStackEntry& e1);
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry &e0, const BVHStackEntry &e1);
void pack_aligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
@@ -66,8 +63,10 @@ protected:
void pack_aligned_node(int idx,
const BoundBox &b0,
const BoundBox &b1,
int c0, int c1,
uint visibility0, uint visibility1);
int c0,
int c1,
uint visibility0,
uint visibility1);
void pack_unaligned_inner(const BVHStackEntry &e,
const BVHStackEntry &e0,
@@ -77,8 +76,10 @@ protected:
const Transform &aligned_space1,
const BoundBox &b0,
const BoundBox &b1,
int c0, int c1,
uint visibility0, uint visibility1);
int c0,
int c1,
uint visibility0,
uint visibility1);
/* refit */
void refit_nodes() override;

View File

@@ -31,8 +31,7 @@ CCL_NAMESPACE_BEGIN
* life easier all over the place.
*/
BVH4::BVH4(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH4::BVH4(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
params.bvh_layout = BVH_LAYOUT_BVH4;
}
@@ -121,9 +120,7 @@ void BVH4::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
}
void BVH4::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
@@ -149,9 +146,7 @@ void BVH4::pack_inner(const BVHStackEntry& e,
}
}
void BVH4::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[4];
int child[4];
@@ -159,13 +154,8 @@ void BVH4::pack_aligned_inner(const BVHStackEntry& e,
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH4::pack_aligned_node(int idx,
@@ -216,9 +206,7 @@ void BVH4::pack_aligned_node(int idx,
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
}
void BVH4::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[4];
BoundBox bounds[4];
@@ -255,9 +243,7 @@ void BVH4::pack_unaligned_node(int idx,
data[0].z = time_to;
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
@@ -316,8 +302,7 @@ void BVH4::pack_nodes(const BVHNode *root)
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
}
@@ -345,8 +330,7 @@ void BVH4::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
while (stack.size()) {
@@ -373,9 +357,7 @@ void BVH4::pack_nodes(const BVHNode *root)
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_QNODE_SIZE
: BVH_QNODE_SIZE;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
@@ -435,17 +417,13 @@ void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
c = data[7];
}
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[4] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
uint child_visibility[4] = {0};
int num_nodes = 0;
for (int i = 0; i < 4; ++i) {
if (c[i] != 0) {
refit_node((c[i] < 0)? -c[i]-1: c[i], (c[i] < 0),
child_bbox[i], child_visibility[i]);
refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
@@ -453,27 +431,13 @@ void BVH4::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
}
if (is_unaligned) {
Transform aligned_space[4] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
Transform aligned_space[4] = {
transform_identity(), transform_identity(), transform_identity(), transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
&c[0],
visibility,
0.0f,
1.0f,
num_nodes);
pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
}
}
}

View File

@@ -57,9 +57,7 @@ protected:
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
@@ -68,9 +66,7 @@ protected:
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,

View File

@@ -36,8 +36,7 @@
CCL_NAMESPACE_BEGIN
BVH8::BVH8(const BVHParams& params_, const vector<Object*>& objects_)
: BVH(params_, objects_)
BVH8::BVH8(const BVHParams &params_, const vector<Object *> &objects_) : BVH(params_, objects_)
{
}
@@ -59,8 +58,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
children[num_children++] = child0;
}
else {
const BVHNode *child00 = child0->get_child(0),
*child01 = child0->get_child(1);
const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
if (child00->is_leaf()) {
children[num_children++] = child00;
}
@@ -80,8 +78,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
children[num_children++] = child1;
}
else {
const BVHNode *child10 = child1->get_child(0),
*child11 = child1->get_child(1);
const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
if (child10->is_leaf()) {
children[num_children++] = child10;
}
@@ -153,9 +150,7 @@ void BVH8::pack_leaf(const BVHStackEntry& e, const LeafNode *leaf)
memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
}
void BVH8::pack_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
bool has_unaligned = false;
/* Check whether we have to create unaligned node or all nodes are aligned
@@ -181,9 +176,7 @@ void BVH8::pack_inner(const BVHStackEntry& e,
}
}
void BVH8::pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
BoundBox bounds[8];
int child[8];
@@ -191,13 +184,8 @@ void BVH8::pack_aligned_inner(const BVHStackEntry& e,
bounds[i] = en[i].node->bounds;
child[i] = en[i].encodeIdx();
}
pack_aligned_node(e.idx,
bounds,
child,
e.node->visibility,
e.node->time_from,
e.node->time_to,
num);
pack_aligned_node(
e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
}
void BVH8::pack_aligned_node(int idx,
@@ -248,9 +236,7 @@ void BVH8::pack_aligned_node(int idx,
memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
}
void BVH8::pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num)
void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
{
Transform aligned_space[8];
BoundBox bounds[8];
@@ -287,9 +273,7 @@ void BVH8::pack_unaligned_node(int idx,
data[0].c = time_to;
for (int i = 0; i < num; i++) {
Transform space = BVHUnaligned::compute_node_transform(
bounds[i],
aligned_space[i]);
Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
data[1][i] = space.x.x;
data[2][i] = space.x.y;
@@ -348,8 +332,7 @@ void BVH8::pack_nodes(const BVHNode *root)
const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
size_t node_size;
if (params.use_unaligned_nodes) {
const size_t num_unaligned_nodes =
root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
(num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
}
@@ -377,8 +360,7 @@ void BVH8::pack_nodes(const BVHNode *root)
}
else {
stack.push_back(BVHStackEntry(root, nextNodeIdx));
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
while (stack.size()) {
@@ -404,9 +386,7 @@ void BVH8::pack_nodes(const BVHNode *root)
}
else {
idx = nextNodeIdx;
nextNodeIdx += children[i]->has_unaligned()
? BVH_UNALIGNED_ONODE_SIZE
: BVH_ONODE_SIZE;
nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
}
stack.push_back(BVHStackEntry(children[i], idx));
}
@@ -512,10 +492,14 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
float8 *data = (float8 *)&pack.nodes[idx];
bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
/* Refit inner node, set bbox from children. */
BoundBox child_bbox[8] = { BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty,
BoundBox::empty, BoundBox::empty };
BoundBox child_bbox[8] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
int child[8];
uint child_visibility[8] = {0};
int num_nodes = 0;
@@ -524,8 +508,10 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
if (child[i] != 0) {
refit_node((child[i] < 0)? -child[i]-1: child[i], (child[i] < 0),
child_bbox[i], child_visibility[i]);
refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
(child[i] < 0),
child_bbox[i],
child_visibility[i]);
++num_nodes;
bbox.grow(child_bbox[i]);
visibility |= child_visibility[i];
@@ -533,27 +519,19 @@ void BVH8::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility)
}
if (is_unaligned) {
Transform aligned_space[8] = { transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity(),
transform_identity(), transform_identity()};
pack_unaligned_node(idx,
aligned_space,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
Transform aligned_space[8] = {transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity(),
transform_identity()};
pack_unaligned_node(
idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
else {
pack_aligned_node(idx,
child_bbox,
child,
visibility,
0.0f,
1.0f,
num_nodes);
pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
}
}
}

View File

@@ -68,9 +68,7 @@ protected:
void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_aligned_node(int idx,
const BoundBox *bounds,
const int *child,
@@ -79,9 +77,7 @@ protected:
const float time_to,
const int num);
void pack_unaligned_inner(const BVHStackEntry& e,
const BVHStackEntry *en,
int num);
void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
void pack_unaligned_node(int idx,
const Transform *aligned_space,
const BoundBox *bounds,

View File

@@ -29,15 +29,29 @@ CCL_NAMESPACE_BEGIN
/* SSE replacements */
__forceinline void prefetch_L1 (const void* /*ptr*/) { }
__forceinline void prefetch_L2 (const void* /*ptr*/) { }
__forceinline void prefetch_L3 (const void* /*ptr*/) { }
__forceinline void prefetch_NTA(const void* /*ptr*/) { }
__forceinline void prefetch_L1(const void * /*ptr*/)
{
}
__forceinline void prefetch_L2(const void * /*ptr*/)
{
}
__forceinline void prefetch_L3(const void * /*ptr*/)
{
}
__forceinline void prefetch_NTA(const void * /*ptr*/)
{
}
template<size_t src> __forceinline float extract(const int4 &b)
{ return b[src]; }
{
return b[src];
}
template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
{ float4 r = a; r[dst] = b; return r; }
{
float4 r = a;
r[dst] = b;
return r;
}
__forceinline int get_best_dimension(const float4 &bestSAH)
{
@@ -45,9 +59,12 @@ __forceinline int get_best_dimension(const float4& bestSAH)
float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
if(bestSAH.x == minSAH) return 0;
else if(bestSAH.y == minSAH) return 1;
else return 2;
if (bestSAH.x == minSAH)
return 0;
else if (bestSAH.y == minSAH)
return 1;
else
return 2;
}
/* BVH Object Binning */
@@ -72,10 +89,7 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
* need in re-calculating this.
*/
bounds_ = unaligned_heuristic->compute_aligned_boundbox(
*this,
prims,
*aligned_space,
&cent_bounds_);
*this, prims, *aligned_space, &cent_bounds_);
}
/* compute number of bins to use and precompute scaling factor for binning */
@@ -109,14 +123,26 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
int4 bin1 = get_bin(bounds1);
/* increase bounds for bins for even primitive */
int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
int b00 = (int)extract<0>(bin0);
bin_count[b00][0]++;
bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0);
bin_count[b01][1]++;
bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0);
bin_count[b02][2]++;
bin_bounds[b02][2].grow(bounds0);
/* increase bounds of bins for odd primitive */
int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(bounds1);
int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(bounds1);
int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(bounds1);
int b10 = (int)extract<0>(bin1);
bin_count[b10][0]++;
bin_bounds[b10][0].grow(bounds1);
int b11 = (int)extract<1>(bin1);
bin_count[b11][1]++;
bin_bounds[b11][1].grow(bounds1);
int b12 = (int)extract<2>(bin1);
bin_count[b12][2]++;
bin_bounds[b12][2].grow(bounds1);
}
/* for uneven number of primitives */
@@ -127,9 +153,15 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
int4 bin0 = get_bin(bounds0);
/* increase bounds of bins */
int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(bounds0);
int b00 = (int)extract<0>(bin0);
bin_count[b00][0]++;
bin_bounds[b00][0].grow(bounds0);
int b01 = (int)extract<1>(bin0);
bin_count[b01][1]++;
bin_bounds[b01][1].grow(bounds0);
int b02 = (int)extract<2>(bin0);
bin_count[b02][2]++;
bin_bounds[b02][2].grow(bounds0);
}
}
@@ -146,9 +178,12 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
count = count + bin_count[i];
r_count[i] = blocks(count);
bx = merge(bx,bin_bounds[i][0]); r_area[i][0] = bx.half_area();
by = merge(by,bin_bounds[i][1]); r_area[i][1] = by.half_area();
bz = merge(bz,bin_bounds[i][2]); r_area[i][2] = bz.half_area();
bx = merge(bx, bin_bounds[i][0]);
r_area[i][0] = bx.half_area();
by = merge(by, bin_bounds[i][1]);
r_area[i][1] = by.half_area();
bz = merge(bz, bin_bounds[i][2]);
r_area[i][2] = bz.half_area();
r_area[i][3] = r_area[i][2];
}
@@ -166,9 +201,12 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job,
for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
count = count + bin_count[i - 1];
bx = merge(bx,bin_bounds[i-1][0]); float Ax = bx.half_area();
by = merge(by,bin_bounds[i-1][1]); float Ay = by.half_area();
bz = merge(bz,bin_bounds[i-1][2]); float Az = bz.half_area();
bx = merge(bx, bin_bounds[i - 1][0]);
float Ax = bx.half_area();
by = merge(by, bin_bounds[i - 1][1]);
float Ay = by.half_area();
bz = merge(bz, bin_bounds[i - 1][2]);
float Az = bz.half_area();
float4 lCount = blocks(count);
float4 lArea = make_float4(Ax, Ay, Az, Az);
@@ -224,7 +262,8 @@ void BVHObjectBinning::split(BVHReference* prims,
}
/* finish */
if (l != 0 && N - 1 - r != 0) {
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N-1-r), prims);
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
return;
}
@@ -246,7 +285,8 @@ void BVHObjectBinning::split(BVHReference* prims,
rcent_bounds.grow(prims[start() + i].bounds().center2());
}
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N/2, N/2 + N%2), prims);
right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
prims);
left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
}

View File

@@ -34,21 +34,23 @@ class BVHBuild;
* location to different sets. The SAH is evaluated by computing the number of
* blocks occupied by the primitives in the partitions. */
class BVHObjectBinning : public BVHRange
{
class BVHObjectBinning : public BVHRange {
public:
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX) {}
__forceinline BVHObjectBinning() : leafSAH(FLT_MAX)
{
}
BVHObjectBinning(const BVHRange &job,
BVHReference *prims,
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHReference *prims,
BVHObjectBinning& left_o,
BVHObjectBinning& right_o) const;
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const;
__forceinline const BoundBox& unaligned_bounds() { return bounds_; }
__forceinline const BoundBox &unaligned_bounds()
{
return bounds_;
}
float splitSAH; /* SAH cost of the best split */
float leafSAH; /* SAH cost of creating a leaf */
@@ -103,8 +105,7 @@ protected:
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};

View File

@@ -42,20 +42,13 @@ CCL_NAMESPACE_BEGIN
class BVHBuildTask : public Task {
public:
BVHBuildTask(BVHBuild *build,
InnerNode *node,
int child,
const BVHObjectBinning& range,
int level)
BVHBuildTask(
BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
: range_(range)
{
run = function_bind(&BVHBuild::thread_build_node,
build,
node,
child,
&range_,
level);
run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
}
private:
BVHObjectBinning range_;
};
@@ -69,8 +62,7 @@ public:
const vector<BVHReference> &references,
int level)
: range_(range),
references_(references.begin() + range.start(),
references.begin() + range.end())
references_(references.begin() + range.start(), references.begin() + range.end())
{
range_.set_start(0);
run = function_bind(&BVHBuild::thread_build_spatial_split_node,
@@ -82,6 +74,7 @@ public:
level,
_1);
}
private:
BVHRange range_;
vector<BVHReference> references_;
@@ -129,10 +122,7 @@ void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *m
BoundBox bounds = BoundBox::empty;
t.bounds_grow(verts, bounds);
if (bounds.valid() && t.valid(verts)) {
references.push_back(BVHReference(bounds,
j,
i,
PRIMITIVE_TRIANGLE));
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_TRIANGLE));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -152,11 +142,7 @@ void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *m
t.bounds_grow(vert_steps + step * num_verts, bounds);
}
if (bounds.valid()) {
references.push_back(
BVHReference(bounds,
j,
i,
PRIMITIVE_MOTION_TRIANGLE));
references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -176,12 +162,7 @@ void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *m
* calculating BVH time step boundbox.
*/
float3 prev_verts[3];
t.motion_verts(verts,
vert_steps,
num_verts,
num_steps,
0.0f,
prev_verts);
t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts);
BoundBox prev_bounds = BoundBox::empty;
prev_bounds.grow(prev_verts[0]);
prev_bounds.grow(prev_verts[1]);
@@ -190,12 +171,7 @@ void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *m
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
float3 curr_verts[3];
t.motion_verts(verts,
vert_steps,
num_verts,
num_steps,
curr_time,
curr_verts);
t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts);
BoundBox curr_bounds = BoundBox::empty;
curr_bounds.grow(curr_verts[0]);
curr_bounds.grow(curr_verts[1]);
@@ -205,12 +181,7 @@ void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *m
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
references.push_back(
BVHReference(bounds,
j,
i,
PRIMITIVE_MOTION_TRIANGLE,
prev_time,
curr_time));
BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE, prev_time, curr_time));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -257,17 +228,11 @@ void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh
const size_t num_steps = mesh->motion_steps;
const float3 *key_steps = curve_attr_mP->data_float3();
for (size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k,
key_steps + step*num_keys,
curve_radius,
bounds);
curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
}
if (bounds.valid()) {
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
references.push_back(BVHReference(bounds,
j,
i,
packed_type));
references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -294,7 +259,10 @@ void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh
num_keys,
num_steps,
0.0f,
k - 1, k, k + 1, k + 2,
k - 1,
k,
k + 1,
k + 2,
prev_keys);
BoundBox prev_bounds = BoundBox::empty;
curve.bounds_grow(prev_keys, prev_bounds);
@@ -308,7 +276,10 @@ void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh
num_keys,
num_steps,
curr_time,
k - 1, k, k + 1, k + 2,
k - 1,
k,
k + 1,
k + 2,
curr_keys);
BoundBox curr_bounds = BoundBox::empty;
curve.bounds_grow(curr_keys, curr_bounds);
@@ -317,12 +288,7 @@ void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
references.push_back(BVHReference(bounds,
j,
i,
packed_type,
prev_time,
curr_time));
references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -416,7 +382,8 @@ void BVHBuild::add_references(BVHRange& root)
i++;
if(progress.get_cancel()) return;
if (progress.get_cancel())
return;
}
/* happens mostly on empty meshes */
@@ -467,8 +434,7 @@ BVHNode* BVHBuild::run()
}
spatial_free_index = 0;
need_prim_time = params.num_motion_curve_steps > 0 ||
params.num_motion_triangle_steps > 0;
need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
/* init progress updates */
double build_start_time;
@@ -518,23 +484,26 @@ BVHNode* BVHBuild::run()
VLOG(1) << "BVH build statistics:\n"
<< " Build time: " << time_dt() - build_start_time << "\n"
<< " Total number of nodes: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT))
<< "\n"
<< " Number of inner nodes: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT))
<< "\n"
<< " Number of leaf nodes: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT))
<< "\n"
<< " Number of unaligned nodes: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT)) << "\n"
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))
<< "\n"
<< " Allocation slop factor: "
<< ((prim_type.capacity() != 0)
? (float)prim_type.size() / prim_type.capacity()
: 1.0f) << "\n"
<< ((prim_type.capacity() != 0) ? (float)prim_type.size() / prim_type.capacity() :
1.0f)
<< "\n"
<< " Maximum depth: "
<< string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH)) << "\n";
}
}
return rootnode;
}
@@ -546,17 +515,14 @@ void BVHBuild::progress_update()
double progress_start = (double)progress_count / (double)progress_total;
double duplicates = (double)(progress_total - progress_original_total) / (double)progress_total;
string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
progress_start * 100.0, duplicates * 100.0);
string msg = string_printf(
"Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0);
progress.set_substatus(msg);
progress_start_time = time_dt();
}
void BVHBuild::thread_build_node(InnerNode *inner,
int child,
BVHObjectBinning *range,
int level)
void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
{
if (progress.get_cancel())
return;
@@ -634,7 +600,8 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
{
size_t size = range.size();
float leafSAH = params.sah_primitive_cost * range.leafSAH;
float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
float splitSAH = params.sah_node_cost * range.bounds().half_area() +
params.sah_primitive_cost * range.splitSAH;
/* Have at least one inner node on top level, for performance and correct
* visibility tests, since object instances do not check visibility flag.
@@ -642,8 +609,7 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
if (!(range.size() > 0 && params.top_level && level == 0)) {
/* Make leaf node when threshold reached or SAH tells us. */
if ((params.small_enough_for_leaf(size, level)) ||
(range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
{
(range_within_max_leaf_size(range, references) && leafSAH < splitSAH)) {
return create_leaf_node(range, references);
}
}
@@ -653,22 +619,16 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
float unalignedLeafSAH = FLT_MAX;
Transform aligned_space;
bool do_unalinged_split = false;
if(params.use_unaligned_nodes &&
splitSAH > params.unaligned_split_threshold*leafSAH)
{
aligned_space = unaligned_heuristic.compute_aligned_space(
range, &references[0]);
unaligned_range = BVHObjectBinning(range,
&references[0],
&unaligned_heuristic,
&aligned_space);
if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
aligned_space = unaligned_heuristic.compute_aligned_space(range, &references[0]);
unaligned_range = BVHObjectBinning(
range, &references[0], &unaligned_heuristic, &aligned_space);
unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
params.sah_primitive_cost * unaligned_range.splitSAH;
unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
if (!(range.size() > 0 && params.top_level && level == 0)) {
if (unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
range_within_max_leaf_size(range, references))
{
range_within_max_leaf_size(range, references)) {
return create_leaf_node(range, references);
}
}
@@ -689,8 +649,7 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
BoundBox bounds;
if (do_unalinged_split) {
bounds = unaligned_heuristic.compute_aligned_boundbox(
range, &references[0], aligned_space);
bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references[0], aligned_space);
}
else {
bounds = range.bounds();
@@ -764,18 +723,10 @@ BVHNode* BVHBuild::build_node(const BVHRange& range,
/* float unalignedLeafSAH = FLT_MAX; */
Transform aligned_space;
bool do_unalinged_split = false;
if(params.use_unaligned_nodes &&
splitSAH > params.unaligned_split_threshold*leafSAH)
{
aligned_space =
unaligned_heuristic.compute_aligned_space(range, &references->at(0));
unaligned_split = BVHMixedSplit(this,
storage,
range,
references,
level,
&unaligned_heuristic,
&aligned_space);
if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
unaligned_split = BVHMixedSplit(
this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
params.sah_primitive_cost * unaligned_split.nodeSAH;
@@ -826,19 +777,9 @@ BVHNode* BVHBuild::build_node(const BVHRange& range,
else {
/* Threaded build. */
inner = new InnerNode(bounds);
task_pool.push(new BVHSpatialSplitBuildTask(this,
inner,
0,
left,
*references,
level + 1),
task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
true);
task_pool.push(new BVHSpatialSplitBuildTask(this,
inner,
1,
right,
*references,
level + 1),
task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
true);
}
@@ -888,8 +829,7 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
}
}
BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
const vector<BVHReference>& references)
BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHReference> &references)
{
/* This is a bit overallocating here (considering leaf size into account),
* but chunk-based re-allocation in vector makes it difficult to use small
@@ -921,10 +861,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
/* NOTE: Keep initialization in sync with actual number of primitives. */
BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
BoundBox::empty,
BoundBox::empty,
BoundBox::empty};
BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {
BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
int ob_num = 0;
int num_new_prims = 0;
/* Fill in per-type type/index array. */
@@ -936,8 +874,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
p_type[type_index].push_back(ref.prim_type());
p_index[type_index].push_back(ref.prim_index());
p_object[type_index].push_back(ref.prim_object());
p_time[type_index].push_back(make_float2(ref.time_from(),
ref.time_to()));
p_time[type_index].push_back(make_float2(ref.time_from(), ref.time_to()));
bounds[type_index].grow(ref.bounds());
visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
@@ -964,9 +901,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
int num_leaves = 0;
size_t start_index = 0;
vector<int, LeafStackAllocator> local_prim_type,
local_prim_index,
local_prim_object;
vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object;
vector<float2, LeafTimeStackAllocator> local_prim_time;
local_prim_type.resize(num_new_prims);
local_prim_index.resize(num_new_prims);
@@ -990,15 +925,10 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
local_prim_time[index] = p_time[i][j];
}
if (params.use_unaligned_nodes && !alignment_found) {
alignment_found =
unaligned_heuristic.compute_aligned_space(p_ref[i][j],
&aligned_space);
alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j], &aligned_space);
}
}
LeafNode *leaf_node = new LeafNode(bounds[i],
visibility[i],
start_index,
start_index + num);
LeafNode *leaf_node = new LeafNode(bounds[i], visibility[i], start_index, start_index + num);
if (true) {
float time_from = 1.0f, time_to = 0.0f;
for (int j = 0; j < num; ++j) {
@@ -1014,9 +944,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
leaf_node->bounds = BoundBox::empty;
for (int j = 0; j < num; ++j) {
const BVHReference &ref = p_ref[i][j];
BoundBox ref_bounds =
unaligned_heuristic.compute_aligned_prim_boundbox(
ref,
BoundBox ref_bounds = unaligned_heuristic.compute_aligned_prim_boundbox(ref,
aligned_space);
leaf_node->bounds.grow(ref_bounds);
}
@@ -1109,9 +1037,7 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
* nodes created.
*/
const BVHReference *ref = (ob_num) ? &object_references[0] : NULL;
leaves[num_leaves] = create_object_leaf_nodes(ref,
start_index + num_new_leaf_data,
ob_num);
leaves[num_leaves] = create_object_leaf_nodes(ref, start_index + num_new_leaf_data, ob_num);
++num_leaves;
}
@@ -1132,7 +1058,8 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
return new InnerNode(range.bounds(), leaves[0], inner);
} else {
}
else {
/* Should be doing more branches if more primitive types added. */
assert(num_leaves <= 5);
BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);

View File

@@ -41,8 +41,7 @@ class Progress;
/* BVH Builder */
class BVHBuild
{
class BVHBuild {
public:
/* Constructor/Destructor */
BVHBuild(const vector<Object *> &objects,
@@ -77,8 +76,7 @@ protected:
int level,
int thread_id);
BVHNode *build_node(const BVHObjectBinning &range, int level);
BVHNode *create_leaf_node(const BVHRange& range,
const vector<BVHReference>& references);
BVHNode *create_leaf_node(const BVHRange &range, const vector<BVHReference> &references);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
bool range_within_max_leaf_size(const BVHRange &range,
@@ -86,10 +84,7 @@ protected:
/* Threads. */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node,
int child,
BVHObjectBinning *range,
int level);
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
void thread_build_spatial_split_node(InnerNode *node,
int child,
BVHRange *range,

View File

@@ -73,10 +73,11 @@ static void rtc_filter_func(const RTCFilterFunctionNArguments *args)
KernelGlobals *kg = ctx->kg;
/* Check if there is backfacing hair to ignore. */
if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
&& !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
&& !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
!(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
!(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
*args->valid = 0;
return;
}
@@ -93,10 +94,11 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
KernelGlobals *kg = ctx->kg;
/* For all ray types: Check if there is backfacing hair to ignore */
if(IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
&& !(kernel_data.curve.curveflags & CURVE_KN_BACKFACING)
&& !(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if(dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
if (IS_HAIR(hit->geomID) && (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) &&
!(kernel_data.curve.curveflags & CURVE_KN_BACKFACING) &&
!(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)) {
if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
*args->valid = 0;
return;
}
@@ -110,8 +112,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim &&
current_isect.t == ctx->isect_s[i].t) {
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
*args->valid = 0;
break;
@@ -183,7 +184,8 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
}
}
/* record intersection */
kernel_embree_convert_local_hit(kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
kernel_embree_convert_local_hit(
kg, ray, hit, &ctx->ss_isect->hits[hit_idx], ctx->sss_object_id);
ctx->ss_isect->Ng[hit_idx].x = hit->Ng_x;
ctx->ss_isect->Ng[hit_idx].y = hit->Ng_y;
ctx->ss_isect->Ng[hit_idx].z = hit->Ng_z;
@@ -199,8 +201,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim &&
current_isect.t == ctx->isect_s[i].t) {
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
*args->valid = 0;
break;
@@ -211,7 +212,8 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments* args)
*isect = current_isect;
/* Only primitives from volume object. */
uint tri_object = (isect->object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, isect->prim) : isect->object;
kernel_tex_fetch(__prim_object, isect->prim) :
isect->object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
@@ -282,10 +284,16 @@ int BVHEmbree::rtc_shared_users = 0;
thread_mutex BVHEmbree::rtc_shared_mutex;
BVHEmbree::BVHEmbree(const BVHParams &params_, const vector<Object *> &objects_)
: BVH(params_, objects_), scene(NULL), mem_used(0), top_level(NULL), stats(NULL),
curve_subdivisions(params.curve_subdivisions), build_quality(RTC_BUILD_QUALITY_REFIT),
: BVH(params_, objects_),
scene(NULL),
mem_used(0),
top_level(NULL),
stats(NULL),
curve_subdivisions(params.curve_subdivisions),
build_quality(RTC_BUILD_QUALITY_REFIT),
use_curves(params_.curve_flags & CURVE_KN_INTERPOLATE),
use_ribbons(params.curve_flags & CURVE_KN_RIBBONS), dynamic_scene(true)
use_ribbons(params.curve_flags & CURVE_KN_RIBBONS),
dynamic_scene(true)
{
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
@@ -296,31 +304,34 @@ BVHEmbree::BVHEmbree(const BVHParams& params_, const vector<Object*>& objects_)
ssize_t ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."\
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED flag."
"Ray visiblity will not work.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag."\
VLOG(1)
<< "Embree is compiled without the RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED flag."
"Renders may not look as expected.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "\
VLOG(1)
<< "Embree is compiled without the RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED flag. "
"Line primitives will not be rendered.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED);
if (ret != 1) {
assert(0);
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED flag. "\
VLOG(1) << "Embree is compiled without the RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED "
"flag. "
"Triangle primitives will not be rendered.";
}
ret = rtcGetDeviceProperty(rtc_shared_device, RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED);
if (ret != 0) {
assert(0);
VLOG(1) << "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "\
VLOG(1) << "Embree is compiled with the RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED flag. "
"Renders may not look as expected.";
}
}
@@ -393,7 +404,8 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST;
rtcSetSceneFlags(scene, scene_flags);
build_quality = dynamic ? RTC_BUILD_QUALITY_LOW :
(params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
(params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH :
RTC_BUILD_QUALITY_MEDIUM);
rtcSetSceneBuildQuality(scene, build_quality);
/* Count triangles and curves first, reserve arrays once. */
@@ -456,7 +468,8 @@ void BVHEmbree::build(Progress& progress, Stats *stats_)
add_object(ob, i);
}
++i;
if(progress.get_cancel()) return;
if (progress.get_cancel())
return;
}
if (progress.get_cancel()) {
@@ -518,7 +531,8 @@ void BVHEmbree::add_instance(Object *ob, int i)
if (ob->use_motion()) {
for (size_t step = 0; step < num_motion_steps; ++step) {
rtcSetGeometryTransform(geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float*)&ob->motion[step]);
rtcSetGeometryTransform(
geom_id, step, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->motion[step]);
}
}
else {
@@ -560,11 +574,12 @@ void BVHEmbree::add_triangles(Object *ob, int i)
rtcSetGeometryBuildQuality(geom_id, build_quality);
rtcSetGeometryTimeStepCount(geom_id, num_motion_steps);
unsigned *rtc_indices = (unsigned*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
RTC_FORMAT_UINT3, sizeof (int) * 3, num_triangles);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles);
assert(rtc_indices);
if (!rtc_indices) {
VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() << ".\n";
VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str()
<< ".\n";
return;
}
for (size_t j = 0; j < num_triangles; ++j) {
@@ -630,8 +645,8 @@ void BVHEmbree::update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh)
verts = &attr_mP->data_float3()[t_ * num_verts];
}
float *rtc_verts = (float*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
float *rtc_verts = (float *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT3, sizeof(float) * 3, num_verts + 1);
assert(rtc_verts);
if (rtc_verts) {
for (size_t j = 0; j < num_verts; ++j) {
@@ -675,12 +690,12 @@ void BVHEmbree::update_curve_vertex_buffer(RTCGeometry geom_id, const Mesh* mesh
verts = &attr_mP->data_float3()[t_ * num_keys];
}
float4 *rtc_verts = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t,
RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
float4 *rtc_verts = (float4 *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_VERTEX, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
float4 *rtc_tangents = NULL;
if (use_curves) {
rtc_tangents = (float4*)rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_TANGENT, t,
RTC_FORMAT_FLOAT4, sizeof (float) * 4, num_keys);
rtc_tangents = (float4 *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_TANGENT, t, RTC_FORMAT_FLOAT4, sizeof(float) * 4, num_keys);
assert(rtc_tangents);
}
assert(rtc_verts);
@@ -751,14 +766,15 @@ void BVHEmbree::add_curves(Object *ob, int i)
size_t prim_tri_index_size = pack.prim_index.size();
pack.prim_tri_index.resize(prim_tri_index_size + num_segments);
enum RTCGeometryType type = (!use_curves) ? RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
enum RTCGeometryType type = (!use_curves) ?
RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE :
(use_ribbons ? RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE :
RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE);
RTCGeometry geom_id = rtcNewGeometry(rtc_shared_device, type);
rtcSetGeometryTessellationRate(geom_id, curve_subdivisions);
unsigned *rtc_indices = (unsigned*) rtcSetNewGeometryBuffer(geom_id, RTC_BUFFER_TYPE_INDEX, 0,
RTC_FORMAT_UINT, sizeof (int), num_segments);
unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer(
geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments);
size_t rtc_index = 0;
for (size_t j = 0; j < num_curves; ++j) {
Mesh::Curve c = mesh->get_curve(j);
@@ -766,8 +782,8 @@ void BVHEmbree::add_curves(Object *ob, int i)
rtc_indices[rtc_index] = c.first_key + k;
/* Cycles specific data. */
pack.prim_object[prim_object_size + rtc_index] = i;
pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(num_motion_steps > 1 ?
PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
pack.prim_type[prim_type_size + rtc_index] = (PRIMITIVE_PACK_SEGMENT(
num_motion_steps > 1 ? PRIMITIVE_MOTION_CURVE : PRIMITIVE_CURVE, k));
pack.prim_index[prim_index_size + rtc_index] = j;
pack.prim_tri_index[prim_tri_index_size + rtc_index] = rtc_index;
@@ -896,8 +912,8 @@ void BVHEmbree::pack_nodes(const BVHNode *)
}
else {
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
pack_prim_tri_index[pack_prim_index_offset] =
bvh_prim_tri_index[i] + pack_prim_tri_verts_offset;
pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] +
pack_prim_tri_verts_offset;
}
pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];

View File

@@ -33,8 +33,7 @@ CCL_NAMESPACE_BEGIN
class Mesh;
class BVHEmbree : public BVH
{
class BVHEmbree : public BVH {
public:
virtual void build(Progress &progress, Stats *stats) override;
virtual ~BVHEmbree();
@@ -58,8 +57,12 @@ protected:
ssize_t mem_used;
void add_delayed_delete_scene(RTCScene scene) { delayed_delete_scenes.push_back(scene); }
void add_delayed_delete_scene(RTCScene scene)
{
delayed_delete_scenes.push_back(scene);
}
BVHEmbree *top_level;
private:
void delete_rtcScene();
void update_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh);

View File

@@ -30,8 +30,7 @@ int BVHNode::getSubtreeSize(BVH_STAT stat) const
{
int cnt = 0;
switch(stat)
{
switch (stat) {
case BVH_STAT_NODE_COUNT:
cnt = 1;
break;
@@ -118,7 +117,8 @@ float BVHNode::computeSubtreeSAHCost(const BVHParams& p, float probability) cons
for (int i = 0; i < num_children(); i++) {
BVHNode *child = get_child(i);
SAH += child->computeSubtreeSAHCost(p, probability * child->bounds.safe_area()/bounds.safe_area());
SAH += child->computeSubtreeSAHCost(
p, probability * child->bounds.safe_area() / bounds.safe_area());
}
return SAH;
@@ -159,9 +159,7 @@ struct DumpTraversalContext {
int id;
};
void dump_subtree(DumpTraversalContext *context,
const BVHNode *node,
const BVHNode *parent = NULL)
void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL)
{
if (node->is_leaf()) {
fprintf(context->stream,

View File

@@ -40,8 +40,7 @@ enum BVH_STAT {
class BVHParams;
class BVHNode
{
class BVHNode {
public:
virtual ~BVHNode()
{
@@ -51,7 +50,10 @@ public:
virtual bool is_leaf() const = 0;
virtual int num_children() const = 0;
virtual BVHNode *get_child(int i) const = 0;
virtual int num_triangles() const { return 0; }
virtual int num_triangles() const
{
return 0;
}
virtual void print(int depth = 0) const = 0;
inline void set_aligned_space(const Transform &aligned_space)
@@ -140,16 +142,12 @@ protected:
}
};
class InnerNode : public BVHNode
{
class InnerNode : public BVHNode {
public:
static constexpr int kNumMaxChildren = 8;
InnerNode(const BoundBox& bounds,
BVHNode* child0,
BVHNode* child1)
: BVHNode(bounds),
num_children_(2)
InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1)
: BVHNode(bounds), num_children_(2)
{
children[0] = child0;
children[1] = child1;
@@ -164,11 +162,8 @@ public:
}
}
InnerNode(const BoundBox& bounds,
BVHNode** children,
const int num_children)
: BVHNode(bounds),
num_children_(num_children)
InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children)
: BVHNode(bounds), num_children_(num_children)
{
visibility = 0;
time_from = FLT_MAX;
@@ -186,17 +181,21 @@ public:
/* NOTE: This function is only used by the binary BVH builder, and the node is
* supposed to be configured to have 2 children, which will be filled in in a
* bit. But it is important to have children reset to NULL. */
explicit InnerNode(const BoundBox& bounds)
: BVHNode(bounds),
num_children_(0)
explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0)
{
reset_unused_children();
visibility = 0;
num_children_ = 2;
}
bool is_leaf() const { return false; }
int num_children() const { return num_children_; }
bool is_leaf() const
{
return false;
}
int num_children() const
{
return num_children_;
}
BVHNode *get_child(int i) const
{
assert(i >= 0 && i < num_children_);
@@ -216,29 +215,35 @@ protected:
}
};
class LeafNode : public BVHNode
{
class LeafNode : public BVHNode {
public:
LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi)
: BVHNode(bounds),
lo(lo),
hi(hi)
: BVHNode(bounds), lo(lo), hi(hi)
{
this->bounds = bounds;
this->visibility = visibility;
}
LeafNode(const LeafNode& other)
: BVHNode(other),
lo(other.lo),
hi(other.hi)
LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi)
{
}
bool is_leaf() const { return true; }
int num_children() const { return 0; }
BVHNode *get_child(int) const { return NULL; }
int num_triangles() const { return hi - lo; }
bool is_leaf() const
{
return true;
}
int num_children() const
{
return 0;
}
BVHNode *get_child(int) const
{
return NULL;
}
int num_triangles() const
{
return hi - lo;
}
void print(int depth) const;
int lo;

View File

@@ -43,10 +43,8 @@ const char *bvh_layout_name(BVHLayout layout);
/* BVH Parameters */
class BVHParams
{
class BVHParams {
public:
/* spatial split area threshold */
bool use_spatial_split;
float spatial_split_alpha;
@@ -98,11 +96,7 @@ public:
int curve_subdivisions;
/* fixed parameters */
enum {
MAX_DEPTH = 64,
MAX_SPATIAL_DEPTH = 48,
NUM_SPATIAL_BINS = 32
};
enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 };
BVHParams()
{
@@ -139,24 +133,31 @@ public:
/* SAH costs */
__forceinline float cost(int num_nodes, int num_primitives) const
{ return node_cost(num_nodes) + primitive_cost(num_primitives); }
{
return node_cost(num_nodes) + primitive_cost(num_primitives);
}
__forceinline float primitive_cost(int n) const
{ return n*sah_primitive_cost; }
{
return n * sah_primitive_cost;
}
__forceinline float node_cost(int n) const
{ return n*sah_node_cost; }
{
return n * sah_node_cost;
}
__forceinline bool small_enough_for_leaf(int size, int level)
{ return (size <= min_leaf_size || level >= MAX_DEPTH); }
{
return (size <= min_leaf_size || level >= MAX_DEPTH);
}
/* Gets best matching BVH.
*
* If the requested layout is supported by the device, it will be used.
* Otherwise, widest supported layout below that will be used.
*/
static BVHLayout best_bvh_layout(BVHLayout requested_layout,
BVHLayoutMask supported_layouts);
static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts);
};
/* BVH Reference
@@ -164,10 +165,11 @@ public:
* Reference to a primitive. Primitive index and object are sneakily packed
* into BoundBox to reduce memory usage and align nicely */
class BVHReference
{
class BVHReference {
public:
__forceinline BVHReference() {}
__forceinline BVHReference()
{
}
__forceinline BVHReference(const BoundBox &bounds_,
int prim_index_,
@@ -175,24 +177,40 @@ public:
int prim_type,
float time_from = 0.0f,
float time_to = 1.0f)
: rbounds(bounds_),
time_from_(time_from),
time_to_(time_to)
: rbounds(bounds_), time_from_(time_from), time_to_(time_to)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
type = prim_type;
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
__forceinline int prim_type() const { return type; }
__forceinline float time_from() const { return time_from_; }
__forceinline float time_to() const { return time_to_; }
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline int prim_index() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int prim_object() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int prim_type() const
{
return type;
}
__forceinline float time_from() const
{
return time_from_;
}
__forceinline float time_to() const
{
return time_to_;
}
BVHReference& operator=(const BVHReference &arg) {
BVHReference &operator=(const BVHReference &arg)
{
if (&arg != this) {
/* TODO(sergey): Check if it is still faster to memcpy() with
* modern compilers.
@@ -202,7 +220,6 @@ public:
return *this;
}
protected:
BoundBox rbounds;
uint type;
@@ -215,8 +232,7 @@ protected:
* the reference array of a subset of primitives. Again uses trickery to pack
* integers into BoundBox for alignment purposes. */
class BVHRange
{
class BVHRange {
public:
__forceinline BVHRange()
{
@@ -224,8 +240,7 @@ public:
rbounds.max.w = __int_as_float(0);
}
__forceinline BVHRange(const BoundBox& bounds_, int start_, int size_)
: rbounds(bounds_)
__forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_)
{
rbounds.min.w = __int_as_float(start_);
rbounds.max.w = __int_as_float(size_);
@@ -238,13 +253,31 @@ public:
rbounds.max.w = __int_as_float(size_);
}
__forceinline void set_start(int start_) { rbounds.min.w = __int_as_float(start_); }
__forceinline void set_start(int start_)
{
rbounds.min.w = __int_as_float(start_);
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline const BoundBox& cent_bounds() const { return cbounds; }
__forceinline int start() const { return __float_as_int(rbounds.min.w); }
__forceinline int size() const { return __float_as_int(rbounds.max.w); }
__forceinline int end() const { return start() + size(); }
__forceinline const BoundBox &bounds() const
{
return rbounds;
}
__forceinline const BoundBox &cent_bounds() const
{
return cbounds;
}
__forceinline int start() const
{
return __float_as_int(rbounds.min.w);
}
__forceinline int size() const
{
return __float_as_int(rbounds.max.w);
}
__forceinline int end() const
{
return start() + size();
}
protected:
BoundBox rbounds;
@@ -253,8 +286,7 @@ protected:
/* BVH Spatial Bin */
struct BVHSpatialBin
{
struct BVHSpatialBin {
BoundBox bounds;
int enter;
int exit;

View File

@@ -35,40 +35,43 @@ public:
BVHReferenceCompare(int dim,
const BVHUnaligned *unaligned_heuristic,
const Transform *aligned_space)
: dim(dim),
unaligned_heuristic(unaligned_heuristic),
aligned_space(aligned_space)
: dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space)
{
}
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
return (aligned_space != NULL)
? unaligned_heuristic->compute_aligned_prim_boundbox(
prim, *aligned_space)
: prim.bounds();
return (aligned_space != NULL) ?
unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) :
prim.bounds();
}
/* Compare two references.
*
* Return value is similar to the return value of strcmp().
*/
__forceinline int compare(const BVHReference& ra,
const BVHReference& rb) const
__forceinline int compare(const BVHReference &ra, const BVHReference &rb) const
{
BoundBox ra_bounds = get_prim_bounds(ra),
rb_bounds = get_prim_bounds(rb);
BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb);
float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
if(ca < cb) return -1;
else if(ca > cb) return 1;
else if(ra.prim_object() < rb.prim_object()) return -1;
else if(ra.prim_object() > rb.prim_object()) return 1;
else if(ra.prim_index() < rb.prim_index()) return -1;
else if(ra.prim_index() > rb.prim_index()) return 1;
else if(ra.prim_type() < rb.prim_type()) return -1;
else if(ra.prim_type() > rb.prim_type()) return 1;
if (ca < cb)
return -1;
else if (ca > cb)
return 1;
else if (ra.prim_object() < rb.prim_object())
return -1;
else if (ra.prim_object() > rb.prim_object())
return 1;
else if (ra.prim_index() < rb.prim_index())
return -1;
else if (ra.prim_index() > rb.prim_index())
return 1;
else if (ra.prim_type() < rb.prim_type())
return -1;
else if (ra.prim_type() > rb.prim_type())
return 1;
return 0;
}
@@ -93,12 +96,7 @@ public:
const int job_end,
const BVHReferenceCompare &compare)
{
run = function_bind(bvh_reference_sort_threaded,
task_pool,
data,
job_start,
job_end,
compare);
run = function_bind(bvh_reference_sort_threaded, task_pool, data, job_start, job_end, compare);
}
};
@@ -160,10 +158,7 @@ static void bvh_reference_sort_threaded(TaskPool *task_pool,
have_work = false;
if (left < end) {
if (start < right) {
task_pool->push(new BVHSortTask(task_pool,
data,
left, end,
compare), true);
task_pool->push(new BVHSortTask(task_pool, data, left, end, compare), true);
}
else {
start = left;

View File

@@ -76,8 +76,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
left_bounds.grow(prim_bounds);
right_bounds = storage_->right_bounds[i - 1];
float sah = nodeSAH +
left_bounds.safe_area() * builder->params.primitive_cost(i) +
float sah = nodeSAH + left_bounds.safe_area() * builder->params.primitive_cost(i) +
right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i);
if (sah < min_sah) {
@@ -93,9 +92,7 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder,
}
}
void BVHObjectSplit::split(BVHRange& left,
BVHRange& right,
const BVHRange& range)
void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range)
{
assert(references_->size() > 0);
/* sort references according to split */
@@ -154,9 +151,7 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
}
else {
range_bounds = unaligned_heuristic->compute_aligned_boundbox(
range,
&references->at(0),
*aligned_space);
range, &references->at(0), *aligned_space);
}
float3 origin = range_bounds.min;
@@ -186,15 +181,14 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1);
for (int dim = 0; dim < 3; dim++) {
BVHReference currRef(get_prim_bounds(ref),
ref.prim_index(),
ref.prim_object(),
ref.prim_type());
BVHReference currRef(
get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type());
for (int i = firstBin[dim]; i < lastBin[dim]; i++) {
BVHReference leftRef, rightRef;
split_reference(builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
split_reference(
builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1));
storage_->bins[dim][i].bounds.grow(leftRef.bounds());
currRef = rightRef;
}
@@ -225,9 +219,9 @@ BVHSpatialSplit::BVHSpatialSplit(const BVHBuild& builder,
leftNum += storage_->bins[dim][i - 1].enter;
rightNum -= storage_->bins[dim][i - 1].exit;
float sah = nodeSAH +
left_bounds.safe_area() * builder.params.primitive_cost(leftNum) +
storage_->right_bounds[i - 1].safe_area() * builder.params.primitive_cost(rightNum);
float sah = nodeSAH + left_bounds.safe_area() * builder.params.primitive_cost(leftNum) +
storage_->right_bounds[i - 1].safe_area() *
builder.params.primitive_cost(rightNum);
if (sah < this->sah) {
this->sah = sah;
@@ -330,9 +324,7 @@ void BVHSpatialSplit::split(BVHBuild *builder,
}
/* Insert duplicated references into actual array in one go. */
if (new_refs.size() != 0) {
refs.insert(refs.begin() + (right_end - new_refs.size()),
new_refs.begin(),
new_refs.end());
refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end());
}
if (aligned_space_ != NULL) {
left_bounds = right_bounds = BoundBox::empty;
@@ -440,13 +432,7 @@ void BVHSpatialSplit::split_triangle_reference(const BVHReference& ref,
BoundBox &left_bounds,
BoundBox &right_bounds)
{
split_triangle_primitive(mesh,
NULL,
ref.prim_index(),
dim,
pos,
left_bounds,
right_bounds);
split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds);
}
void BVHSpatialSplit::split_curve_reference(const BVHReference &ref,
@@ -466,36 +452,18 @@ void BVHSpatialSplit::split_curve_reference(const BVHReference& ref,
right_bounds);
}
/* Split a whole object reference at `pos` along axis `dim`.
 *
 * Visits every triangle and every curve segment of object->mesh and hands each
 * one to split_triangle_primitive() / split_curve_primitive(), together with
 * &object->tfm and the caller's left_bounds / right_bounds.
 * The old and new spellings of the signature, calls and inner loop header
 * appear one after the other below. */
void BVHSpatialSplit::split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds)
void BVHSpatialSplit::split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds)
{
Mesh *mesh = object->mesh;
/* Triangles. */
for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) {
split_triangle_primitive(mesh,
&object->tfm,
tri_idx,
dim,
pos,
left_bounds,
right_bounds);
split_triangle_primitive(mesh, &object->tfm, tri_idx, dim, pos, left_bounds, right_bounds);
}
/* Curves: a curve with num_keys keys is visited as num_keys - 1 segments. */
for (int curve_idx = 0; curve_idx < mesh->num_curves(); ++curve_idx) {
Mesh::Curve curve = mesh->get_curve(curve_idx);
for(int segment_idx = 0;
segment_idx < curve.num_keys - 1;
++segment_idx)
{
split_curve_primitive(mesh,
&object->tfm,
curve_idx,
segment_idx,
dim,
pos,
left_bounds,
right_bounds);
for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) {
split_curve_primitive(
mesh, &object->tfm, curve_idx, segment_idx, dim, pos, left_bounds, right_bounds);
}
}
}
@@ -516,27 +484,13 @@ void BVHSpatialSplit::split_reference(const BVHBuild& builder,
const Mesh *mesh = ob->mesh;
if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
split_triangle_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
split_curve_reference(ref,
mesh,
dim,
pos,
left_bounds,
right_bounds);
split_curve_reference(ref, mesh, dim, pos, left_bounds, right_bounds);
}
else {
split_object_reference(ob,
dim,
pos,
left_bounds,
right_bounds);
split_object_reference(ob, dim, pos, left_bounds, right_bounds);
}
/* intersect with original bounds. */

View File

@@ -28,8 +28,7 @@ struct Transform;
/* Object Split */
class BVHObjectSplit
{
class BVHObjectSplit {
public:
float sah;
int dim;
@@ -37,7 +36,9 @@ public:
BoundBox left_bounds;
BoundBox right_bounds;
BVHObjectSplit() {}
BVHObjectSplit()
{
}
BVHObjectSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
const BVHRange &range,
@@ -46,9 +47,7 @@ public:
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHRange &left, BVHRange &right, const BVHRange &range);
protected:
BVHSpatialStorage *storage_;
@@ -62,26 +61,22 @@ protected:
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
};
/* Spatial Split */
class BVHSpatialSplit
{
class BVHSpatialSplit {
public:
float sah;
int dim;
float pos;
BVHSpatialSplit() : sah(FLT_MAX),
dim(0),
pos(0.0f),
storage_(NULL),
references_(NULL) {}
BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL)
{
}
BVHSpatialSplit(const BVHBuild &builder,
BVHSpatialStorage *storage,
const BVHRange &range,
@@ -90,10 +85,7 @@ public:
const BVHUnaligned *unaligned_heuristic = NULL,
const Transform *aligned_space = NULL);
void split(BVHBuild *builder,
BVHRange& left,
BVHRange& right,
const BVHRange& range);
void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range);
void split_reference(const BVHBuild &builder,
BVHReference &left,
@@ -147,11 +139,8 @@ protected:
float pos,
BoundBox &left_bounds,
BoundBox &right_bounds);
void split_object_reference(const Object *object,
int dim,
float pos,
BoundBox& left_bounds,
BoundBox& right_bounds);
void split_object_reference(
const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds);
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
{
@@ -159,8 +148,7 @@ protected:
return prim.bounds();
}
else {
return unaligned_heuristic_->compute_aligned_prim_boundbox(
prim, *aligned_space_);
return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_);
}
}
@@ -177,8 +165,7 @@ protected:
/* Mixed Object-Spatial Split */
class BVHMixedSplit
{
class BVHMixedSplit {
public:
BVHObjectSplit object;
BVHSpatialSplit spatial;
@@ -191,7 +178,9 @@ public:
BoundBox bounds;
BVHMixedSplit() {}
BVHMixedSplit()
{
}
__forceinline BVHMixedSplit(BVHBuild *builder,
BVHSpatialStorage *storage,
@@ -206,9 +195,7 @@ public:
}
else {
bounds = unaligned_heuristic->compute_aligned_boundbox(
range,
&references->at(0),
*aligned_space);
range, &references->at(0), *aligned_space);
}
/* find split candidates. */
float area = bounds.safe_area();
@@ -216,33 +203,22 @@ public:
leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
object = BVHObjectSplit(builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
object = BVHObjectSplit(
builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) {
BoundBox overlap = object.left_bounds;
overlap.intersect(object.right_bounds);
if (overlap.safe_area() >= builder->spatial_min_overlap) {
spatial = BVHSpatialSplit(*builder,
storage,
range,
references,
nodeSAH,
unaligned_heuristic,
aligned_space);
spatial = BVHSpatialSplit(
*builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space);
}
}
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
no_split = (minSAH == leafSAH &&
builder->range_within_max_leaf_size(range, *references));
no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, *references));
}
__forceinline void split(BVHBuild *builder,

View File

@@ -27,14 +27,11 @@
CCL_NAMESPACE_BEGIN
/* Construct the heuristic over a list of objects. The objects_ member is
 * initialized directly from the `objects` argument; the body is empty.
 * Old (two-line) and new (one-line) spellings of the signature follow. */
BVHUnaligned::BVHUnaligned(const vector<Object*>& objects)
: objects_(objects)
BVHUnaligned::BVHUnaligned(const vector<Object *> &objects) : objects_(objects)
{
}
Transform BVHUnaligned::compute_aligned_space(
const BVHObjectBinning& range,
Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const
{
for (int i = range.start(); i < range.end(); ++i) {
@@ -50,8 +47,7 @@ Transform BVHUnaligned::compute_aligned_space(
return transform_identity();
}
Transform BVHUnaligned::compute_aligned_space(
const BVHRange& range,
Transform BVHUnaligned::compute_aligned_space(const BVHRange &range,
const BVHReference *references) const
{
for (int i = range.start(); i < range.end(); ++i) {
@@ -67,8 +63,7 @@ Transform BVHUnaligned::compute_aligned_space(
return transform_identity();
}
bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const
bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const
{
const Object *object = objects_[ref.prim_object()];
const int packed_type = ref.prim_type();
@@ -79,8 +74,7 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
const Mesh *mesh = object->mesh;
const Mesh::Curve &curve = mesh->get_curve(curve_index);
const int key = curve.first_key + segment;
const float3 v1 = mesh->curve_keys[key],
v2 = mesh->curve_keys[key + 1];
const float3 v1 = mesh->curve_keys[key], v2 = mesh->curve_keys[key + 1];
float length;
const float3 axis = normalize_len(v2 - v1, &length);
if (length > 1e-6f) {
@@ -92,8 +86,7 @@ bool BVHUnaligned::compute_aligned_space(const BVHReference& ref,
return false;
}
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
const BVHReference& prim,
BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const
{
BoundBox bounds = BoundBox::empty;
@@ -105,11 +98,8 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type);
const Mesh *mesh = object->mesh;
const Mesh::Curve &curve = mesh->get_curve(curve_index);
curve.bounds_grow(segment,
&mesh->curve_keys[0],
&mesh->curve_radius[0],
aligned_space,
bounds);
curve.bounds_grow(
segment, &mesh->curve_keys[0], &mesh->curve_radius[0], aligned_space, bounds);
}
else {
bounds = prim.bounds().transformed(&aligned_space);
@@ -117,8 +107,7 @@ BoundBox BVHUnaligned::compute_aligned_prim_boundbox(
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHObjectBinning& range,
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds) const
@@ -138,8 +127,7 @@ BoundBox BVHUnaligned::compute_aligned_boundbox(
return bounds;
}
BoundBox BVHUnaligned::compute_aligned_boundbox(
const BVHRange& range,
BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds) const
@@ -159,8 +147,7 @@ BoundBox BVHUnaligned::compute_aligned_boundbox(
return bounds;
}
Transform BVHUnaligned::compute_node_transform(
const BoundBox& bounds,
Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds,
const Transform &aligned_space)
{
Transform space = aligned_space;
@@ -168,9 +155,9 @@ Transform BVHUnaligned::compute_node_transform(
space.y.w -= bounds.min.y;
space.z.w -= bounds.min.z;
float3 dim = bounds.max - bounds.min;
return transform_scale(1.0f / max(1e-18f, dim.x),
1.0f / max(1e-18f, dim.y),
1.0f / max(1e-18f, dim.z)) * space;
return transform_scale(
1.0f / max(1e-18f, dim.x), 1.0f / max(1e-18f, dim.y), 1.0f / max(1e-18f, dim.z)) *
space;
}
CCL_NAMESPACE_END

View File

@@ -34,33 +34,26 @@ public:
BVHUnaligned(const vector<Object *> &objects);
/* Calculate alignment for the oriented node for a given range. */
Transform compute_aligned_space(
const BVHObjectBinning& range,
const BVHReference *references) const;
Transform compute_aligned_space(
const BVHRange& range,
Transform compute_aligned_space(const BVHObjectBinning &range,
const BVHReference *references) const;
Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const;
/* Calculate alignment for the oriented node for a given reference.
*
* Return true when space was calculated successfully.
*/
bool compute_aligned_space(const BVHReference& ref,
Transform *aligned_space) const;
bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const;
/* Calculate primitive's bounding box in given space. */
BoundBox compute_aligned_prim_boundbox(
const BVHReference& prim,
BoundBox compute_aligned_prim_boundbox(const BVHReference &prim,
const Transform &aligned_space) const;
/* Calculate bounding box in given space. */
BoundBox compute_aligned_boundbox(
const BVHObjectBinning& range,
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
BoundBox compute_aligned_boundbox(
const BVHRange& range,
BoundBox compute_aligned_boundbox(const BVHRange &range,
const BVHReference *references,
const Transform &aligned_space,
BoundBox *cent_bounds = NULL) const;
@@ -68,8 +61,8 @@ public:
/* Calculate affine transform for node packing.
* Bounds will be in the range of 0..1.
*/
static Transform compute_node_transform(const BoundBox& bounds,
const Transform& aligned_space);
static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space);
protected:
/* List of objects BVH is being created for. */
const vector<Object *> &objects_;

View File

@@ -44,40 +44,33 @@ uint Device::devices_initialized_mask = 0;
/* Device Requested Features */
/* Stream a human-readable summary of requested_features to `os`.
 *
 * Writes one "Label: value" line per field, each terminated with std::endl,
 * and returns `os` so calls can be chained. The experimental flag prints as
 * "On"/"Off", max_nodes_group and nodes_features print as raw values, and the
 * remaining flags go through string_from_bool().
 * Every output statement appears twice below: the old wrapping first, then the
 * reformatted wrapping of the same statement. */
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features)
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
os << "Experimental features: "
<< (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
/* Old wrapping of the boolean feature lines. */
os << "Use Hair: "
<< string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: "
<< string_from_bool(requested_features.use_object_motion) << std::endl;
os << "Use Camera Motion: "
<< string_from_bool(requested_features.use_camera_motion) << std::endl;
os << "Use Baking: "
<< string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: "
<< string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: "
<< string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: "
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
os << "Use Transparent Shadows: "
<< string_from_bool(requested_features.use_transparent) << std::endl;
os << "Use Principled BSDF: "
<< string_from_bool(requested_features.use_principled) << std::endl;
os << "Use Denoising: "
<< string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: "
<< string_from_bool(requested_features.use_true_displacement) << std::endl;
os << "Use Background Light: "
<< string_from_bool(requested_features.use_background_light) << std::endl;
/* Reformatted wrapping of the same boolean feature lines. */
os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< std::endl;
os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< std::endl;
os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
<< std::endl;
os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
<< std::endl;
os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
<< std::endl;
os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
<< std::endl;
os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< std::endl;
os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< std::endl;
return os;
}
@@ -156,10 +149,8 @@ static int bind_fallback_shader(void)
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {
{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}
};
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
program = glCreateProgram();
@@ -237,11 +228,18 @@ bool Device::bind_fallback_display_space_shader(const float width, const float h
return true;
}
void Device::draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
void Device::draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
@@ -333,8 +331,14 @@ void Device::draw_pixels(
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2));
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
@@ -539,7 +543,9 @@ string Device::device_capabilities(uint mask)
return capabilities;
}
DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background)
{
assert(subdevices.size() > 0);
@@ -566,8 +572,7 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int th
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
VLOG(1) << "CPU render threads reduced from "
<< orig_cpu_threads << " to " << cpu_threads
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
<< ", to dedicate to GPU.";
if (cpu_threads >= 1) {

View File

@@ -95,9 +95,11 @@ public:
has_profiling = false;
}
bool operator==(const DeviceInfo &info) {
bool operator==(const DeviceInfo &info)
{
/* Multiple Devices with the same ID would be very bad. */
assert(id != info.id || (type == info.type && num == info.num && description == info.description));
assert(id != info.id ||
(type == info.type && num == info.num && description == info.description));
return id == info.id;
}
};
@@ -215,10 +217,8 @@ public:
if (experimental) {
build_options += "-D__KERNEL_EXPERIMENTAL__ ";
}
build_options += "-D__NODES_MAX_GROUP__=" +
string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" +
string_printf("%d", nodes_features);
build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
if (!use_hair) {
build_options += " -D__NO_HAIR__";
}
@@ -262,8 +262,7 @@ public:
}
};
std::ostream& operator <<(std::ostream &os,
const DeviceRequestedFeatures& requested_features);
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
/* Device */
@@ -274,6 +273,7 @@ struct DeviceDrawParams {
class Device {
friend class device_sub_ptr;
protected:
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
@@ -281,10 +281,16 @@ protected:
FALLBACK_SHADER_STATUS_SUCCESS,
};
/* Base-class constructor: stores the background flag, initializes
 * vertex_buffer and fallback_shader_program to 0, fallback_status to
 * FALLBACK_SHADER_STATUS_NONE, and info / stats / profiler from the
 * arguments. The body is empty.
 * The old and new layouts of the initializer list are interleaved below. */
Device(DeviceInfo& info_, Stats &stats_, Profiler &profiler_, bool background) : background(background),
Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
: background(background),
vertex_buffer(0),
fallback_status(FALLBACK_SHADER_STATUS_NONE), fallback_shader_program(0),
info(info_), stats(stats_), profiler(profiler_) {}
fallback_status(FALLBACK_SHADER_STATUS_NONE),
fallback_shader_program(0),
info(info_),
stats(stats_),
profiler(profiler_)
{
}
bool background;
string error_msg;
@@ -309,8 +315,14 @@ public:
/* info */
DeviceInfo info;
/* Error state accessors (single-line forms first, reformatted forms after). */
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
/* Reference to the stored error_msg string. */
virtual const string &error_message()
{
return error_msg;
}
/* True when error_message() is non-empty. Goes through the virtual
 * error_message() rather than reading error_msg directly. */
bool have_error()
{
return !error_message().empty();
}
virtual void set_error(const string &error)
{
if (!have_error()) {
@@ -319,7 +331,10 @@ public:
fprintf(stderr, "%s\n", error.c_str());
fflush(stderr);
}
/* Default implementation returns false. */
virtual bool show_samples() const { return false; }
virtual bool show_samples() const
{
return false;
}
virtual BVHLayoutMask get_bvh_layout_mask() const = 0;
/* statistics */
@@ -327,32 +342,42 @@ public:
Profiler &profiler;
/* memory alignment */
/* Default implementation returns MIN_ALIGNMENT_CPU_DATA_TYPES. */
virtual int mem_sub_ptr_alignment() { return MIN_ALIGNMENT_CPU_DATA_TYPES; }
virtual int mem_sub_ptr_alignment()
{
return MIN_ALIGNMENT_CPU_DATA_TYPES;
}
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* open shading language, only for CPU device */
/* Default implementation returns NULL. */
virtual void *osl_memory() { return NULL; }
virtual void *osl_memory()
{
return NULL;
}
/* Load/compile kernels; must be called before adding tasks.
 * This default implementation ignores its argument and returns true. */
virtual bool load_kernels(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Wait for the device to become available to upload data and receive tasks.
 * This method is used by the OpenCL device to load the optimized kernels or,
 * when they are not (yet) available, to load the generic kernels (only during
 * foreground rendering).
 * This default implementation ignores its argument and returns true. */
virtual bool wait_for_availability(
const DeviceRequestedFeatures& /*requested_features*/)
{ return true; }
virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Check whether 'better' kernels are available that we can switch over to.
 * This method is used to determine if the preview kernels can be switched to
 * regular kernels.
 * This default implementation returns DEVICE_KERNEL_USING_FEATURE_KERNEL. */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{ return DEVICE_KERNEL_USING_FEATURE_KERNEL; }
{
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
/* tasks */
virtual int get_split_task_count(DeviceTask &task) = 0;
@@ -361,10 +386,18 @@ public:
virtual void task_cancel() = 0;
/* opengl drawing */
virtual void draw_pixels(device_memory& mem, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params);
virtual void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params);
#ifdef WITH_NETWORK
/* networking */
@@ -372,13 +405,25 @@ public:
#endif
/* multi device */
/* Default multi-device hooks: the map/unmap functions have empty bodies and
 * device_number() returns 0. All parameters are unnamed (names kept only in
 * comments). Single-line forms come first, reformatted forms after. */
virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
virtual int device_number(Device * /*sub_device*/) { return 0; }
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
{
}
virtual int device_number(Device * /*sub_device*/)
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/)
{
}
/* static */
static Device *create(DeviceInfo& info, Stats &stats, Profiler& profiler, bool background = true);
static Device *create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background = true);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
@@ -402,8 +447,7 @@ protected:
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory& mem,
int y, int w, int h, int elem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;

View File

@@ -61,20 +61,15 @@ class CPUDevice;
/* Has to be outside of the class to be shared across template instantiations. */
static const char *logged_architecture = "";
template<typename F>
class KernelFunctions {
template<typename F> class KernelFunctions {
public:
KernelFunctions()
{
kernel = (F)NULL;
}
KernelFunctions(F kernel_default,
F kernel_sse2,
F kernel_sse3,
F kernel_sse41,
F kernel_avx,
F kernel_avx2)
KernelFunctions(
F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2)
{
const char *architecture_name = "default";
kernel = kernel_default;
@@ -127,16 +122,19 @@ public:
}
}
/* Return the stored kernel function pointer; asserts that it is non-null.
 * Old and new brace placement of the same signature follow. */
inline F operator()() const {
inline F operator()() const
{
assert(kernel);
return kernel;
}
protected:
F kernel;
};
class CPUSplitKernel : public DeviceSplitKernel {
CPUDevice *device;
public:
explicit CPUSplitKernel(CPUDevice *device);
@@ -158,8 +156,7 @@ public:
virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
};
class CPUDevice : public Device
{
class CPUDevice : public Device {
public:
TaskPool task_pool;
KernelGlobals kernel_globals;
@@ -176,38 +173,85 @@ public:
DeviceRequestedFeatures requested_features;
KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
convert_to_half_float_kernel;
KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
convert_to_byte_kernel;
KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
shader_kernel;
KernelFunctions<void(*)(int, TileInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TileInfo*, int, int, int, int, float*, float*, float, int*, int, int)> filter_get_feature_kernel;
KernelFunctions<void(*)(int, int, int, int*, float*, float*, int, int*)> filter_write_feature_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_detect_outliers_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
filter_divide_shadow_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, int, int, float *, float *, float, int *, int, int)>
filter_get_feature_kernel;
KernelFunctions<void (*)(int, int, int, int *, float *, float *, int, int *)>
filter_write_feature_kernel;
KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
filter_detect_outliers_kernel;
KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
filter_combine_halves_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int, int, float, float)> filter_nlm_calc_difference_kernel;
KernelFunctions<void (*)(
int, int, float *, float *, float *, float *, int *, int, int, int, float, float)>
filter_nlm_calc_difference_kernel;
KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_blur_kernel;
KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_calc_weight_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, int, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void (*)(
int, int, float *, float *, float *, float *, float *, int *, int, int, int)>
filter_nlm_update_output_kernel;
KernelFunctions<void (*)(float *, float *, int *, int)> filter_nlm_normalize_kernel;
KernelFunctions<void(*)(float*, TileInfo*, int, int, int, float*, int*, int*, int, int, bool, int, float)> filter_construct_transform_kernel;
KernelFunctions<void(*)(int, int, int, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int, int, bool)> filter_nlm_construct_gramian_kernel;
KernelFunctions<void(*)(int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
KernelFunctions<void (*)(
float *, TileInfo *, int, int, int, float *, int *, int *, int, int, bool, int, float)>
filter_construct_transform_kernel;
KernelFunctions<void (*)(int,
int,
int,
float *,
float *,
float *,
int *,
float *,
float3 *,
int *,
int *,
int,
int,
int,
int,
bool)>
filter_nlm_construct_gramian_kernel;
KernelFunctions<void (*)(int, int, int, float *, int *, float *, float3 *, int *, int)>
filter_finalize_kernel;
KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
int, int, int, int, int, int, int, int, ccl_global int*, int,
ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)> data_init_kernel;
KernelFunctions<void (*)(KernelGlobals *,
ccl_constant KernelData *,
ccl_global void *,
int,
ccl_global char *,
int,
int,
int,
int,
int,
int,
int,
int,
ccl_global int *,
int,
ccl_global char *,
ccl_global unsigned int *,
unsigned int,
ccl_global float *)>
data_init_kernel;
unordered_map<string, KernelFunctions<void (*)(KernelGlobals *, KernelData *)>> split_kernels;
/* Expands to a comma-separated list of KERNEL_NAME_EVAL(<arch>, name) entries, in the
 * order cpu, cpu_sse2, cpu_sse3, cpu_sse41, cpu_avx, cpu_avx2. */
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
KERNEL_NAME_EVAL(cpu_sse2, name), \
KERNEL_NAME_EVAL(cpu_sse3, name), \
KERNEL_NAME_EVAL(cpu_sse41, name), \
KERNEL_NAME_EVAL(cpu_avx, name), \
KERNEL_NAME_EVAL(cpu_avx2, name)
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: Device(info_, stats_, profiler_, background_),
@@ -246,7 +290,9 @@ public:
}
need_texture_info = false;
#define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
#define REGISTER_SPLIT_KERNEL(name) \
split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \
KERNEL_FUNCTIONS(name))
REGISTER_SPLIT_KERNEL(path_init);
REGISTER_SPLIT_KERNEL(scene_intersect);
REGISTER_SPLIT_KERNEL(lamp_emission);
@@ -280,7 +326,8 @@ public:
return (info.cpu_threads == 1);
}
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
bvh_layout_mask |= BVH_LAYOUT_BVH4;
@@ -347,9 +394,7 @@ public:
}
}
/* Intentionally does nothing: every parameter is left unnamed and the body is empty. */
void mem_copy_from(device_memory& /*mem*/,
int /*y*/, int /*w*/, int /*h*/,
int /*elem*/)
void mem_copy_from(device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
{
/* no-op */
}
@@ -398,10 +443,7 @@ public:
if (mem.interpolation == INTERPOLATION_NONE) {
/* Data texture. */
kernel_tex_copy(&kernel_globals,
mem.name,
mem.host_pointer,
mem.data_size);
kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
}
else {
/* Image Texture. */
@@ -469,14 +511,16 @@ public:
/* DeviceTask copy whose `run` callback is bound to CPUDevice::thread_run on the given
 * device, with this task object passed as the argument. */
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
bool denoising_non_local_means(device_ptr image_ptr, device_ptr guide_ptr, device_ptr variance_ptr, device_ptr out_ptr,
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
device_ptr out_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS);
@@ -504,21 +548,27 @@ public:
int dy = i / (2 * r + 1) - r;
int dx = i % (2 * r + 1) - r;
int local_rect[4] = {max(0, -dx), max(0, -dy), rect.z-rect.x - max(0, dx), rect.w-rect.y - max(0, dy)};
filter_nlm_calc_difference_kernel()(dx, dy,
int local_rect[4] = {
max(0, -dx), max(0, -dy), rect.z - rect.x - max(0, dx), rect.w - rect.y - max(0, dy)};
filter_nlm_calc_difference_kernel()(dx,
dy,
(float *)guide_ptr,
(float *)variance_ptr,
NULL,
difference,
local_rect,
w, channel_offset,
0, a, k_2);
w,
channel_offset,
0,
a,
k_2);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
filter_nlm_update_output_kernel()(dx, dy,
filter_nlm_update_output_kernel()(dx,
dy,
blurDifference,
(float *)image_ptr,
difference,
@@ -526,7 +576,8 @@ public:
weightAccum,
local_rect,
channel_offset,
stride, f);
stride,
f);
}
int local_rect[4] = {0, 0, rect.z - rect.x, rect.w - rect.y};
@@ -577,10 +628,12 @@ public:
int dy = i / (2 * r + 1) - r;
int dx = i % (2 * r + 1) - r;
int local_rect[4] = {max(0, -dx), max(0, -dy),
int local_rect[4] = {max(0, -dx),
max(0, -dy),
task->reconstruction_state.source_w - max(0, dx),
task->reconstruction_state.source_h - max(0, dy)};
filter_nlm_calc_difference_kernel()(dx, dy,
filter_nlm_calc_difference_kernel()(dx,
dy,
(float *)color_ptr,
(float *)color_variance_ptr,
(float *)scale_ptr,
@@ -592,9 +645,11 @@ public:
1.0f,
task->nlm_k_2);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, task->buffer.stride, 4);
filter_nlm_calc_weight_kernel()(
blurDifference, difference, local_rect, task->buffer.stride, 4);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
filter_nlm_construct_gramian_kernel()(dx, dy,
filter_nlm_construct_gramian_kernel()(dx,
dy,
task->tile_info->frames[frame],
blurDifference,
(float *)task->buffer.mem.device_pointer,
@@ -614,8 +669,7 @@ public:
return true;
}
bool denoising_solve(device_ptr output_ptr,
DenoisingTask *task)
bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
{
for (int y = 0; y < task->filter_area.w; y++) {
for (int x = 0; x < task->filter_area.z; x++) {
@@ -633,15 +687,20 @@ public:
return true;
}
bool denoising_combine_halves(device_ptr a_ptr, device_ptr b_ptr,
device_ptr mean_ptr, device_ptr variance_ptr,
int r, int4 rect, DenoisingTask *task)
bool denoising_combine_halves(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES);
for (int y = rect.y; y < rect.w; y++) {
for (int x = rect.x; x < rect.z; x++) {
filter_combine_halves_kernel()(x, y,
filter_combine_halves_kernel()(x,
y,
(float *)mean_ptr,
(float *)variance_ptr,
(float *)a_ptr,
@@ -653,9 +712,12 @@ public:
return true;
}
bool denoising_divide_shadow(device_ptr a_ptr, device_ptr b_ptr,
device_ptr sample_variance_ptr, device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr, DenoisingTask *task)
bool denoising_divide_shadow(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr,
DenoisingTask *task)
{
ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW);
@@ -663,7 +725,8 @@ public:
for (int x = task->rect.x; x < task->rect.z; x++) {
filter_divide_shadow_kernel()(task->render_buffer.samples,
task->tile_info,
x, y,
x,
y,
(float *)a_ptr,
(float *)b_ptr,
(float *)sample_variance_ptr,
@@ -692,7 +755,8 @@ public:
task->tile_info,
mean_offset,
variance_offset,
x, y,
x,
y,
(float *)mean_ptr,
(float *)variance_ptr,
scale,
@@ -734,7 +798,8 @@ public:
for (int y = task->rect.y; y < task->rect.w; y++) {
for (int x = task->rect.x; x < task->rect.z; x++) {
filter_detect_outliers_kernel()(x, y,
filter_detect_outliers_kernel()(x,
y,
(float *)image_ptr,
(float *)variance_ptr,
(float *)depth_ptr,
@@ -775,8 +840,7 @@ public:
if (use_coverage) {
coverage.init_pixel(x, y);
}
path_trace_kernel()(kg, render_buffer,
sample, x, y, tile.offset, tile.stride);
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
@@ -795,15 +859,23 @@ public:
tile.sample = tile.start_sample + tile.num_samples;
denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
denoising.functions.accumulate = function_bind(&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.construct_transform = function_bind(
&CPUDevice::denoising_construct_transform, this, &denoising);
denoising.functions.accumulate = function_bind(
&CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.write_feature = function_bind(&CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
denoising.functions.detect_outliers = function_bind(&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.divide_shadow = function_bind(
&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(
&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(
&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(
&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.write_feature = function_bind(
&CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
denoising.functions.detect_outliers = function_bind(
&CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
denoising.render_buffer.samples = tile.sample;
@@ -823,7 +895,8 @@ public:
device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
kgbuffer.alloc_to_device(1);
KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init());
KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
KernelGlobals(thread_kernel_globals_init());
profiler.add_state(&kg->profiler);
@@ -880,15 +953,26 @@ public:
if (task.rgba_half) {
for (int y = task.y; y < task.y + task.h; y++)
for (int x = task.x; x < task.x + task.w; x++)
convert_to_half_float_kernel()(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
convert_to_half_float_kernel()(&kernel_globals,
(uchar4 *)task.rgba_half,
(float *)task.buffer,
sample_scale,
x,
y,
task.offset,
task.stride);
}
else {
for (int y = task.y; y < task.y + task.h; y++)
for (int x = task.x; x < task.x + task.w; x++)
convert_to_byte_kernel()(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
convert_to_byte_kernel()(&kernel_globals,
(uchar4 *)task.rgba_byte,
(float *)task.buffer,
sample_scale,
x,
y,
task.offset,
task.stride);
}
}
@@ -914,7 +998,6 @@ public:
break;
task.update_progress(NULL);
}
#ifdef WITH_OSL
@@ -996,7 +1079,8 @@ protected:
#endif
}
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features_) {
virtual bool load_kernels(const DeviceRequestedFeatures &requested_features_)
{
requested_features = requested_features_;
return true;
@@ -1010,10 +1094,16 @@ public:
CPUDevice *device;
void (*func)(KernelGlobals *kg, KernelData *data);
/* Constructor stores the device pointer and leaves `func` as NULL until a kernel function
 * is assigned; the destructor has an empty body. */
CPUSplitKernelFunction(CPUDevice* device) : device(device), func(NULL) {}
~CPUSplitKernelFunction() {}
CPUSplitKernelFunction(CPUDevice *device) : device(device), func(NULL)
{
}
~CPUSplitKernelFunction()
{
}
virtual bool enqueue(const KernelDimensions& dim, device_memory& kernel_globals, device_memory& data)
virtual bool enqueue(const KernelDimensions &dim,
device_memory &kernel_globals,
device_memory &data)
{
if (!func) {
return false;
@@ -1100,11 +1190,17 @@ int2 CPUSplitKernel::split_kernel_local_size()
return make_int2(1, 1);
}
/* Always returns make_int2(1, 1); all three arguments are ignored. */
int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) {
int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
device_memory & /*data*/,
DeviceTask * /*task*/)
{
return make_int2(1, 1);
}
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
uint64_t CPUSplitKernel::state_buffer_size(device_memory &kernel_globals,
device_memory & /*data*/,
size_t num_threads)
{
KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
return split_data_buffer_size(kg, num_threads);

File diff suppressed because it is too large Load Diff

View File

@@ -92,7 +92,8 @@ void DenoisingTask::set_render_buffer(RenderTile *rtiles)
target_buffer.ptr = rtiles[9].buffer;
if (write_passes && rtiles[9].buffers) {
target_buffer.denoising_output_offset = rtiles[9].buffers->params.get_denoising_prefiltered_offset();
target_buffer.denoising_output_offset =
rtiles[9].buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
@@ -106,7 +107,8 @@ void DenoisingTask::setup_denoising_buffer()
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect, make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
buffer.use_intensity = write_passes || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity ? 15 : 14;
@@ -155,8 +157,7 @@ void DenoisingTask::prefilter_shadowing()
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var,
filtered_b = *sample_var;
device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
@@ -166,8 +167,7 @@ void DenoisingTask::prefilter_shadowing()
/* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a,
final_b = *unfiltered_b;
device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
@@ -187,9 +187,14 @@ void DenoisingTask::prefilter_features()
int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
for (int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr feature_pass(
buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance, 1.0f / render_buffer.samples);
functions.get_feature(mean_from[pass],
variance_from[pass],
*unfiltered,
*variance,
1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
@@ -209,14 +214,21 @@ void DenoisingTask::prefilter_color()
for (int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass, 1.0f / render_buffer.samples);
device_sub_ptr color_var_pass(
buffer.mem, variance_to[pass] * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass],
variance_from[pass],
*color_pass,
*color_var_pass,
1.0f / render_buffer.samples);
}
device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
device_sub_ptr color_var_pass(
buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
functions.detect_outliers(temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
functions.detect_outliers(
temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
if (buffer.use_intensity) {
device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
@@ -234,9 +246,11 @@ void DenoisingTask::load_buffer()
int num_passes = buffer.use_intensity ? 15 : 14;
for (int i = 0; i < tile_info->num_frames; i++) {
for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr to_pass(buffer.mem, i*buffer.frame_stride + pass*buffer.pass_stride, buffer.pass_stride);
device_sub_ptr to_pass(
buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
bool is_variance = (pass >= 11) && (pass <= 13);
functions.get_feature(pass, -1, *to_pass, null_ptr, is_variance? (1.0f / render_buffer.samples) : 1.0f);
functions.get_feature(
pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
}
render_buffer.offset += render_buffer.frame_stride;
}
@@ -276,7 +290,8 @@ void DenoisingTask::reconstruct()
storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device();
reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
reconstruction_state.filter_window = rect_from_shape(
filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.stride,

View File

@@ -64,16 +64,16 @@ public:
bool do_filter;
struct DeviceFunctions {
function<bool(device_ptr image_ptr, /* Contains the values that are smoothed. */
function<bool(
device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */
device_ptr out_ptr /* The filtered output is written into this image. */
)> non_local_means;
function<bool(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
int frame
)> accumulate;
)>
non_local_means;
function<bool(
device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
@@ -82,29 +82,26 @@ public:
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect
)> combine_halves;
int4 rect)>
combine_halves;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr
)> divide_shadow;
device_ptr buffer_variance_ptr)>
divide_shadow;
function<bool(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale
)> get_feature;
float scale)>
get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr
)> detect_outliers;
function<bool(int out_offset,
device_ptr frop_ptr,
device_ptr buffer_ptr
)> write_feature;
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
function<void(RenderTile *rtiles)> map_neighbor_tiles;
function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
} functions;
@@ -128,7 +125,13 @@ public:
float k_2; /* Squared value of the k parameter of the filter. */
bool is_color;
/* Assign all five members (r, f, a, k_2, is_color) from the matching arguments.
 * Note: `a = a_, k_2 = k_2_;` uses the comma operator, so both assignments are performed. */
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_) { r = r_; f = f_; a = a_, k_2 = k_2_; is_color = is_color_; }
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
{
r = r_;
f = f_;
a = a_, k_2 = k_2_;
is_color = is_color_;
}
} nlm_state;
struct Storage {
@@ -144,7 +147,8 @@ public:
rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY")
{}
{
}
} storage;
DenoisingTask(Device *device, const DeviceTask &task);
@@ -167,9 +171,9 @@ public:
bool gpu_temporary_mem;
/* Construct `mem` and `temporary_mem` on `device`, labelled "denoising pixel buffer" and
 * "denoising temporary mem" respectively. The body itself does nothing further. */
DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"),
temporary_mem(device, "denoising temporary mem")
{}
: mem(device, "denoising pixel buffer"), temporary_mem(device, "denoising temporary mem")
{
}
} buffer;
protected:

View File

@@ -27,7 +27,10 @@ Device *device_opencl_create(DeviceInfo& info, Stats &stats, Profiler &profiler,
bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address);
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address);
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
void device_cpu_info(vector<DeviceInfo> &devices);

View File

@@ -126,8 +126,7 @@ void device_memory::restore_device()
/* Device Sub Ptr */
/* Remember the device that `mem` belongs to and ask it for a sub-pointer into `mem` via
 * mem_alloc_sub_ptr(mem, offset, size).
 * NOTE(review): the units of offset/size are defined by mem_alloc_sub_ptr, which is not
 * visible here -- confirm before relying on them. */
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
: device(mem.device)
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
{
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}

View File

@@ -31,13 +31,7 @@ CCL_NAMESPACE_BEGIN
class Device;
/* Memory type tag handed to the device_memory constructor, e.g. MEM_DEVICE_ONLY by
 * device_only_memory and MEM_PIXELS by device_pixels. */
enum MemoryType {
MEM_READ_ONLY,
MEM_READ_WRITE,
MEM_DEVICE_ONLY,
MEM_TEXTURE,
MEM_PIXELS
};
enum MemoryType { MEM_READ_ONLY, MEM_READ_WRITE, MEM_DEVICE_ONLY, MEM_TEXTURE, MEM_PIXELS };
/* Supported Data Types */
@@ -55,15 +49,24 @@ enum DataType {
/* Return sizeof() of the C++ type that corresponds to `datatype`.
 * TYPE_UNKNOWN is reported as 1, and any value without its own case yields 0. */
static inline size_t datatype_size(DataType datatype)
{
switch (datatype) {
case TYPE_UNKNOWN: return 1;
case TYPE_UCHAR: return sizeof(uchar);
case TYPE_FLOAT: return sizeof(float);
case TYPE_UINT: return sizeof(uint);
case TYPE_UINT16: return sizeof(uint16_t);
case TYPE_INT: return sizeof(int);
case TYPE_HALF: return sizeof(half);
case TYPE_UINT64: return sizeof(uint64_t);
default: return 0;
case TYPE_UNKNOWN:
return 1;
case TYPE_UCHAR:
return sizeof(uchar);
case TYPE_FLOAT:
return sizeof(float);
case TYPE_UINT:
return sizeof(uint);
case TYPE_UINT16:
return sizeof(uint16_t);
case TYPE_INT:
return sizeof(int);
case TYPE_HALF:
return sizeof(half);
case TYPE_UINT64:
return sizeof(uint64_t);
default:
return 0;
}
}
@@ -184,11 +187,14 @@ template<> struct device_type_traits<uint64_t> {
* Base class for all device memory. This should not be allocated directly,
* instead the appropriate subclass can be used. */
class device_memory
{
class device_memory {
public:
/* memory_size(): data_size * data_elements * datatype_size(data_type).
 * memory_elements_size(elements): the same product with `elements` in place of data_size. */
size_t memory_size() { return data_size*data_elements*datatype_size(data_type); }
size_t memory_elements_size(int elements) {
size_t memory_size()
{
return data_size * data_elements * datatype_size(data_type);
}
size_t memory_elements_size(int elements)
{
return elements * data_elements * datatype_size(data_type);
}
@@ -249,9 +255,7 @@ protected:
* Working memory only needed by the device, with no corresponding allocation
* on the host. Only used internally in the device implementations. */
template<typename T>
class device_only_memory : public device_memory
{
template<typename T> class device_only_memory : public device_memory {
public:
device_only_memory(Device *device, const char *name)
: device_memory(device, name, MEM_DEVICE_ONLY)
@@ -307,8 +311,7 @@ public:
* automatically attached to kernel globals, using the provided name
* matching an entry in kernel_textures.h. */
template<typename T> class device_vector : public device_memory
{
template<typename T> class device_vector : public device_memory {
public:
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
@@ -443,11 +446,9 @@ protected:
* Device memory to efficiently draw as pixels to the screen in interactive
* rendering. Only copying pixels from the device is supported, not copying to. */
template<typename T> class device_pixels : public device_vector<T>
{
template<typename T> class device_pixels : public device_vector<T> {
public:
/* Forward to device_vector<T> with the memory type fixed to MEM_PIXELS. */
device_pixels(Device *device, const char *name)
: device_vector<T>(device, name, MEM_PIXELS)
device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
{
}
@@ -476,8 +477,7 @@ public:
* Note: some devices require offset and size of the sub_ptr to be properly
* aligned to device->mem_address_alingment(). */
class device_sub_ptr
{
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, int offset, int size);
~device_sub_ptr();

View File

@@ -31,12 +31,12 @@
CCL_NAMESPACE_BEGIN
class MultiDevice : public Device
{
class MultiDevice : public Device {
public:
struct SubDevice {
/* Store the given device pointer; `explicit` prevents implicit conversion from Device*. */
explicit SubDevice(Device *device_)
: device(device_) {}
explicit SubDevice(Device *device_) : device(device_)
{
}
Device *device;
map<device_ptr, device_ptr> ptr_map;
@@ -103,7 +103,8 @@ public:
return devices.front().device->show_samples();
}
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
foreach (const SubDevice &sub_device, devices) {
bvh_layout_mask &= sub_device.device->get_bvh_layout_mask();
@@ -256,11 +257,18 @@ public:
sub.device->const_copy_to(name, host, size);
}
void draw_pixels(
device_memory& rgba, int y,
int w, int h, int width, int height,
int dx, int dy, int dw, int dh,
bool transparent, const DeviceDrawParams &draw_params)
void draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h / devices.size();
@@ -274,7 +282,8 @@ public:
/* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
sub.device->draw_pixels(
rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
@@ -285,7 +294,8 @@ public:
{
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
if (tile.buffer)
tile.buffer = sub.ptr_map[tile.buffer];
}
}
}
@@ -386,11 +396,16 @@ public:
DeviceTask subtask = tasks.front();
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
if (task.buffer)
subtask.buffer = sub.ptr_map[task.buffer];
if (task.rgba_byte)
subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
subtask.shader_input = sub.ptr_map[task.shader_input];
if (task.shader_output)
subtask.shader_output = sub.ptr_map[task.shader_output];
sub.device->task_add(subtask);
}

View File

@@ -41,8 +41,7 @@ static TileList::iterator tile_list_find(TileList& tile_list, RenderTile& tile)
return tile_list.end();
}
class NetworkDevice : public Device
{
class NetworkDevice : public Device {
public:
boost::asio::io_service io_service;
tcp::socket socket;
@@ -69,8 +68,7 @@ public:
tcp::resolver::iterator end;
boost::system::error_code error = boost::asio::error::host_not_found;
while(error && endpoint_iterator != end)
{
while (error && endpoint_iterator != end) {
socket.close();
socket.connect(*endpoint_iterator++, error);
}
@@ -87,7 +85,8 @@ public:
snd.write();
}
/* Always reports BVH_LAYOUT_BVH2 as the only layout in the mask. */
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}
@@ -293,7 +292,10 @@ private:
NetworkError error_func;
};
/* Factory: returns a NetworkDevice allocated with `new`, constructed from the given
 * info, stats, profiler and address. Who deletes the result is not visible here. */
Device *device_network_create(DeviceInfo& info, Stats &stats, Profiler &profiler, const char *address)
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address)
{
return new NetworkDevice(info, stats, profiler, address);
}
@@ -318,11 +320,15 @@ class DeviceServer {
public:
thread_mutex rpc_lock;
/* Forward the error message to the error_func member. */
void network_error(const string &message) {
void network_error(const string &message)
{
error_func.network_error(message);
}
/* Report whether error_func has recorded an error. */
bool have_error() { return error_func.have_error(); }
bool have_error()
{
return error_func.have_error();
}
DeviceServer(Device *device_, tcp::socket &socket_)
: device(device_), socket(socket_), stop(false), blocked_waiting(false)
@@ -604,7 +610,8 @@ protected:
task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample,
this);
task.update_tile_sample = function_bind(&DeviceServer::task_update_tile_sample, this, _1);
task.get_cancel = function_bind(&DeviceServer::task_get_cancel, this);
@@ -674,7 +681,8 @@ protected:
if (entry.name == "acquire_tile") {
tile = entry.tile;
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
if (tile.buffer)
tile.buffer = ptr_map[tile.buffer];
result = true;
break;
@@ -705,7 +713,8 @@ protected:
{
thread_scoped_lock acquire_lock(acquire_mutex);
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
if (tile.buffer)
tile.buffer = ptr_imap[tile.buffer];
{
thread_scoped_lock lock(rpc_lock);
@@ -761,11 +770,11 @@ protected:
bool stop;
bool blocked_waiting;
private:
NetworkError error_func;
/* todo: free memory and device (osl) on network error */
};
void Device::server_run()

View File

@@ -43,11 +43,11 @@
CCL_NAMESPACE_BEGIN
using std::cout;
using std::cerr;
using std::cout;
using std::exception;
using std::hex;
using std::setw;
using std::exception;
using boost::asio::ip::tcp;
@@ -66,11 +66,9 @@ typedef boost::archive::binary_iarchive i_archive;
/* Serialization of device memory */
class network_device_memory : public device_memory
{
class network_device_memory : public device_memory {
public:
/* Construct the device_memory base with an empty name and type MEM_READ_ONLY. */
network_device_memory(Device *device)
: device_memory(device, "", MEM_READ_ONLY)
network_device_memory(Device *device) : device_memory(device, "", MEM_READ_ONLY)
{
}
@@ -85,19 +83,24 @@ public:
/* Common network error function / object for both DeviceNetwork and DeviceServer. */
class NetworkError {
public:
/* Start with an empty error message and an error count of zero. */
NetworkError() {
NetworkError()
{
error = "";
error_count = 0;
}
/* Empty destructor body. */
~NetworkError() {}
~NetworkError()
{
}
/* Record `message` as the current error (overwriting any previous one) and increment
 * the error count. */
void network_error(const string& message) {
void network_error(const string &message)
{
error = message;
error_count += 1;
}
/* Returns true once at least one error has been recorded.
 * The ternary's condition is the constant `true`, so the expression always evaluates its
 * first branch and is equivalent to `error_count > 0`. */
bool have_error() {
bool have_error()
{
return true ? error_count > 0 : false;
}
@@ -106,7 +109,6 @@ private:
int error_count;
};
/* Remote procedure call Send */
class RPCSend {
@@ -168,17 +170,15 @@ public:
header_stream << setw(8) << hex << archive_str.size();
string header_str = header_stream.str();
boost::asio::write(socket,
boost::asio::buffer(header_str),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(header_str), boost::asio::transfer_all(), error);
if (error.value())
error_func->network_error(error.message());
/* then send actual data */
boost::asio::write(socket,
boost::asio::buffer(archive_str),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(archive_str), boost::asio::transfer_all(), error);
if (error.value())
error_func->network_error(error.message());
@@ -190,9 +190,8 @@ public:
{
boost::system::error_code error;
boost::asio::write(socket,
boost::asio::buffer(buffer, size),
boost::asio::transfer_all(), error);
boost::asio::write(
socket, boost::asio::buffer(buffer, size), boost::asio::transfer_all(), error);
if (error.value())
error_func->network_error(error.message());
@@ -240,7 +239,6 @@ public:
if (error.value())
error_func->network_error(error.message());
if (len == data_size) {
archive_str = (data.size()) ? string(&data[0], data.size()) : string("");
@@ -410,8 +408,7 @@ private:
mutex.lock();
/* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(),
address) != servers.end();
bool found = std::find(servers.begin(), servers.end(), address) != servers.end();
if (!found)
servers.push_back(address);
@@ -431,10 +428,12 @@ private:
void async_receive()
{
listen_socket.async_receive_from(
boost::asio::buffer(receive_buffer), receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from, this,
boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
listen_socket.async_receive_from(boost::asio::buffer(receive_buffer),
receive_endpoint,
boost::bind(&ServerDiscovery::handle_receive_from,
this,
boost::asio::placeholders::error,
boost::asio::placeholders::bytes_transferred));
}
void broadcast_message(const string &msg)

View File

@@ -50,9 +50,8 @@ bool device_opencl_init()
}
else {
VLOG(1) << "CLEW initialization failed: "
<< ((clew_result == CLEW_ERROR_ATEXIT_FAILED)
? "Error setting up atexit() handler"
: "Error opening the library");
<< ((clew_result == CLEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
"Error opening the library");
}
}
else {
@@ -63,7 +62,6 @@ bool device_opencl_init()
return result;
}
static cl_int device_opencl_get_num_platforms_safe(cl_uint *num_platforms)
{
# ifdef _WIN32
@@ -190,19 +188,13 @@ string device_opencl_capabilities()
APPEND_PLATFORM_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS, cl_string);
cl_uint num_devices = 0;
opencl_assert(clGetDeviceIDs(platform_ids[platform],
CL_DEVICE_TYPE_ALL,
0,
NULL,
&num_devices));
opencl_assert(
clGetDeviceIDs(platform_ids[platform], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices));
result += string_printf("\tNumber of devices: %u\n", num_devices);
device_ids.resize(num_devices);
opencl_assert(clGetDeviceIDs(platform_ids[platform],
CL_DEVICE_TYPE_ALL,
num_devices,
&device_ids[0],
NULL));
opencl_assert(clGetDeviceIDs(
platform_ids[platform], CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL));
for (cl_uint device = 0; device < num_devices; ++device) {
cl_device_id device_id = device_ids[device];
@@ -215,7 +207,8 @@ string device_opencl_capabilities()
APPEND_DEVICE_INFO(device_id, "Profile", CL_DEVICE_PROFILE, cl_string);
APPEND_DEVICE_INFO(device_id, "Version", CL_DEVICE_VERSION, cl_string);
APPEND_DEVICE_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS, cl_string);
APPEND_DEVICE_INFO(device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
APPEND_DEVICE_INFO(
device_id, "Max clock frequency (MHz)", CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint);
APPEND_DEVICE_INFO(device_id, "Max compute units", CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint);
APPEND_DEVICE_INFO(device_id, "Max work group size", CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t);
}

View File

@@ -123,12 +123,13 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
return true;
}
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size)
{
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
VLOG(1) << "Split state element size: "
<< string_human_readable_number(size_per_element) << " bytes. ("
<< string_human_readable_size(size_per_element) << ").";
VLOG(1) << "Split state element size: " << string_human_readable_number(size_per_element)
<< " bytes. (" << string_human_readable_size(size_per_element) << ").";
return max_buffer_size / size_per_element;
}
@@ -165,7 +166,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
/* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU :
WORK_POOL_SIZE_GPU;
unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */
@@ -183,7 +185,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
if (device->have_error()) { \
return false; \
} \
if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
if (!kernel_##name->enqueue( \
KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
return false; \
}
@@ -200,11 +203,13 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
const int initial_num_samples = 1;
/* approx number of samples per second */
int samples_per_second = (avg_time_per_sample > 0.0) ?
int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;
int(double(time_multiplier) / avg_time_per_sample) + 1 :
initial_num_samples;
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);
subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
if (device->have_error()) {
return false;
@@ -226,8 +231,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ray_state,
queue_index,
use_queues_flag,
work_pool_wgs))
{
work_pool_wgs)) {
return false;
}
@@ -249,7 +253,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(
holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);

View File

@@ -45,7 +45,9 @@ public:
class SplitKernelFunction {
public:
virtual ~SplitKernelFunction() {}
virtual ~SplitKernelFunction()
{
}
/* enqueue the kernel, returns false if there is an error */
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data) = 0;
@@ -81,7 +83,8 @@ private:
*/
device_only_memory<uchar> split_data;
device_vector<uchar> ray_state;
device_only_memory<int> queue_index; /* Array of size num_queues that tracks the size of each queue. */
device_only_memory<int>
queue_index; /* Array of size num_queues that tracks the size of each queue. */
/* Flag to make sceneintersect and lampemission kernel use queues. */
device_only_memory<char> use_queues_flag;
@@ -107,8 +110,12 @@ public:
device_memory &kgbuffer,
device_memory &kernel_data);
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
virtual uint64_t state_buffer_size(device_memory &kg,
device_memory &data,
size_t num_threads) = 0;
size_t max_elements_for_max_buffer_size(device_memory &kg,
device_memory &data,
uint64_t max_buffer_size);
virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
RenderTile &rtile,
@@ -124,7 +131,9 @@ public:
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &) = 0;
virtual int2 split_kernel_local_size() = 0;
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task) = 0;
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
DeviceTask *task) = 0;
};
CCL_NAMESPACE_END

View File

@@ -29,10 +29,22 @@ CCL_NAMESPACE_BEGIN
/* Device Task */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
shader_input(0), shader_output(0),
shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
: type(type_),
x(0),
y(0),
w(0),
h(0),
rgba_byte(0),
rgba_half(0),
buffer(0),
sample(0),
num_samples(1),
shader_input(0),
shader_output(0),
shader_eval_type(0),
shader_filter(0),
shader_x(0),
shader_w(0)
{
last_update_time = time_dt();
}
@@ -103,8 +115,7 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if((type != RENDER) &&
(type != SHADER))
if ((type != RENDER) && (type != SHADER))
return;
if (update_progress_sample) {

View File

@@ -110,6 +110,7 @@ public:
bool need_finish_queue;
bool integrator_branched;
int2 requested_tile_size;
protected:
double last_update_time;
};

View File

@@ -66,15 +66,16 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
if (need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
clGetDeviceInfo(
device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if (total_size > max_buffer_size) {
device->set_error("Scene too complex to fit in available memory.");
return;
}
device_only_memory<uchar> *new_buffer =
new device_only_memory<uchar>(device, "memory manager buffer");
device_only_memory<uchar> *new_buffer = new device_only_memory<uchar>(device,
"memory manager buffer");
new_buffer->alloc_to_device(total_size);
@@ -83,27 +84,31 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(new_buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
else {
/* Fast copy from memory already on device. */
opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueCopyBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_MEM_PTR(new_buffer->device_pointer),
allocation->desc.offset,
offset,
allocation->mem->memory_size(),
0, NULL, NULL
));
0,
NULL,
NULL));
}
allocation->desc.offset = offset;
@@ -122,14 +127,16 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDevice *device)
foreach (Allocation *allocation, allocations) {
if (allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
opencl_device_assert(device,
clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
allocation->mem->host_pointer,
0, NULL, NULL
));
0,
NULL,
NULL));
allocation->needs_copy_to_device = false;
}
@@ -160,12 +167,10 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
return smallest;
}
MemoryManager::MemoryManager(OpenCLDevice *device)
: device(device), need_update(false)
MemoryManager::MemoryManager(OpenCLDevice *device) : device(device), need_update(false)
{
foreach (DeviceBuffer &device_buffer, device_buffers) {
device_buffer.buffer =
new device_only_memory<uchar>(device, "memory manager buffer");
device_buffer.buffer = new device_only_memory<uchar>(device, "memory manager buffer");
}
}

View File

@@ -60,8 +60,7 @@ private:
vector<Allocation *> allocations;
size_t size; /* Size of all allocations. */
DeviceBuffer()
: buffer(NULL), size(0)
DeviceBuffer() : buffer(NULL), size(0)
{
}

View File

@@ -68,7 +68,9 @@ struct OpenCLPlatformDevice {
device_type(device_type),
device_name(device_name),
hardware_id(hardware_id),
device_extensions(device_extensions) {}
device_extensions(device_extensions)
{
}
cl_platform_id platform_id;
string platform_name;
cl_device_id device_id;
@@ -79,19 +81,14 @@ struct OpenCLPlatformDevice {
};
/* Contains all static OpenCL helper functions. */
class OpenCLInfo
{
class OpenCLInfo {
public:
static cl_device_type device_type();
static bool use_debug();
static bool device_supported(const string& platform_name,
const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform,
string *error = NULL);
static bool device_version_check(cl_device_id device,
string *error = NULL);
static string get_hardware_id(const string& platform_name,
cl_device_id device_id);
static bool device_supported(const string &platform_name, const cl_device_id device_id);
static bool platform_version_check(cl_platform_id platform, string *error = NULL);
static bool device_version_check(cl_device_id device, string *error = NULL);
static string get_hardware_id(const string &platform_name, cl_device_id device_id);
static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all = false);
@@ -101,20 +98,17 @@ public:
static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL);
static cl_uint get_num_platforms();
static bool get_platforms(vector<cl_platform_id> *platform_ids,
cl_int *error = NULL);
static bool get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error = NULL);
static vector<cl_platform_id> get_platforms();
static bool get_platform_name(cl_platform_id platform_id,
string *platform_name);
static bool get_platform_name(cl_platform_id platform_id, string *platform_name);
static string get_platform_name(cl_platform_id platform_id);
static bool get_num_platform_devices(cl_platform_id platform_id,
cl_device_type device_type,
cl_uint *num_devices,
cl_int *error = NULL);
static cl_uint get_num_platform_devices(cl_platform_id platform_id,
cl_device_type device_type);
static cl_uint get_num_platform_devices(cl_platform_id platform_id, cl_device_type device_type);
static bool get_platform_devices(cl_platform_id platform_id,
cl_device_type device_type,
@@ -124,9 +118,7 @@ public:
cl_device_type device_type);
/* Device information. */
static bool get_device_name(cl_device_id device_id,
string *device_name,
cl_int* error = NULL);
static bool get_device_name(cl_device_id device_id, string *device_name, cl_int *error = NULL);
static string get_device_name(cl_device_id device_id);
@@ -158,12 +150,9 @@ public:
/* Thread safe cache for contexts and programs.
*/
class OpenCLCache
{
struct Slot
{
struct ProgramEntry
{
class OpenCLCache {
struct Slot {
struct ProgramEntry {
ProgramEntry();
ProgramEntry(const ProgramEntry &rhs);
~ProgramEntry();
@@ -179,7 +168,6 @@ class OpenCLCache
cl_context context;
typedef map<ustring, ProgramEntry> EntryMap;
EntryMap programs;
};
/* key is combination of platform ID and device ID */
@@ -199,7 +187,6 @@ class OpenCLCache
static OpenCLCache &global_instance();
public:
enum ProgramName {
OCL_DEV_BASE_PROGRAM,
OCL_DEV_MEGAKERNEL_PROGRAM,
@@ -237,27 +224,30 @@ public:
cl_int err = stmt; \
\
if (err != CL_SUCCESS) { \
string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
string message = string_printf( \
"OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if ((device)->error_message() == "") \
(device)->set_error(message); \
fprintf(stderr, "%s\n", message.c_str()); \
} \
} (void) 0
} \
(void)0
# define opencl_assert(stmt) \
{ \
cl_int err = stmt; \
\
if (err != CL_SUCCESS) { \
string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
string message = string_printf( \
"OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if (error_msg == "") \
error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
} \
} (void) 0
} \
(void)0
class OpenCLDevice : public Device
{
class OpenCLDevice : public Device {
public:
DedicatedTaskPool task_pool;
@@ -275,7 +265,9 @@ public:
class OpenCLProgram {
public:
OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) {}
OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL)
{
}
OpenCLProgram(OpenCLDevice *device,
const string &program_name,
const string &kernel_name,
@@ -292,8 +284,14 @@ public:
/* Create the OpenCL kernels after loading or compiling */
void create_kernels();
bool is_loaded() const { return loaded; }
const string& get_log() const { return log; }
bool is_loaded() const
{
return loaded;
}
const string &get_log() const
{
return log;
}
void report_error();
/* Wait until this kernel is available to be used
@@ -397,7 +395,9 @@ public:
~OpenCLDevice();
static void CL_CALLBACK context_notify_callback(const char *err_info,
const void * /*private_info*/, size_t /*cb*/, void *user_data);
const void * /*private_info*/,
size_t /*cb*/,
void *user_data);
bool opencl_version_check();
OpenCLSplitPrograms *get_split_programs();
@@ -433,25 +433,27 @@ public:
void tex_free(device_memory &mem);
size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel, size_t w, size_t h,
void enqueue_kernel(cl_kernel kernel,
size_t w,
size_t h,
bool x_workgroups = false,
size_t max_workgroup_size = -1);
void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
void film_convert(DeviceTask &task,
device_ptr buffer,
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
void denoise(RenderTile &tile, DenoisingTask &denoising);
class OpenCLDeviceTask : public DeviceTask {
public:
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task)
: DeviceTask(task)
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task)
{
run = function_bind(&OpenCLDevice::thread_run,
device,
this);
run = function_bind(&OpenCLDevice::thread_run, device, this);
}
};
@@ -477,15 +479,16 @@ public:
void thread_run(DeviceTask *task);
virtual BVHLayoutMask get_bvh_layout_mask() const {
virtual BVHLayoutMask get_bvh_layout_mask() const
{
return BVH_LAYOUT_BVH2;
}
virtual bool show_samples() const {
virtual bool show_samples() const
{
return true;
}
protected:
string kernel_build_options(const string *debug_src = NULL);
@@ -502,13 +505,13 @@ protected:
device_ptr scale_ptr,
int frame,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
bool denoising_combine_halves(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r, int4 rect,
int r,
int4 rect,
DenoisingTask *task);
bool denoising_divide_shadow(device_ptr a_ptr,
device_ptr b_ptr,
@@ -541,37 +544,32 @@ protected:
{
}
ArgumentWrapper(device_memory& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_memory &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(device_vector<T>& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_vector<T> &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(device_only_memory<T>& argument) : size(sizeof(void*)),
pointer((void*)(&argument.device_pointer))
ArgumentWrapper(device_only_memory<T> &argument)
: size(sizeof(void *)), pointer((void *)(&argument.device_pointer))
{
}
template<typename T>
ArgumentWrapper(T& argument) : size(sizeof(argument)),
pointer(&argument)
template<typename T> ArgumentWrapper(T &argument) : size(sizeof(argument)), pointer(&argument)
{
}
ArgumentWrapper(int argument) : size(sizeof(int)),
int_value(argument),
pointer(&int_value)
ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value)
{
}
ArgumentWrapper(float argument) : size(sizeof(float)),
float_value(argument),
pointer(&float_value)
ArgumentWrapper(float argument)
: size(sizeof(float)), float_value(argument), pointer(&float_value)
{
}
@@ -626,14 +624,9 @@ protected:
/* ** Those guys are for working around some compiler-specific bugs ** */
cl_program load_cached_kernel(
ustring key,
thread_scoped_lock& cache_locker);
cl_program load_cached_kernel(ustring key, thread_scoped_lock &cache_locker);
void store_cached_kernel(
cl_program program,
ustring key,
thread_scoped_lock& cache_locker);
void store_cached_kernel(cl_program program, ustring key, thread_scoped_lock &cache_locker);
private:
MemoryManager memory_manager;
@@ -654,7 +647,10 @@ protected:
friend class OpenCLSplitKernelFunction;
};
Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, Profiler &profiler, bool background);
Device *opencl_create_split_device(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background);
CCL_NAMESPACE_END

View File

@@ -32,9 +32,8 @@
CCL_NAMESPACE_BEGIN
struct texture_slot_t {
texture_slot_t(const string& name, int slot)
: name(name),
slot(slot) {
texture_slot_t(const string &name, int slot) : name(name), slot(slot)
{
}
string name;
int slot;
@@ -93,8 +92,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features)
features.use_principled = true;
features.use_denoising = true;
if (!background)
{
if (!background) {
features.max_nodes_group = NODE_GROUP_LEVEL_MAX;
features.nodes_features = NODE_FEATURE_ALL;
features.use_hair = true;
@@ -104,7 +102,9 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures& features)
}
}
string OpenCLDevice::get_build_options(const DeviceRequestedFeatures& requested_features, const string& opencl_program_name, bool preview_kernel)
string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_features,
const string &opencl_program_name,
bool preview_kernel)
{
/* first check for non-split kernel programs */
if (opencl_program_name == "base" || opencl_program_name == "denoising") {
@@ -233,14 +233,18 @@ OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms()
program_shadow_blocked_dl.release();
}
void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &programs, const DeviceRequestedFeatures& requested_features, bool is_preview)
void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
vector<OpenCLProgram *> &programs,
const DeviceRequestedFeatures &requested_features,
bool is_preview)
{
if (!requested_features.use_baking) {
#define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) program_split.add_kernel(ustring("path_trace_"#kernel_name));
# define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) \
program_split.add_kernel(ustring("path_trace_" #kernel_name));
# define ADD_SPLIT_KERNEL_PROGRAM(kernel_name) \
const string program_name_##kernel_name = "split_" #kernel_name; \
program_##kernel_name = \
OpenCLDevice::OpenCLProgram(device, \
program_##kernel_name = OpenCLDevice::OpenCLProgram( \
device, \
program_name_##kernel_name, \
"kernel_" #kernel_name ".cl", \
device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \
@@ -262,7 +266,8 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(vector<OpenCLProgram*> &pro
/* Quick kernels bundled in a single program to reduce overhead of starting
* Blender processes. */
program_split = OpenCLDevice::OpenCLProgram(device,
program_split = OpenCLDevice::OpenCLProgram(
device,
"split_bundle",
"kernel_split_bundle.cl",
device->get_build_options(requested_features, "split_bundle", is_preview));
@@ -294,8 +299,7 @@ typedef struct KernelGlobalsDummy {
ccl_constant KernelData *data;
ccl_global char *buffers[8];
#define KERNEL_TEX(type, name) \
TextureInfo name;
# define KERNEL_TEX(type, name) TextureInfo name;
# include "kernel/kernel_textures.h"
# undef KERNEL_TEX
SplitData split_data;
@@ -304,7 +308,6 @@ typedef struct KernelGlobalsDummy {
} // namespace
struct CachedSplitMemory {
int id;
device_memory *split_data;
@@ -322,8 +325,8 @@ public:
CachedSplitMemory &cached_memory;
int cached_id;
OpenCLSplitKernelFunction(OpenCLDevice* device, CachedSplitMemory& cached_memory) :
device(device), cached_memory(cached_memory), cached_id(cached_memory.id-1)
OpenCLSplitKernelFunction(OpenCLDevice *device, CachedSplitMemory &cached_memory)
: device(device), cached_memory(cached_memory), cached_id(cached_memory.id - 1)
{
}
@@ -335,18 +338,12 @@ public:
virtual bool enqueue(const KernelDimensions &dim, device_memory &kg, device_memory &data)
{
if (cached_id != cached_memory.id) {
cl_uint start_arg_index =
device->kernel_set_args(program(),
0,
kg,
data,
*cached_memory.split_data,
*cached_memory.ray_state);
cl_uint start_arg_index = device->kernel_set_args(
program(), 0, kg, data, *cached_memory.split_data, *cached_memory.ray_state);
device->set_kernel_arg_buffers(program(), &start_arg_index);
start_arg_index +=
device->kernel_set_args(program(),
start_arg_index += device->kernel_set_args(program(),
start_arg_index,
*cached_memory.queue_index,
*cached_memory.use_queues_flag,
@@ -382,23 +379,23 @@ public:
class OpenCLSplitKernel : public DeviceSplitKernel {
OpenCLDevice *device;
CachedSplitMemory cached_memory;
public:
explicit OpenCLSplitKernel(OpenCLDevice *device) : DeviceSplitKernel(device), device(device) {
explicit OpenCLSplitKernel(OpenCLDevice *device) : DeviceSplitKernel(device), device(device)
{
}
virtual SplitKernelFunction* get_split_kernel_function(const string& kernel_name,
const DeviceRequestedFeatures& requested_features)
virtual SplitKernelFunction *get_split_kernel_function(
const string &kernel_name, const DeviceRequestedFeatures &requested_features)
{
OpenCLSplitKernelFunction *kernel = new OpenCLSplitKernelFunction(device, cached_memory);
const string program_name = device->get_opencl_program_name(kernel_name);
kernel->program =
OpenCLDevice::OpenCLProgram(device,
kernel->program = OpenCLDevice::OpenCLProgram(
device,
program_name,
device->get_opencl_program_filename(kernel_name),
device->get_build_options(requested_features,
program_name,
device->use_preview_kernels));
device->get_build_options(requested_features, program_name, device->use_preview_kernels));
kernel->program.add_kernel(ustring("path_trace_" + kernel_name));
kernel->program.load();
@@ -419,7 +416,8 @@ public:
uint threads = num_threads;
OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
cl_kernel kernel_state_buffer_size = programs->program_split(ustring("path_trace_state_buffer_size"));
cl_kernel kernel_state_buffer_size = programs->program_split(
ustring("path_trace_state_buffer_size"));
device->kernel_set_args(kernel_state_buffer_size, 0, kg, data, threads, size_buffer);
size_t global_size = 64;
@@ -458,8 +456,7 @@ public:
device_memory &ray_state,
device_memory &queue_index,
device_memory &use_queues_flag,
device_memory& work_pool_wgs
)
device_memory &work_pool_wgs)
{
cl_int dQueue_size = dim.global_size[0] * dim.global_size[1];
@@ -472,8 +469,7 @@ public:
OpenCLDevice::OpenCLSplitPrograms *programs = device->get_split_programs();
cl_kernel kernel_data_init = programs->program_split(ustring("path_trace_data_init"));
cl_uint start_arg_index =
device->kernel_set_args(kernel_data_init,
cl_uint start_arg_index = device->kernel_set_args(kernel_data_init,
0,
kernel_globals,
kernel_data,
@@ -483,8 +479,7 @@ public:
device->set_kernel_arg_buffers(kernel_data_init, &start_arg_index);
start_arg_index +=
device->kernel_set_args(kernel_data_init,
start_arg_index += device->kernel_set_args(kernel_data_init,
start_arg_index,
start_sample,
end_sample,
@@ -537,7 +532,9 @@ public:
return make_int2(64, 1);
}
virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
virtual int2 split_kernel_global_size(device_memory &kg,
device_memory &data,
DeviceTask * /*task*/)
{
cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
/* Use small global size on CPU devices as it seems to be much faster. */
@@ -547,22 +544,23 @@ public:
}
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
clGetDeviceInfo(
device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if (DebugFlags().opencl.mem_limit) {
max_buffer_size = min(max_buffer_size,
cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used));
}
VLOG(1) << "Maximum device allocation size: "
<< string_human_readable_number(max_buffer_size) << " bytes. ("
<< string_human_readable_size(max_buffer_size) << ").";
VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(max_buffer_size)
<< " bytes. (" << string_human_readable_size(max_buffer_size) << ").";
/* Limit to 2gb, as we shouldn't need more than that and some devices may support much more. */
max_buffer_size = min(max_buffer_size / 2, (cl_ulong)2l * 1024 * 1024 * 1024);
size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements));
int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64),
(int)sqrt(num_elements));
VLOG(1) << "Global size: " << global_size << ".";
return global_size;
}
@@ -591,7 +589,8 @@ void OpenCLDevice::opencl_error(const string& message)
void OpenCLDevice::opencl_assert_err(cl_int err, const char *where)
{
if (err != CL_SUCCESS) {
string message = string_printf("OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
string message = string_printf(
"OpenCL error (%d): %s in %s", err, clewErrorString(err), where);
if (error_msg == "")
error_msg = message;
fprintf(stderr, "%s\n", message.c_str());
@@ -630,8 +629,7 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
cdDevice = platform_device.device_id;
platform_name = platform_device.platform_name;
device_name = platform_device.device_name;
VLOG(2) << "Creating new Cycles device for OpenCL platform "
<< platform_name << ", device "
VLOG(2) << "Creating new Cycles device for OpenCL platform " << platform_name << ", device "
<< device_name << ".";
{
@@ -642,13 +640,11 @@ OpenCLDevice::OpenCLDevice(DeviceInfo& info, Stats &stats, Profiler &profiler, b
if (cxContext == NULL) {
/* create context properties array to specify platform */
const cl_context_properties context_props[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
0, 0
};
CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 0, 0};
/* create context */
cxContext = clCreateContext(context_props, 1, &cdDevice,
context_notify_callback, cdDevice, &ciErr);
cxContext = clCreateContext(
context_props, 1, &cdDevice, context_notify_callback, cdDevice, &ciErr);
if (opencl_error(ciErr)) {
opencl_error("OpenCL: clCreateContext failed");
@@ -715,7 +711,9 @@ OpenCLDevice::~OpenCLDevice()
}
void CL_CALLBACK OpenCLDevice::context_notify_callback(const char *err_info,
const void * /*private_info*/, size_t /*cb*/, void *user_data)
const void * /*private_info*/,
size_t /*cb*/,
void *user_data)
{
string device_name = OpenCLInfo::get_device_name((cl_device_id)user_data);
fprintf(stderr, "OpenCL error (%s): %s\n", device_name.c_str(), err_info);
@@ -759,8 +757,7 @@ string OpenCLDevice::device_md5_hash(string kernel_custom_build_options)
bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_features)
{
VLOG(2) << "Loading kernels for platform " << platform_name
<< ", device " << device_name << ".";
VLOG(2) << "Loading kernels for platform " << platform_name << ", device " << device_name << ".";
/* Verify if device was initialized. */
if (!device_initialized) {
fprintf(stderr, "OpenCL: failed to initialize device.\n");
@@ -777,7 +774,8 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
kernel_programs.load_kernels(programs, requested_features, false);
if (!requested_features.use_baking && requested_features.use_denoising) {
denoising_program = OpenCLProgram(this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
denoising_program = OpenCLProgram(
this, "denoising", "filter.cl", get_build_options(requested_features, "denoising"));
denoising_program.add_kernel(ustring("filter_divide_shadow"));
denoising_program.add_kernel(ustring("filter_get_feature"));
denoising_program.add_kernel(ustring("filter_write_feature"));
@@ -810,26 +808,32 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures& requested_feature
void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures &requested_features)
{
vector<OpenCLProgram *> programs;
base_program = OpenCLProgram(this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
base_program = OpenCLProgram(
this, "base", "kernel_base.cl", get_build_options(requested_features, "base"));
base_program.add_kernel(ustring("convert_to_byte"));
base_program.add_kernel(ustring("convert_to_half_float"));
base_program.add_kernel(ustring("zero_buffer"));
programs.push_back(&base_program);
if (requested_features.use_true_displacement) {
displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace"));
displace_program = OpenCLProgram(
this, "displace", "kernel_displace.cl", get_build_options(requested_features, "displace"));
displace_program.add_kernel(ustring("displace"));
programs.push_back(&displace_program);
}
if (requested_features.use_background_light) {
background_program = OpenCLProgram(this, "background", "kernel_background.cl", get_build_options(requested_features, "background"));
background_program = OpenCLProgram(this,
"background",
"kernel_background.cl",
get_build_options(requested_features, "background"));
background_program.add_kernel(ustring("background"));
programs.push_back(&background_program);
}
if (requested_features.use_baking) {
bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake"));
bake_program = OpenCLProgram(
this, "bake", "kernel_bake.cl", get_build_options(requested_features, "bake"));
bake_program.add_kernel(ustring("bake"));
programs.push_back(&bake_program);
}
@@ -926,8 +930,7 @@ void OpenCLDevice::mem_alloc(device_memory& mem)
clGetDeviceInfo(cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_alloc_size, NULL);
if (DebugFlags().opencl.mem_limit) {
max_alloc_size = min(max_alloc_size,
cl_ulong(DebugFlags().opencl.mem_limit - stats.mem_used));
max_alloc_size = min(max_alloc_size, cl_ulong(DebugFlags().opencl.mem_limit - stats.mem_used));
}
if (size > max_alloc_size) {
@@ -954,11 +957,7 @@ void OpenCLDevice::mem_alloc(device_memory& mem)
* will null_mem buffer.
*/
if (size != 0) {
mem.device_pointer = (device_ptr)clCreateBuffer(cxContext,
mem_flag,
size,
mem_ptr,
&ciErr);
mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, mem_flag, size, mem_ptr, &ciErr);
opencl_assert_err(ciErr, "clCreateBuffer");
}
else {
@@ -990,7 +989,8 @@ void OpenCLDevice::mem_copy_to(device_memory& mem)
size,
mem.host_pointer,
0,
NULL, NULL));
NULL,
NULL));
}
}
}
@@ -1007,7 +1007,8 @@ void OpenCLDevice::mem_copy_from(device_memory& mem, int y, int w, int h, int el
size,
(uchar *)mem.host_pointer + offset,
0,
NULL, NULL));
NULL,
NULL));
}
void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size)
@@ -1027,15 +1028,8 @@ void OpenCLDevice::mem_zero_kernel(device_ptr mem, size_t size)
kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
ciErr = clEnqueueNDRangeKernel(cqCommandQueue,
ckZeroBuffer,
2,
NULL,
global_size,
NULL,
0,
NULL,
NULL);
ciErr = clEnqueueNDRangeKernel(
cqCommandQueue, ckZeroBuffer, 2, NULL, global_size, NULL, 0, NULL, NULL);
opencl_assert_err(ciErr, "clEnqueueNDRangeKernel");
d_offset += d_size;
@@ -1072,7 +1066,8 @@ void OpenCLDevice::mem_zero(device_memory& mem)
mem.memory_size(),
zero,
0,
NULL, NULL));
NULL,
NULL));
if (!mem.host_pointer) {
util_aligned_free(zero);
@@ -1116,11 +1111,8 @@ device_ptr OpenCLDevice::mem_alloc_sub_ptr(device_memory& mem, int offset, int s
info.origin = mem.memory_elements_size(offset);
info.size = mem.memory_elements_size(size);
device_ptr sub_buf = (device_ptr) clCreateSubBuffer(CL_MEM_PTR(mem.device_pointer),
mem_flag,
CL_BUFFER_CREATE_TYPE_REGION,
&info,
&ciErr);
device_ptr sub_buf = (device_ptr)clCreateSubBuffer(
CL_MEM_PTR(mem.device_pointer), mem_flag, CL_BUFFER_CREATE_TYPE_REGION, &info, &ciErr);
opencl_assert_err(ciErr, "clCreateSubBuffer");
return sub_buf;
}
@@ -1187,14 +1179,15 @@ size_t OpenCLDevice::global_size_round_up(int group_size, int global_size)
return global_size + ((r == 0) ? 0 : group_size - r);
}
void OpenCLDevice::enqueue_kernel(cl_kernel kernel, size_t w, size_t h, bool x_workgroups, size_t max_workgroup_size)
void OpenCLDevice::enqueue_kernel(
cl_kernel kernel, size_t w, size_t h, bool x_workgroups, size_t max_workgroup_size)
{
size_t workgroup_size, max_work_items[3];
clGetKernelWorkGroupInfo(kernel, cdDevice,
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
clGetDeviceInfo(cdDevice,
CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
clGetKernelWorkGroupInfo(
kernel, cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
clGetDeviceInfo(
cdDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, max_work_items, NULL);
if (max_workgroup_size > 0 && workgroup_size > max_workgroup_size) {
workgroup_size = max_workgroup_size;
@@ -1230,7 +1223,8 @@ void OpenCLDevice::enqueue_kernel(cl_kernel kernel, size_t w, size_t h, bool x_w
}
/* run kernel */
opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
opencl_assert(
clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
opencl_assert(clFlush(cqCommandQueue));
}
@@ -1284,8 +1278,7 @@ void OpenCLDevice::flush_texture_buffers()
if (string_startswith(name, "__tex_image")) {
int pos = name.rfind("_");
int id = atoi(name.data() + pos + 1);
texture_slots.push_back(texture_slot_t(name,
num_data_slots + id));
texture_slots.push_back(texture_slot_t(name, num_data_slots + id));
num_slots = max(num_slots, num_data_slots + id + 1);
}
}
@@ -1320,7 +1313,6 @@ void OpenCLDevice::flush_texture_buffers()
memory_manager.alloc("texture_info", texture_info);
}
void OpenCLDevice::thread_run(DeviceTask *task)
{
flush_texture_buffers();
@@ -1345,10 +1337,7 @@ void OpenCLDevice::thread_run(DeviceTask *task)
assert(tile.task == RenderTile::PATH_TRACE);
scoped_timer timer(&tile.buffers->render_time);
split_kernel->path_trace(task,
tile,
kgbuffer,
*const_mem_map["__data"]);
split_kernel->path_trace(task, tile, kgbuffer, *const_mem_map["__data"]);
/* Complete kernel execution before release tile. */
/* This helps in multi-device render;
@@ -1374,7 +1363,10 @@ void OpenCLDevice::thread_run(DeviceTask *task)
}
}
void OpenCLDevice::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
void OpenCLDevice::film_convert(DeviceTask &task,
device_ptr buffer,
device_ptr rgba_byte,
device_ptr rgba_half)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
@@ -1388,15 +1380,10 @@ void OpenCLDevice::film_convert(DeviceTask& task, device_ptr buffer, device_ptr
cl_int d_offset = task.offset;
cl_int d_stride = task.stride;
cl_kernel ckFilmConvertKernel = (rgba_byte) ? base_program(ustring("convert_to_byte")) :
base_program(ustring("convert_to_half_float"));
cl_kernel ckFilmConvertKernel = (rgba_byte)? base_program(ustring("convert_to_byte")): base_program(ustring("convert_to_half_float"));
cl_uint start_arg_index =
kernel_set_args(ckFilmConvertKernel,
0,
d_data,
d_rgba,
d_buffer);
cl_uint start_arg_index = kernel_set_args(ckFilmConvertKernel, 0, d_data, d_rgba, d_buffer);
set_kernel_arg_buffers(ckFilmConvertKernel, &start_arg_index);
@@ -1432,8 +1419,10 @@ bool OpenCLDevice::denoising_non_local_means(device_ptr image_ptr,
int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride * num_shifts);
device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts);
device_sub_ptr weightAccum(task->buffer.temporary_mem, 2*pass_stride*num_shifts, pass_stride);
device_sub_ptr blurDifference(
task->buffer.temporary_mem, pass_stride * num_shifts, pass_stride * num_shifts);
device_sub_ptr weightAccum(
task->buffer.temporary_mem, 2 * pass_stride * num_shifts, pass_stride);
cl_mem weightAccum_mem = CL_MEM_PTR(*weightAccum);
cl_mem difference_mem = CL_MEM_PTR(*difference);
cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
@@ -1453,36 +1442,38 @@ bool OpenCLDevice::denoising_non_local_means(device_ptr image_ptr,
cl_kernel ckNLMUpdateOutput = denoising_program(ustring("filter_nlm_update_output"));
cl_kernel ckNLMNormalize = denoising_program(ustring("filter_nlm_normalize"));
kernel_set_args(ckNLMCalcDifference, 0,
kernel_set_args(ckNLMCalcDifference,
0,
guide_mem,
variance_mem,
scale_mem,
difference_mem,
w, h, stride,
w,
h,
stride,
pass_stride,
r, channel_offset,
0, a, k_2);
kernel_set_args(ckNLMBlur, 0,
difference_mem,
blurDifference_mem,
w, h, stride,
pass_stride,
r, f);
kernel_set_args(ckNLMCalcWeight, 0,
blurDifference_mem,
difference_mem,
w, h, stride,
pass_stride,
r, f);
kernel_set_args(ckNLMUpdateOutput, 0,
r,
channel_offset,
0,
a,
k_2);
kernel_set_args(
ckNLMBlur, 0, difference_mem, blurDifference_mem, w, h, stride, pass_stride, r, f);
kernel_set_args(
ckNLMCalcWeight, 0, blurDifference_mem, difference_mem, w, h, stride, pass_stride, r, f);
kernel_set_args(ckNLMUpdateOutput,
0,
blurDifference_mem,
image_mem,
out_mem,
weightAccum_mem,
w, h, stride,
w,
h,
stride,
pass_stride,
channel_offset,
r, f);
r,
f);
enqueue_kernel(ckNLMCalcDifference, w * h, num_shifts, true);
enqueue_kernel(ckNLMBlur, w * h, num_shifts, true);
@@ -1490,8 +1481,7 @@ bool OpenCLDevice::denoising_non_local_means(device_ptr image_ptr,
enqueue_kernel(ckNLMBlur, w * h, num_shifts, true);
enqueue_kernel(ckNLMUpdateOutput, w * h, num_shifts, true);
kernel_set_args(ckNLMNormalize, 0,
out_mem, weightAccum_mem, w, h, stride);
kernel_set_args(ckNLMNormalize, 0, out_mem, weightAccum_mem, w, h, stride);
enqueue_kernel(ckNLMNormalize, w, h);
return true;
@@ -1508,15 +1498,11 @@ bool OpenCLDevice::denoising_construct_transform(DenoisingTask *task)
cl_kernel ckFilterConstructTransform = denoising_program(ustring("filter_construct_transform"));
int arg_ofs = kernel_set_args(ckFilterConstructTransform, 0,
buffer_mem,
tile_info_mem);
int arg_ofs = kernel_set_args(ckFilterConstructTransform, 0, buffer_mem, tile_info_mem);
cl_mem buffers[9];
for (int i = 0; i < 9; i++) {
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
arg_ofs += kernel_set_args(ckFilterConstructTransform,
arg_ofs,
buffers[i]);
arg_ofs += kernel_set_args(ckFilterConstructTransform, arg_ofs, buffers[i]);
}
kernel_set_args(ckFilterConstructTransform,
arg_ofs,
@@ -1530,10 +1516,7 @@ bool OpenCLDevice::denoising_construct_transform(DenoisingTask *task)
task->radius,
task->pca_threshold);
enqueue_kernel(ckFilterConstructTransform,
task->storage.w,
task->storage.h,
256);
enqueue_kernel(ckFilterConstructTransform, task->storage.w, task->storage.h, 256);
return true;
}
@@ -1571,34 +1554,32 @@ bool OpenCLDevice::denoising_accumulate(device_ptr color_ptr,
int num_shifts = (2 * r + 1) * (2 * r + 1);
device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride * num_shifts);
device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts);
device_sub_ptr blurDifference(
task->buffer.temporary_mem, pass_stride * num_shifts, pass_stride * num_shifts);
cl_mem difference_mem = CL_MEM_PTR(*difference);
cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference);
kernel_set_args(ckNLMCalcDifference, 0,
kernel_set_args(ckNLMCalcDifference,
0,
color_mem,
color_variance_mem,
scale_mem,
difference_mem,
w, h, stride,
w,
h,
stride,
pass_stride,
r,
pass_stride,
frame_offset,
1.0f, task->nlm_k_2);
kernel_set_args(ckNLMBlur, 0,
difference_mem,
blurDifference_mem,
w, h, stride,
pass_stride,
r, 4);
kernel_set_args(ckNLMCalcWeight, 0,
blurDifference_mem,
difference_mem,
w, h, stride,
pass_stride,
r, 4);
kernel_set_args(ckNLMConstructGramian, 0,
1.0f,
task->nlm_k_2);
kernel_set_args(
ckNLMBlur, 0, difference_mem, blurDifference_mem, w, h, stride, pass_stride, r, 4);
kernel_set_args(
ckNLMCalcWeight, 0, blurDifference_mem, difference_mem, w, h, stride, pass_stride, r, 4);
kernel_set_args(ckNLMConstructGramian,
0,
t,
blurDifference_mem,
buffer_mem,
@@ -1607,9 +1588,12 @@ bool OpenCLDevice::denoising_accumulate(device_ptr color_ptr,
XtWX_mem,
XtWY_mem,
task->reconstruction_state.filter_window,
w, h, stride,
w,
h,
stride,
pass_stride,
r, 4,
r,
4,
frame_offset,
use_time);
@@ -1622,8 +1606,7 @@ bool OpenCLDevice::denoising_accumulate(device_ptr color_ptr,
return true;
}
bool OpenCLDevice::denoising_solve(device_ptr output_ptr,
DenoisingTask *task)
bool OpenCLDevice::denoising_solve(device_ptr output_ptr, DenoisingTask *task)
{
cl_kernel ckFinalize = denoising_program(ustring("filter_finalize"));
@@ -1635,7 +1618,8 @@ bool OpenCLDevice::denoising_solve(device_ptr output_ptr,
int w = task->reconstruction_state.source_w;
int h = task->reconstruction_state.source_h;
kernel_set_args(ckFinalize, 0,
kernel_set_args(ckFinalize,
0,
output_mem,
rank_mem,
XtWX_mem,
@@ -1652,7 +1636,8 @@ bool OpenCLDevice::denoising_combine_halves(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r, int4 rect,
int r,
int4 rect,
DenoisingTask *task)
{
cl_mem a_mem = CL_MEM_PTR(a_ptr);
@@ -1662,16 +1647,8 @@ bool OpenCLDevice::denoising_combine_halves(device_ptr a_ptr,
cl_kernel ckFilterCombineHalves = denoising_program(ustring("filter_combine_halves"));
kernel_set_args(ckFilterCombineHalves, 0,
mean_mem,
variance_mem,
a_mem,
b_mem,
rect,
r);
enqueue_kernel(ckFilterCombineHalves,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
kernel_set_args(ckFilterCombineHalves, 0, mean_mem, variance_mem, a_mem, b_mem, rect, r);
enqueue_kernel(ckFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
return true;
}
@@ -1693,16 +1670,15 @@ bool OpenCLDevice::denoising_divide_shadow(device_ptr a_ptr,
cl_kernel ckFilterDivideShadow = denoising_program(ustring("filter_divide_shadow"));
int arg_ofs = kernel_set_args(ckFilterDivideShadow, 0,
task->render_buffer.samples,
tile_info_mem);
int arg_ofs = kernel_set_args(
ckFilterDivideShadow, 0, task->render_buffer.samples, tile_info_mem);
cl_mem buffers[9];
for (int i = 0; i < 9; i++) {
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs,
buffers[i]);
arg_ofs += kernel_set_args(ckFilterDivideShadow, arg_ofs, buffers[i]);
}
kernel_set_args(ckFilterDivideShadow, arg_ofs,
kernel_set_args(ckFilterDivideShadow,
arg_ofs,
a_mem,
b_mem,
sample_variance_mem,
@@ -1711,9 +1687,7 @@ bool OpenCLDevice::denoising_divide_shadow(device_ptr a_ptr,
task->rect,
task->render_buffer.pass_stride,
task->render_buffer.offset);
enqueue_kernel(ckFilterDivideShadow,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
enqueue_kernel(ckFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
return true;
}
@@ -1732,16 +1706,14 @@ bool OpenCLDevice::denoising_get_feature(int mean_offset,
cl_kernel ckFilterGetFeature = denoising_program(ustring("filter_get_feature"));
int arg_ofs = kernel_set_args(ckFilterGetFeature, 0,
task->render_buffer.samples,
tile_info_mem);
int arg_ofs = kernel_set_args(ckFilterGetFeature, 0, task->render_buffer.samples, tile_info_mem);
cl_mem buffers[9];
for (int i = 0; i < 9; i++) {
buffers[i] = CL_MEM_PTR(task->tile_info->buffers[i]);
arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs,
buffers[i]);
arg_ofs += kernel_set_args(ckFilterGetFeature, arg_ofs, buffers[i]);
}
kernel_set_args(ckFilterGetFeature, arg_ofs,
kernel_set_args(ckFilterGetFeature,
arg_ofs,
mean_offset,
variance_offset,
mean_mem,
@@ -1750,9 +1722,7 @@ bool OpenCLDevice::denoising_get_feature(int mean_offset,
task->rect,
task->render_buffer.pass_stride,
task->render_buffer.offset);
enqueue_kernel(ckFilterGetFeature,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
enqueue_kernel(ckFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
return true;
}
@@ -1767,7 +1737,8 @@ bool OpenCLDevice::denoising_write_feature(int out_offset,
cl_kernel ckFilterWriteFeature = denoising_program(ustring("filter_write_feature"));
kernel_set_args(ckFilterWriteFeature, 0,
kernel_set_args(ckFilterWriteFeature,
0,
task->render_buffer.samples,
task->reconstruction_state.buffer_params,
task->filter_area,
@@ -1775,9 +1746,7 @@ bool OpenCLDevice::denoising_write_feature(int out_offset,
buffer_mem,
out_offset,
task->rect);
enqueue_kernel(ckFilterWriteFeature,
task->filter_area.z,
task->filter_area.w);
enqueue_kernel(ckFilterWriteFeature, task->filter_area.z, task->filter_area.w);
return true;
}
@@ -1795,31 +1764,38 @@ bool OpenCLDevice::denoising_detect_outliers(device_ptr image_ptr,
cl_kernel ckFilterDetectOutliers = denoising_program(ustring("filter_detect_outliers"));
kernel_set_args(ckFilterDetectOutliers, 0,
kernel_set_args(ckFilterDetectOutliers,
0,
image_mem,
variance_mem,
depth_mem,
output_mem,
task->rect,
task->buffer.pass_stride);
enqueue_kernel(ckFilterDetectOutliers,
task->rect.z-task->rect.x,
task->rect.w-task->rect.y);
enqueue_kernel(ckFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
return true;
}
void OpenCLDevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
{
denoising.functions.construct_transform = function_bind(&OpenCLDevice::denoising_construct_transform, this, &denoising);
denoising.functions.accumulate = function_bind(&OpenCLDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.construct_transform = function_bind(
&OpenCLDevice::denoising_construct_transform, this, &denoising);
denoising.functions.accumulate = function_bind(
&OpenCLDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
denoising.functions.solve = function_bind(&OpenCLDevice::denoising_solve, this, _1, &denoising);
denoising.functions.divide_shadow = function_bind(&OpenCLDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(&OpenCLDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(&OpenCLDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(&OpenCLDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.write_feature = function_bind(&OpenCLDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
denoising.functions.detect_outliers = function_bind(&OpenCLDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.functions.divide_shadow = function_bind(
&OpenCLDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.non_local_means = function_bind(
&OpenCLDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
denoising.functions.combine_halves = function_bind(
&OpenCLDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
denoising.functions.get_feature = function_bind(
&OpenCLDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
denoising.functions.write_feature = function_bind(
&OpenCLDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
denoising.functions.detect_outliers = function_bind(
&OpenCLDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
denoising.render_buffer.samples = rtile.sample;
@@ -1850,28 +1826,15 @@ void OpenCLDevice::shader(DeviceTask& task)
program->wait_for_availability();
cl_kernel kernel = (*program)();
cl_uint start_arg_index =
kernel_set_args(kernel,
0,
d_data,
d_input,
d_output);
cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_input, d_output);
set_kernel_arg_buffers(kernel, &start_arg_index);
start_arg_index += kernel_set_args(kernel,
start_arg_index,
d_shader_eval_type);
start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_eval_type);
if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
start_arg_index += kernel_set_args(kernel,
start_arg_index,
d_shader_filter);
start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_filter);
}
start_arg_index += kernel_set_args(kernel,
start_arg_index,
d_shader_x,
d_shader_w,
d_offset);
start_arg_index += kernel_set_args(kernel, start_arg_index, d_shader_x, d_shader_w, d_offset);
for (int sample = 0; sample < task.num_samples; sample++) {
@@ -1893,19 +1856,25 @@ string OpenCLDevice::kernel_build_options(const string *debug_src)
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
if (platform_name == "NVIDIA CUDA") {
build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
build_options +=
"-D__KERNEL_OPENCL_NVIDIA__ "
"-cl-nv-maxrregcount=32 "
"-cl-nv-verbose ";
uint compute_capability_major, compute_capability_minor;
clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
sizeof(cl_uint), &compute_capability_major, NULL);
clGetDeviceInfo(cdDevice, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
sizeof(cl_uint), &compute_capability_minor, NULL);
clGetDeviceInfo(cdDevice,
CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
sizeof(cl_uint),
&compute_capability_major,
NULL);
clGetDeviceInfo(cdDevice,
CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
sizeof(cl_uint),
&compute_capability_minor,
NULL);
build_options += string_printf("-D__COMPUTE_CAPABILITY__=%u ",
compute_capability_major * 100 +
compute_capability_minor * 10);
compute_capability_major * 100 + compute_capability_minor * 10);
}
else if (platform_name == "Apple")
@@ -1983,9 +1952,7 @@ int OpenCLDevice::kernel_set_args(cl_kernel kernel,
do { \
if (arg.pointer != NULL) { \
opencl_assert(clSetKernelArg( \
kernel, \
start_argument_index + current_arg_index, \
arg.size, arg.pointer)); \
kernel, start_argument_index + current_arg_index, arg.size, arg.pointer)); \
++current_arg_index; \
} \
else { \
@@ -2052,27 +2019,22 @@ void OpenCLDevice::release_program_safe(cl_program program)
/* ** Those guys are for working around some compiler-specific bugs ** */
cl_program OpenCLDevice::load_cached_kernel(ustring key,
thread_scoped_lock& cache_locker)
cl_program OpenCLDevice::load_cached_kernel(ustring key, thread_scoped_lock &cache_locker)
{
return OpenCLCache::get_program(cpPlatform,
cdDevice,
key,
cache_locker);
return OpenCLCache::get_program(cpPlatform, cdDevice, key, cache_locker);
}
void OpenCLDevice::store_cached_kernel(cl_program program,
ustring key,
thread_scoped_lock &cache_locker)
{
OpenCLCache::store_program(cpPlatform,
cdDevice,
program,
key,
cache_locker);
OpenCLCache::store_program(cpPlatform, cdDevice, program, key, cache_locker);
}
Device *opencl_create_split_device(DeviceInfo& info, Stats& stats, Profiler &profiler, bool background)
Device *opencl_create_split_device(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background)
{
return new OpenCLDevice(info, stats, profiler, background);
}

View File

@@ -31,15 +31,12 @@ using std::endl;
CCL_NAMESPACE_BEGIN
OpenCLCache::Slot::ProgramEntry::ProgramEntry()
: program(NULL),
mutex(NULL)
OpenCLCache::Slot::ProgramEntry::ProgramEntry() : program(NULL), mutex(NULL)
{
}
OpenCLCache::Slot::ProgramEntry::ProgramEntry(const ProgramEntry &rhs)
: program(rhs.program),
mutex(NULL)
: program(rhs.program), mutex(NULL)
{
}
@@ -48,16 +45,12 @@ OpenCLCache::Slot::ProgramEntry::~ProgramEntry()
delete mutex;
}
OpenCLCache::Slot::Slot()
: context_mutex(NULL),
context(NULL)
OpenCLCache::Slot::Slot() : context_mutex(NULL), context(NULL)
{
}
OpenCLCache::Slot::Slot(const Slot &rhs)
: context_mutex(NULL),
context(NULL),
programs(rhs.programs)
: context_mutex(NULL), context(NULL), programs(rhs.programs)
{
}
@@ -278,7 +271,8 @@ OpenCLDevice::OpenCLProgram::~OpenCLProgram()
void OpenCLDevice::OpenCLProgram::release()
{
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
++kernel) {
if (kernel->second) {
clReleaseKernel(kernel->second);
kernel->second = NULL;
@@ -327,8 +321,7 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
string build_options;
build_options = device->kernel_build_options(debug_src) + kernel_build_options;
VLOG(1) << "Build options passed to clBuildProgram: '"
<< build_options << "'.";
VLOG(1) << "Build options passed to clBuildProgram: '" << build_options << "'.";
cl_int ciErr = clBuildProgram(program, 0, NULL, build_options.c_str(), NULL, NULL);
/* show warnings even if build is successful */
@@ -337,17 +330,20 @@ bool OpenCLDevice::OpenCLProgram::build_kernel(const string *debug_src)
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
if (ciErr != CL_SUCCESS) {
add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) + ", errors in console.");
add_error(string("OpenCL build failed with error ") + clewErrorString(ciErr) +
", errors in console.");
}
if (ret_val_size > 1) {
vector<char> build_log(ret_val_size + 1);
clGetProgramBuildInfo(program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
clGetProgramBuildInfo(
program, device->cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
build_log[ret_val_size] = '\0';
/* Skip meaningless empty output from the NVidia compiler. */
if (!(ret_val_size == 2 && build_log[0] == '\n')) {
add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]), ciErr == CL_SUCCESS);
add_log(string("OpenCL program ") + program_name + " build output: " + string(&build_log[0]),
ciErr == CL_SUCCESS);
}
}
@@ -366,11 +362,7 @@ bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
const char *source_str = source.c_str();
cl_int ciErr;
program = clCreateProgramWithSource(device->cxContext,
1,
&source_str,
&source_len,
&ciErr);
program = clCreateProgramWithSource(device->cxContext, 1, &source_str, &source_len, &ciErr);
if (ciErr != CL_SUCCESS) {
add_error(string("OpenCL program creation failed: ") + clewErrorString(ciErr));
@@ -385,7 +377,9 @@ bool OpenCLDevice::OpenCLProgram::compile_kernel(const string *debug_src)
return false;
double elapsed = time_dt() - starttime;
add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
add_log(
string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
false);
return true;
}
@@ -416,8 +410,7 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string& clbin)
escape_python_string(kernel_file_escaped);
escape_python_string(clbin_escaped);
args.push_back(
string_printf(
args.push_back(string_printf(
"import _cycles; _cycles.opencl_compile(r'%d', r'%s', r'%s', r'%s', r'%s', r'%s')",
device_platform_id,
device_name.c_str(),
@@ -434,7 +427,9 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string& clbin)
}
double elapsed = time_dt() - starttime;
add_log(string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false);
add_log(
string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
false);
return load_binary(clbin);
}
@@ -462,17 +457,14 @@ bool device_opencl_compile_kernel(const vector<string>& parameters)
OpenCLPlatformDevice &platform_device = usable_devices[device_platform_id];
if (platform_device.platform_name != platform_name ||
platform_device.device_name != device_name)
{
platform_device.device_name != device_name) {
return false;
}
cl_platform_id platform = platform_device.platform_id;
cl_device_id device = platform_device.device_id;
const cl_context_properties context_props[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties) platform,
0, 0
};
CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0, 0};
cl_int err;
cl_context context = clCreateContext(context_props, 1, &device, NULL, NULL, &err);
@@ -507,8 +499,7 @@ bool device_opencl_compile_kernel(const vector<string>& parameters)
return result;
}
bool OpenCLDevice::OpenCLProgram::load_binary(const string& clbin,
const string *debug_src)
bool OpenCLDevice::OpenCLProgram::load_binary(const string &clbin, const string *debug_src)
{
/* read binary into memory */
vector<uint8_t> binary;
@@ -523,12 +514,12 @@ bool OpenCLDevice::OpenCLProgram::load_binary(const string& clbin,
size_t size = binary.size();
const uint8_t *bytes = &binary[0];
program = clCreateProgramWithBinary(device->cxContext, 1, &device->cdDevice,
&size, &bytes, &status, &ciErr);
program = clCreateProgramWithBinary(
device->cxContext, 1, &device->cdDevice, &size, &bytes, &status, &ciErr);
if (status != CL_SUCCESS || ciErr != CL_SUCCESS) {
add_error(string("OpenCL failed create program from cached binary ") + clbin + ": "
+ clewErrorString(status) + " " + clewErrorString(ciErr));
add_error(string("OpenCL failed create program from cached binary ") + clbin + ": " +
clewErrorString(status) + " " + clewErrorString(ciErr));
return false;
}
@@ -562,15 +553,15 @@ bool OpenCLDevice::OpenCLProgram::load()
/* Try to use cached kernel. */
thread_scoped_lock cache_locker;
ustring cache_key(program_name + device_md5);
program = device->load_cached_kernel(cache_key,
cache_locker);
program = device->load_cached_kernel(cache_key, cache_locker);
if (!program) {
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
/* need to create source to get md5 */
string source = get_program_source(kernel_file);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
util_md5_string(source);
basename = path_cache_get(path_join("kernels", basename));
string clbin = basename + ".clbin";
@@ -580,9 +571,7 @@ bool OpenCLDevice::OpenCLProgram::load()
add_log(string("Loaded program from ") + clbin + ".", true);
/* Cache the program. */
device->store_cached_kernel(program,
cache_key,
cache_locker);
device->store_cached_kernel(program, cache_key, cache_locker);
}
else {
add_log(string("OpenCL program ") + program_name + " not found on disk.", true);
@@ -608,18 +597,17 @@ void OpenCLDevice::OpenCLProgram::compile()
/* Try to use cached kernel. */
thread_scoped_lock cache_locker;
ustring cache_key(program_name + device_md5);
program = device->load_cached_kernel(cache_key,
cache_locker);
program = device->load_cached_kernel(cache_key, cache_locker);
if (!program)
{
if (!program) {
add_log(string("OpenCL program ") + program_name + " not found in cache.", true);
/* need to create source to get md5 */
string source = get_program_source(kernel_file);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" + util_md5_string(source);
string basename = "cycles_kernel_" + program_name + "_" + device_md5 + "_" +
util_md5_string(source);
basename = path_cache_get(path_join("kernels", basename));
string clbin = basename + ".clbin";
@@ -637,7 +625,9 @@ void OpenCLDevice::OpenCLProgram::compile()
loaded = true;
}
else {
add_log(string("Separate-process building of ") + clbin + " failed, will fall back to regular building.", true);
add_log(string("Separate-process building of ") + clbin +
" failed, will fall back to regular building.",
true);
/* If does not exist or loading binary failed, compile kernel. */
if (!compile_kernel(debug_src)) {
@@ -652,9 +642,7 @@ void OpenCLDevice::OpenCLProgram::compile()
}
/* Cache the program. */
device->store_cached_kernel(program,
cache_key,
cache_locker);
device->store_cached_kernel(program, cache_key, cache_locker);
}
create_kernels();
@@ -664,13 +652,15 @@ void OpenCLDevice::OpenCLProgram::compile()
void OpenCLDevice::OpenCLProgram::create_kernels()
{
for(map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end(); ++kernel) {
for (map<ustring, cl_kernel>::iterator kernel = kernels.begin(); kernel != kernels.end();
++kernel) {
assert(kernel->second == NULL);
cl_int ciErr;
string name = "kernel_ocl_" + kernel->first.string();
kernel->second = clCreateKernel(program, name.c_str(), &ciErr);
if (device->opencl_error(ciErr)) {
add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " + clewErrorString(ciErr));
add_error(string("Error getting kernel ") + name + " from program " + program_name + ": " +
clewErrorString(ciErr));
return;
}
}
@@ -688,9 +678,11 @@ bool OpenCLDevice::OpenCLProgram::wait_for_availability()
void OpenCLDevice::OpenCLProgram::report_error()
{
/* If loaded is true, there was no error. */
if(loaded) return;
if (loaded)
return;
/* if use_stdout is true, the error was already reported. */
if(use_stdout) return;
if (use_stdout)
return;
cerr << error_msg << endl;
if (!compile_output.empty()) {
@@ -713,8 +705,7 @@ cl_kernel OpenCLDevice::OpenCLProgram::operator()(ustring name)
cl_device_type OpenCLInfo::device_type()
{
switch(DebugFlags().opencl.device_type)
{
switch (DebugFlags().opencl.device_type) {
case DebugFlags::OpenCL::DEVICE_NONE:
return 0;
case DebugFlags::OpenCL::DEVICE_ALL:
@@ -737,8 +728,7 @@ bool OpenCLInfo::use_debug()
return DebugFlags().opencl.debug;
}
bool OpenCLInfo::device_supported(const string& platform_name,
const cl_device_id device_id)
bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_id device_id)
{
cl_device_type device_type;
if (!get_device_type(device_id, &device_type)) {
@@ -764,17 +754,18 @@ bool OpenCLInfo::device_supported(const string& platform_name,
return false;
}
if (platform_name == "AMD Accelerated Parallel Processing" &&
device_type == CL_DEVICE_TYPE_GPU)
{
device_type == CL_DEVICE_TYPE_GPU) {
if (driver_major < 2236) {
VLOG(1) << "AMD driver version " << driver_major << "." << driver_minor << " not supported.";
return false;
}
const char *blacklist[] = {
/* GCN 1 */
"Tahiti", "Pitcairn", "Capeverde", "Oland", "Hainan",
NULL
};
const char *blacklist[] = {/* GCN 1 */
"Tahiti",
"Pitcairn",
"Capeverde",
"Oland",
"Hainan",
NULL};
for (int i = 0; blacklist[i] != NULL; i++) {
if (device_name == blacklist[i]) {
VLOG(1) << "AMD device " << device_name << " not supported";
@@ -789,17 +780,12 @@ bool OpenCLInfo::device_supported(const string& platform_name,
return false;
}
bool OpenCLInfo::platform_version_check(cl_platform_id platform,
string *error)
bool OpenCLInfo::platform_version_check(cl_platform_id platform, string *error)
{
const int req_major = 1, req_minor = 1;
int major, minor;
char version[256];
clGetPlatformInfo(platform,
CL_PLATFORM_VERSION,
sizeof(version),
&version,
NULL);
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
if (sscanf(version, "OpenCL %d.%d", &major, &minor) < 2) {
if (error != NULL) {
*error = string_printf("OpenCL: failed to parse platform version string (%s).", version);
@@ -808,7 +794,8 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform,
}
if (!((major == req_major && minor >= req_minor) || (major > req_major))) {
if (error != NULL) {
*error = string_printf("OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
*error = string_printf(
"OpenCL: platform version 1.1 or later required, found %d.%d", major, minor);
}
return false;
}
@@ -818,17 +805,12 @@ bool OpenCLInfo::platform_version_check(cl_platform_id platform,
return true;
}
bool OpenCLInfo::device_version_check(cl_device_id device,
string *error)
bool OpenCLInfo::device_version_check(cl_device_id device, string *error)
{
const int req_major = 1, req_minor = 1;
int major, minor;
char version[256];
clGetDeviceInfo(device,
CL_DEVICE_OPENCL_C_VERSION,
sizeof(version),
&version,
NULL);
clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version), &version, NULL);
if (sscanf(version, "OpenCL C %d.%d", &major, &minor) < 2) {
if (error != NULL) {
*error = string_printf("OpenCL: failed to parse OpenCL C version string (%s).", version);
@@ -852,7 +834,8 @@ string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id dev
if (platform_name == "AMD Accelerated Parallel Processing" || platform_name == "Apple") {
/* Use cl_amd_device_topology extension. */
cl_char topology[24];
if(clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS && topology[0] == 1) {
if (clGetDeviceInfo(device_id, 0x4037, sizeof(topology), topology, NULL) == CL_SUCCESS &&
topology[0] == 1) {
return string_printf("%02x:%02x.%01x",
(unsigned int)topology[21],
(unsigned int)topology[22],
@@ -874,12 +857,13 @@ string OpenCLInfo::get_hardware_id(const string& platform_name, cl_device_id dev
return "";
}
void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices,
bool force_all)
void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, bool force_all)
{
const cl_device_type device_type = OpenCLInfo::device_type();
static bool first_time = true;
#define FIRST_VLOG(severity) if(first_time) VLOG(severity)
# define FIRST_VLOG(severity) \
if (first_time) \
VLOG(severity)
usable_devices->clear();
@@ -895,8 +879,7 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
/* Get platforms. */
if (!get_platforms(&platform_ids, &error)) {
FIRST_VLOG(2) << "Error fetching platforms:"
<< string(clewErrorString(error));
FIRST_VLOG(2) << "Error fetching platforms:" << string(clewErrorString(error));
first_time = false;
return;
}
@@ -913,64 +896,48 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
FIRST_VLOG(2) << "Failed to get platform name, ignoring.";
continue;
}
FIRST_VLOG(2) << "Enumerating devices for platform "
<< platform_name << ".";
FIRST_VLOG(2) << "Enumerating devices for platform " << platform_name << ".";
if (!platform_version_check(platform_id)) {
FIRST_VLOG(2) << "Ignoring platform " << platform_name
<< " due to too old compiler version.";
continue;
}
if(!get_platform_devices(platform_id,
device_type,
&device_ids,
&error))
{
if (!get_platform_devices(platform_id, device_type, &device_ids, &error)) {
FIRST_VLOG(2) << "Ignoring platform " << platform_name
<< ", failed to fetch of devices: "
<< string(clewErrorString(error));
<< ", failed to fetch of devices: " << string(clewErrorString(error));
continue;
}
if (device_ids.size() == 0) {
FIRST_VLOG(2) << "Ignoring platform " << platform_name
<< ", it has no devices.";
FIRST_VLOG(2) << "Ignoring platform " << platform_name << ", it has no devices.";
continue;
}
for (int num = 0; num < device_ids.size(); num++) {
const cl_device_id device_id = device_ids[num];
string device_name;
if (!get_device_name(device_id, &device_name, &error)) {
FIRST_VLOG(2) << "Failed to fetch device name: "
<< string(clewErrorString(error))
FIRST_VLOG(2) << "Failed to fetch device name: " << string(clewErrorString(error))
<< ", ignoring.";
continue;
}
if (!device_version_check(device_id)) {
FIRST_VLOG(2) << "Ignoring device " << device_name
<< " due to old compiler version.";
FIRST_VLOG(2) << "Ignoring device " << device_name << " due to old compiler version.";
continue;
}
if(force_all ||
device_supported(platform_name, device_id))
{
if (force_all || device_supported(platform_name, device_id)) {
cl_device_type device_type;
if (!get_device_type(device_id, &device_type, &error)) {
FIRST_VLOG(2) << "Ignoring device " << device_name
<< ", failed to fetch device type:"
<< string(clewErrorString(error));
<< ", failed to fetch device type:" << string(clewErrorString(error));
continue;
}
string readable_device_name =
get_readable_device_name(device_id);
string readable_device_name = get_readable_device_name(device_id);
if (readable_device_name != device_name) {
FIRST_VLOG(2) << "Using more readable device name: "
<< readable_device_name;
FIRST_VLOG(2) << "Using more readable device name: " << readable_device_name;
}
FIRST_VLOG(2) << "Adding new device "
<< readable_device_name << ".";
FIRST_VLOG(2) << "Adding new device " << readable_device_name << ".";
string hardware_id = get_hardware_id(platform_name, device_id);
string device_extensions = get_device_extensions(device_id);
usable_devices->push_back(OpenCLPlatformDevice(
platform_id,
usable_devices->push_back(OpenCLPlatformDevice(platform_id,
platform_name,
device_id,
device_type,
@@ -979,16 +946,14 @@ void OpenCLInfo::get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices
device_extensions));
}
else {
FIRST_VLOG(2) << "Ignoring device " << device_name
<< ", not officially supported yet.";
FIRST_VLOG(2) << "Ignoring device " << device_name << ", not officially supported yet.";
}
}
}
first_time = false;
}
bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
cl_int *error)
bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error)
{
/* Reset from possible previous state. */
platform_ids->resize(0);
@@ -999,9 +964,7 @@ bool OpenCLInfo::get_platforms(vector<cl_platform_id> *platform_ids,
/* Get actual platforms. */
cl_int err;
platform_ids->resize(num_platforms);
if((err = clGetPlatformIDs(num_platforms,
&platform_ids->at(0),
NULL)) != CL_SUCCESS) {
if ((err = clGetPlatformIDs(num_platforms, &platform_ids->at(0), NULL)) != CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1045,16 +1008,11 @@ cl_uint OpenCLInfo::get_num_platforms()
return num_platforms;
}
bool OpenCLInfo::get_platform_name(cl_platform_id platform_id,
string *platform_name)
bool OpenCLInfo::get_platform_name(cl_platform_id platform_id, string *platform_name)
{
char buffer[256];
if(clGetPlatformInfo(platform_id,
CL_PLATFORM_NAME,
sizeof(buffer),
&buffer,
NULL) != CL_SUCCESS)
{
if (clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), &buffer, NULL) !=
CL_SUCCESS) {
*platform_name = "";
return false;
}
@@ -1077,12 +1035,7 @@ bool OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
cl_int *error)
{
cl_int err;
if((err = clGetDeviceIDs(platform_id,
device_type,
0,
NULL,
num_devices)) != CL_SUCCESS)
{
if ((err = clGetDeviceIDs(platform_id, device_type, 0, NULL, num_devices)) != CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1099,10 +1052,7 @@ cl_uint OpenCLInfo::get_num_platform_devices(cl_platform_id platform_id,
cl_device_type device_type)
{
cl_uint num_devices;
if(!get_num_platform_devices(platform_id,
device_type,
&num_devices))
{
if (!get_num_platform_devices(platform_id, device_type, &num_devices)) {
return 0;
}
return num_devices;
@@ -1117,22 +1067,14 @@ bool OpenCLInfo::get_platform_devices(cl_platform_id platform_id,
device_ids->resize(0);
/* Get number of devices to pre-allocate memory. */
cl_uint num_devices;
if(!get_num_platform_devices(platform_id,
device_type,
&num_devices,
error))
{
if (!get_num_platform_devices(platform_id, device_type, &num_devices, error)) {
return false;
}
/* Get actual device list. */
device_ids->resize(num_devices);
cl_int err;
if((err = clGetDeviceIDs(platform_id,
device_type,
num_devices,
&device_ids->at(0),
NULL)) != CL_SUCCESS)
{
if ((err = clGetDeviceIDs(platform_id, device_type, num_devices, &device_ids->at(0), NULL)) !=
CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1152,18 +1094,12 @@ vector<cl_device_id> OpenCLInfo::get_platform_devices(cl_platform_id platform_id
return devices;
}
bool OpenCLInfo::get_device_name(cl_device_id device_id,
string *device_name,
cl_int* error)
bool OpenCLInfo::get_device_name(cl_device_id device_id, string *device_name, cl_int *error)
{
char buffer[1024];
cl_int err;
if((err = clGetDeviceInfo(device_id,
CL_DEVICE_NAME,
sizeof(buffer),
&buffer,
NULL)) != CL_SUCCESS)
{
if ((err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(buffer), &buffer, NULL)) !=
CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1192,12 +1128,8 @@ bool OpenCLInfo::get_device_extensions(cl_device_id device_id,
{
char buffer[1024];
cl_int err;
if((err = clGetDeviceInfo(device_id,
CL_DEVICE_EXTENSIONS,
sizeof(buffer),
&buffer,
NULL)) != CL_SUCCESS)
{
if ((err = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(buffer), &buffer, NULL)) !=
CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1225,12 +1157,8 @@ bool OpenCLInfo::get_device_type(cl_device_id device_id,
cl_int *error)
{
cl_int err;
if((err = clGetDeviceInfo(device_id,
CL_DEVICE_TYPE,
sizeof(cl_device_type),
device_type,
NULL)) != CL_SUCCESS)
{
if ((err = clGetDeviceInfo(
device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), device_type, NULL)) != CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1257,12 +1185,9 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
string name = "";
char board_name[1024];
size_t length = 0;
if(clGetDeviceInfo(device_id,
CL_DEVICE_BOARD_NAME_AMD,
sizeof(board_name),
&board_name,
&length) == CL_SUCCESS)
{
if (clGetDeviceInfo(
device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(board_name), &board_name, &length) ==
CL_SUCCESS) {
if (length != 0 && board_name[0] != '\0') {
name = board_name;
}
@@ -1282,8 +1207,7 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(max_compute_units),
&max_compute_units,
NULL) == CL_SUCCESS)
{
NULL) == CL_SUCCESS) {
name += " " + to_string(max_compute_units);
}
}
@@ -1296,19 +1220,12 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
return name;
}
bool OpenCLInfo::get_driver_version(cl_device_id device_id,
int *major,
int *minor,
cl_int* error)
bool OpenCLInfo::get_driver_version(cl_device_id device_id, int *major, int *minor, cl_int *error)
{
char buffer[1024];
cl_int err;
if((err = clGetDeviceInfo(device_id,
CL_DRIVER_VERSION,
sizeof(buffer),
&buffer,
NULL)) != CL_SUCCESS)
{
if ((err = clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(buffer), &buffer, NULL)) !=
CL_SUCCESS) {
if (error != NULL) {
*error = err;
}
@@ -1327,12 +1244,9 @@ bool OpenCLInfo::get_driver_version(cl_device_id device_id,
int OpenCLInfo::mem_sub_ptr_alignment(cl_device_id device_id)
{
int base_align_bits;
if(clGetDeviceInfo(device_id,
CL_DEVICE_MEM_BASE_ADDR_ALIGN,
sizeof(int),
&base_align_bits,
NULL) == CL_SUCCESS)
{
if (clGetDeviceInfo(
device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(int), &base_align_bits, NULL) ==
CL_SUCCESS) {
return base_align_bits / 8;
}
return 1;

View File

@@ -26,8 +26,7 @@ CCL_NAMESPACE_BEGIN
/* Node Type */
Node::Node(const NodeType *type_, ustring name_)
: name(name_), type(type_)
Node::Node(const NodeType *type_, ustring name_) : name(name_), type(type_)
{
assert(type);
@@ -46,8 +45,7 @@ Node::~Node()
{
}
template<typename T>
static T& get_socket_value(const Node *node, const SocketType& socket)
template<typename T> static T &get_socket_value(const Node *node, const SocketType &socket)
{
return (T &)*(((char *)node) + socket.struct_offset);
}
@@ -55,18 +53,14 @@ static T& get_socket_value(const Node *node, const SocketType& socket)
#ifndef NDEBUG
static bool is_socket_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR ||
socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR ||
socket.type == SocketType::NORMAL;
return socket.type == SocketType::COLOR || socket.type == SocketType::POINT ||
socket.type == SocketType::VECTOR || socket.type == SocketType::NORMAL;
}
static bool is_socket_array_float3(const SocketType &socket)
{
return socket.type == SocketType::COLOR_ARRAY ||
socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY ||
socket.type == SocketType::NORMAL_ARRAY;
return socket.type == SocketType::COLOR_ARRAY || socket.type == SocketType::POINT_ARRAY ||
socket.type == SocketType::VECTOR_ARRAY || socket.type == SocketType::NORMAL_ARRAY;
}
#endif
@@ -323,7 +317,10 @@ void Node::set_default_value(const SocketType& socket)
}
template<typename T>
static void copy_array(const Node *node, const SocketType& socket, const Node *other, const SocketType& other_socket)
static void copy_array(const Node *node,
const SocketType &socket,
const Node *other,
const SocketType &other_socket)
{
const array<T> *src = (const array<T> *)(((char *)other) + other_socket.struct_offset);
array<T> *dst = (array<T> *)(((char *)node) + socket.struct_offset);
@@ -336,18 +333,42 @@ void Node::copy_value(const SocketType& socket, const Node& other, const SocketT
if (socket.is_array()) {
switch (socket.type) {
case SocketType::BOOLEAN_ARRAY: copy_array<bool>(this, socket, &other, other_socket); break;
case SocketType::FLOAT_ARRAY: copy_array<float>(this, socket, &other, other_socket); break;
case SocketType::INT_ARRAY: copy_array<int>(this, socket, &other, other_socket); break;
case SocketType::COLOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::VECTOR_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::NORMAL_ARRAY: copy_array<float3>(this, socket, &other, other_socket); break;
case SocketType::POINT2_ARRAY: copy_array<float2>(this, socket, &other, other_socket); break;
case SocketType::STRING_ARRAY: copy_array<ustring>(this, socket, &other, other_socket); break;
case SocketType::TRANSFORM_ARRAY: copy_array<Transform>(this, socket, &other, other_socket); break;
case SocketType::NODE_ARRAY: copy_array<void*>(this, socket, &other, other_socket); break;
default: assert(0); break;
case SocketType::BOOLEAN_ARRAY:
copy_array<bool>(this, socket, &other, other_socket);
break;
case SocketType::FLOAT_ARRAY:
copy_array<float>(this, socket, &other, other_socket);
break;
case SocketType::INT_ARRAY:
copy_array<int>(this, socket, &other, other_socket);
break;
case SocketType::COLOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::VECTOR_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::NORMAL_ARRAY:
copy_array<float3>(this, socket, &other, other_socket);
break;
case SocketType::POINT2_ARRAY:
copy_array<float2>(this, socket, &other, other_socket);
break;
case SocketType::STRING_ARRAY:
copy_array<ustring>(this, socket, &other, other_socket);
break;
case SocketType::TRANSFORM_ARRAY:
copy_array<Transform>(this, socket, &other, other_socket);
break;
case SocketType::NODE_ARRAY:
copy_array<void *>(this, socket, &other, other_socket);
break;
default:
assert(0);
break;
}
}
else {
@@ -376,34 +397,60 @@ static bool is_value_equal(const Node *node, const Node *other, const SocketType
bool Node::equals_value(const Node &other, const SocketType &socket) const
{
switch (socket.type) {
case SocketType::BOOLEAN: return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT: return is_value_equal<float>(this, &other, socket);
case SocketType::INT: return is_value_equal<int>(this, &other, socket);
case SocketType::UINT: return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT: return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL: return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2: return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE: return true;
case SocketType::STRING: return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM: return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM: return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE: return is_value_equal<void*>(this, &other, socket);
case SocketType::BOOLEAN:
return is_value_equal<bool>(this, &other, socket);
case SocketType::FLOAT:
return is_value_equal<float>(this, &other, socket);
case SocketType::INT:
return is_value_equal<int>(this, &other, socket);
case SocketType::UINT:
return is_value_equal<uint>(this, &other, socket);
case SocketType::COLOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::VECTOR:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT:
return is_value_equal<float3>(this, &other, socket);
case SocketType::NORMAL:
return is_value_equal<float3>(this, &other, socket);
case SocketType::POINT2:
return is_value_equal<float2>(this, &other, socket);
case SocketType::CLOSURE:
return true;
case SocketType::STRING:
return is_value_equal<ustring>(this, &other, socket);
case SocketType::ENUM:
return is_value_equal<int>(this, &other, socket);
case SocketType::TRANSFORM:
return is_value_equal<Transform>(this, &other, socket);
case SocketType::NODE:
return is_value_equal<void *>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY: return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY: return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY: return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY: return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY: return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY: return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY: return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY: return is_array_equal<void*>(this, &other, socket);
case SocketType::BOOLEAN_ARRAY:
return is_array_equal<bool>(this, &other, socket);
case SocketType::FLOAT_ARRAY:
return is_array_equal<float>(this, &other, socket);
case SocketType::INT_ARRAY:
return is_array_equal<int>(this, &other, socket);
case SocketType::COLOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::VECTOR_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::NORMAL_ARRAY:
return is_array_equal<float3>(this, &other, socket);
case SocketType::POINT2_ARRAY:
return is_array_equal<float2>(this, &other, socket);
case SocketType::STRING_ARRAY:
return is_array_equal<ustring>(this, &other, socket);
case SocketType::TRANSFORM_ARRAY:
return is_array_equal<Transform>(this, &other, socket);
case SocketType::NODE_ARRAY:
return is_array_equal<void *>(this, &other, socket);
case SocketType::UNDEFINED: return true;
case SocketType::UNDEFINED:
return true;
}
return true;
@@ -427,8 +474,7 @@ bool Node::equals(const Node& other) const
namespace {
template<typename T>
void value_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void value_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
md5.append(((uint8_t *)node) + socket.struct_offset, socket.size());
}
@@ -439,8 +485,7 @@ void float3_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
md5.append(((uint8_t *)node) + socket.struct_offset, sizeof(float) * 3);
}
template<typename T>
void array_hash(const Node *node, const SocketType& socket, MD5Hash& md5)
template<typename T> void array_hash(const Node *node, const SocketType &socket, MD5Hash &md5)
{
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
for (size_t i = 0; i < a.size(); i++) {
@@ -467,42 +512,91 @@ void Node::hash(MD5Hash& md5)
md5.append(socket.name.string());
switch (socket.type) {
case SocketType::BOOLEAN: value_hash<bool>(this, socket, md5); break;
case SocketType::FLOAT: value_hash<float>(this, socket, md5); break;
case SocketType::INT: value_hash<int>(this, socket, md5); break;
case SocketType::UINT: value_hash<uint>(this, socket, md5); break;
case SocketType::COLOR: float3_hash(this, socket, md5); break;
case SocketType::VECTOR: float3_hash(this, socket, md5); break;
case SocketType::POINT: float3_hash(this, socket, md5); break;
case SocketType::NORMAL: float3_hash(this, socket, md5); break;
case SocketType::POINT2: value_hash<float2>(this, socket, md5); break;
case SocketType::CLOSURE: break;
case SocketType::STRING: value_hash<ustring>(this, socket, md5); break;
case SocketType::ENUM: value_hash<int>(this, socket, md5); break;
case SocketType::TRANSFORM: value_hash<Transform>(this, socket, md5); break;
case SocketType::NODE: value_hash<void*>(this, socket, md5); break;
case SocketType::BOOLEAN:
value_hash<bool>(this, socket, md5);
break;
case SocketType::FLOAT:
value_hash<float>(this, socket, md5);
break;
case SocketType::INT:
value_hash<int>(this, socket, md5);
break;
case SocketType::UINT:
value_hash<uint>(this, socket, md5);
break;
case SocketType::COLOR:
float3_hash(this, socket, md5);
break;
case SocketType::VECTOR:
float3_hash(this, socket, md5);
break;
case SocketType::POINT:
float3_hash(this, socket, md5);
break;
case SocketType::NORMAL:
float3_hash(this, socket, md5);
break;
case SocketType::POINT2:
value_hash<float2>(this, socket, md5);
break;
case SocketType::CLOSURE:
break;
case SocketType::STRING:
value_hash<ustring>(this, socket, md5);
break;
case SocketType::ENUM:
value_hash<int>(this, socket, md5);
break;
case SocketType::TRANSFORM:
value_hash<Transform>(this, socket, md5);
break;
case SocketType::NODE:
value_hash<void *>(this, socket, md5);
break;
case SocketType::BOOLEAN_ARRAY: array_hash<bool>(this, socket, md5); break;
case SocketType::FLOAT_ARRAY: array_hash<float>(this, socket, md5); break;
case SocketType::INT_ARRAY: array_hash<int>(this, socket, md5); break;
case SocketType::COLOR_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::VECTOR_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::POINT_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::NORMAL_ARRAY: float3_array_hash(this, socket, md5); break;
case SocketType::POINT2_ARRAY: array_hash<float2>(this, socket, md5); break;
case SocketType::STRING_ARRAY: array_hash<ustring>(this, socket, md5); break;
case SocketType::TRANSFORM_ARRAY: array_hash<Transform>(this, socket, md5); break;
case SocketType::NODE_ARRAY: array_hash<void*>(this, socket, md5); break;
case SocketType::BOOLEAN_ARRAY:
array_hash<bool>(this, socket, md5);
break;
case SocketType::FLOAT_ARRAY:
array_hash<float>(this, socket, md5);
break;
case SocketType::INT_ARRAY:
array_hash<int>(this, socket, md5);
break;
case SocketType::COLOR_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::VECTOR_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::POINT_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::NORMAL_ARRAY:
float3_array_hash(this, socket, md5);
break;
case SocketType::POINT2_ARRAY:
array_hash<float2>(this, socket, md5);
break;
case SocketType::STRING_ARRAY:
array_hash<ustring>(this, socket, md5);
break;
case SocketType::TRANSFORM_ARRAY:
array_hash<Transform>(this, socket, md5);
break;
case SocketType::NODE_ARRAY:
array_hash<void *>(this, socket, md5);
break;
case SocketType::UNDEFINED: break;
case SocketType::UNDEFINED:
break;
}
}
}
namespace {
template<typename T>
size_t array_size_in_bytes(const Node *node, const SocketType& socket)
template<typename T> size_t array_size_in_bytes(const Node *node, const SocketType &socket)
{
const array<T> &a = *(const array<T> *)(((char *)node) + socket.struct_offset);
return a.size() * sizeof(T);
@@ -566,7 +660,8 @@ size_t Node::get_total_size_in_bytes() const
total_size += array_size_in_bytes<void *>(this, socket);
break;
case SocketType::UNDEFINED: break;
case SocketType::UNDEFINED:
break;
}
}
return total_size;

View File

@@ -31,8 +31,7 @@ struct Transform;
/* Node */
struct Node
{
struct Node {
explicit Node(const NodeType *type, ustring name = ustring());
virtual ~Node();

View File

@@ -26,21 +26,46 @@ CCL_NAMESPACE_BEGIN
* Utility class for enum values. */
struct NodeEnum {
bool empty() const { return left.empty(); }
void insert(const char *x, int y) {
bool empty() const
{
return left.empty();
}
void insert(const char *x, int y)
{
left[ustring(x)] = y;
right[y] = ustring(x);
}
bool exists(ustring x) const { return left.find(x) != left.end(); }
bool exists(int y) const { return right.find(y) != right.end(); }
bool exists(ustring x) const
{
return left.find(x) != left.end();
}
bool exists(int y) const
{
return right.find(y) != right.end();
}
int operator[](const char *x) const { return left.find(ustring(x))->second; }
int operator[](ustring x) const { return left.find(x)->second; }
ustring operator[](int y) const { return right.find(y)->second; }
int operator[](const char *x) const
{
return left.find(ustring(x))->second;
}
int operator[](ustring x) const
{
return left.find(x)->second;
}
ustring operator[](int y) const
{
return right.find(y)->second;
}
unordered_map<ustring, int, ustringHash>::const_iterator begin() const { return left.begin(); }
unordered_map<ustring, int, ustringHash>::const_iterator end() const { return left.end(); }
unordered_map<ustring, int, ustringHash>::const_iterator begin() const
{
return left.begin();
}
unordered_map<ustring, int, ustringHash>::const_iterator end() const
{
return left.end();
}
private:
unordered_map<ustring, int, ustringHash> left;

View File

@@ -34,36 +34,61 @@ bool SocketType::is_array() const
size_t SocketType::size(Type type)
{
switch(type)
{
case UNDEFINED: return 0;
switch (type) {
case UNDEFINED:
return 0;
case BOOLEAN: return sizeof(bool);
case FLOAT: return sizeof(float);
case INT: return sizeof(int);
case UINT: return sizeof(uint);
case COLOR: return sizeof(float3);
case VECTOR: return sizeof(float3);
case POINT: return sizeof(float3);
case NORMAL: return sizeof(float3);
case POINT2: return sizeof(float2);
case CLOSURE: return 0;
case STRING: return sizeof(ustring);
case ENUM: return sizeof(int);
case TRANSFORM: return sizeof(Transform);
case NODE: return sizeof(void*);
case BOOLEAN:
return sizeof(bool);
case FLOAT:
return sizeof(float);
case INT:
return sizeof(int);
case UINT:
return sizeof(uint);
case COLOR:
return sizeof(float3);
case VECTOR:
return sizeof(float3);
case POINT:
return sizeof(float3);
case NORMAL:
return sizeof(float3);
case POINT2:
return sizeof(float2);
case CLOSURE:
return 0;
case STRING:
return sizeof(ustring);
case ENUM:
return sizeof(int);
case TRANSFORM:
return sizeof(Transform);
case NODE:
return sizeof(void *);
case BOOLEAN_ARRAY: return sizeof(array<bool>);
case FLOAT_ARRAY: return sizeof(array<float>);
case INT_ARRAY: return sizeof(array<int>);
case COLOR_ARRAY: return sizeof(array<float3>);
case VECTOR_ARRAY: return sizeof(array<float3>);
case POINT_ARRAY: return sizeof(array<float3>);
case NORMAL_ARRAY: return sizeof(array<float3>);
case POINT2_ARRAY: return sizeof(array<float2>);
case STRING_ARRAY: return sizeof(array<ustring>);
case TRANSFORM_ARRAY: return sizeof(array<Transform>);
case NODE_ARRAY: return sizeof(array<void*>);
case BOOLEAN_ARRAY:
return sizeof(array<bool>);
case FLOAT_ARRAY:
return sizeof(array<float>);
case INT_ARRAY:
return sizeof(array<int>);
case COLOR_ARRAY:
return sizeof(array<float3>);
case VECTOR_ARRAY:
return sizeof(array<float3>);
case POINT_ARRAY:
return sizeof(array<float3>);
case NORMAL_ARRAY:
return sizeof(array<float3>);
case POINT2_ARRAY:
return sizeof(array<float2>);
case STRING_ARRAY:
return sizeof(array<ustring>);
case TRANSFORM_ARRAY:
return sizeof(array<Transform>);
case NODE_ARRAY:
return sizeof(array<void *>);
}
assert(0);
@@ -83,34 +108,21 @@ void *SocketType::zero_default_value()
ustring SocketType::type_name(Type type)
{
static ustring names[] = {
ustring("undefined"),
static ustring names[] = {ustring("undefined"),
ustring("boolean"),
ustring("float"),
ustring("int"),
ustring("uint"),
ustring("color"),
ustring("vector"),
ustring("point"),
ustring("normal"),
ustring("point2"),
ustring("closure"),
ustring("string"),
ustring("enum"),
ustring("transform"),
ustring("node"),
ustring("boolean"), ustring("float"),
ustring("int"), ustring("uint"),
ustring("color"), ustring("vector"),
ustring("point"), ustring("normal"),
ustring("point2"), ustring("closure"),
ustring("string"), ustring("enum"),
ustring("transform"), ustring("node"),
ustring("array_boolean"),
ustring("array_float"),
ustring("array_int"),
ustring("array_color"),
ustring("array_vector"),
ustring("array_point"),
ustring("array_normal"),
ustring("array_point2"),
ustring("array_string"),
ustring("array_transform"),
ustring("array_boolean"), ustring("array_float"),
ustring("array_int"), ustring("array_color"),
ustring("array_vector"), ustring("array_point"),
ustring("array_normal"), ustring("array_point2"),
ustring("array_string"), ustring("array_transform"),
ustring("array_node")};
return names[(int)type];
@@ -123,8 +135,7 @@ bool SocketType::is_float3(Type type)
/* Node Type */
NodeType::NodeType(Type type_)
: type(type_)
NodeType::NodeType(Type type_) : type(type_)
{
}
@@ -132,9 +143,15 @@ NodeType::~NodeType()
{
}
void NodeType::register_input(ustring name, ustring ui_name, SocketType::Type type, int struct_offset,
const void *default_value, const NodeEnum *enum_values,
const NodeType **node_type, int flags, int extra_flags)
void NodeType::register_input(ustring name,
ustring ui_name,
SocketType::Type type,
int struct_offset,
const void *default_value,
const NodeEnum *enum_values,
const NodeType **node_type,
int flags,
int extra_flags)
{
SocketType socket;
socket.name = name;

View File

@@ -30,10 +30,8 @@ struct NodeType;
/* Socket Type */
struct SocketType
{
enum Type
{
struct SocketType {
enum Type {
UNDEFINED,
BOOLEAN,
@@ -102,21 +100,21 @@ struct SocketType
/* Node Type */
struct NodeType
{
enum Type {
NONE,
SHADER
};
struct NodeType {
enum Type { NONE, SHADER };
explicit NodeType(Type type = NONE);
~NodeType();
void register_input(ustring name, ustring ui_name, SocketType::Type type,
int struct_offset, const void *default_value,
void register_input(ustring name,
ustring ui_name,
SocketType::Type type,
int struct_offset,
const void *default_value,
const NodeEnum *enum_values = NULL,
const NodeType **node_type = NULL,
int flags = 0, int extra_flags = 0);
int flags = 0,
int extra_flags = 0);
void register_output(ustring name, ustring ui_name, SocketType::Type type);
const SocketType *find_input(ustring name) const;
@@ -138,16 +136,17 @@ struct NodeType
/* Node Definition Macros */
#define NODE_DECLARE \
template<typename T> \
static const NodeType *register_type(); \
template<typename T> static const NodeType *register_type(); \
static Node *create(const NodeType *type); \
static const NodeType *node_type;
#define NODE_DEFINE(structname) \
const NodeType *structname::node_type = structname::register_type<structname>(); \
Node *structname::create(const NodeType*) { return new structname(); } \
template<typename T> \
const NodeType *structname::register_type()
Node *structname::create(const NodeType *) \
{ \
return new structname(); \
} \
template<typename T> const NodeType *structname::register_type()
/* Sock Definition Macros */
@@ -157,7 +156,15 @@ const NodeType *structname::register_type()
{ \
static datatype defval = default_value; \
CHECK_TYPE(((T *)1)->name, datatype); \
type->register_input(ustring(#name), ustring(ui_name), TYPE, SOCKET_OFFSETOF(T, name), &defval, NULL, NULL, flags, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
TYPE, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
NULL, \
flags, \
##__VA_ARGS__); \
}
#define SOCKET_BOOLEAN(name, ui_name, default_value, ...) \
@@ -186,80 +193,186 @@ const NodeType *structname::register_type()
{ \
static int defval = default_value; \
assert(SOCKET_SIZEOF(T, name) == sizeof(int)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::ENUM, SOCKET_OFFSETOF(T, name), &defval, &values, NULL, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::ENUM, \
SOCKET_OFFSETOF(T, name), \
&defval, \
&values, \
NULL, \
##__VA_ARGS__); \
}
#define SOCKET_NODE(name, ui_name, node_type, ...) \
{ \
static Node *defval = NULL; \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::NODE, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
node_type, \
##__VA_ARGS__); \
}
#define SOCKET_BOOLEAN_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<bool>, SocketType::BOOLEAN_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_INT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<int>, SocketType::INT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_FLOAT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float>, SocketType::FLOAT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_COLOR_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::COLOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_VECTOR_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::VECTOR_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::POINT_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_NORMAL_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float3>, SocketType::NORMAL_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_POINT2_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<float2>, SocketType::POINT2_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_STRING_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, array<ustring>, SocketType::STRING_ARRAY, 0, ##__VA_ARGS__)
#define SOCKET_TRANSFORM_ARRAY(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, array<Transform>, SocketType::TRANSFORM_ARRAY, 0, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
array<Transform>, \
SocketType::TRANSFORM_ARRAY, \
0, \
##__VA_ARGS__)
#define SOCKET_NODE_ARRAY(name, ui_name, node_type, ...) \
{ \
static Node *defval = NULL; \
assert(SOCKET_SIZEOF(T, name) == sizeof(Node *)); \
type->register_input(ustring(#name), ustring(ui_name), SocketType::NODE_ARRAY, SOCKET_OFFSETOF(T, name), &defval, NULL, node_type, ##__VA_ARGS__); \
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::NODE_ARRAY, \
SOCKET_OFFSETOF(T, name), \
&defval, \
NULL, \
node_type, \
##__VA_ARGS__); \
}
#define SOCKET_IN_BOOLEAN(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, bool, SocketType::BOOLEAN, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
bool, \
SocketType::BOOLEAN, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_INT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE( \
name, ui_name, default_value, int, SocketType::INT, SocketType::LINKABLE, ##__VA_ARGS__)
#define SOCKET_IN_FLOAT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float, SocketType::FLOAT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float, \
SocketType::FLOAT, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_COLOR(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::COLOR, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::COLOR, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_VECTOR(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::VECTOR, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::VECTOR, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_POINT(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::POINT, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::POINT, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_NORMAL(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, float3, SocketType::NORMAL, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
float3, \
SocketType::NORMAL, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_STRING(name, ui_name, default_value, ...) \
SOCKET_DEFINE(name, ui_name, default_value, ustring, SocketType::STRING, SocketType::LINKABLE, ##__VA_ARGS__)
SOCKET_DEFINE(name, \
ui_name, \
default_value, \
ustring, \
SocketType::STRING, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_IN_CLOSURE(name, ui_name, ...) \
type->register_input(ustring(#name), ustring(ui_name), SocketType::CLOSURE, 0, NULL, NULL, NULL, SocketType::LINKABLE, ##__VA_ARGS__)
type->register_input(ustring(#name), \
ustring(ui_name), \
SocketType::CLOSURE, \
0, \
NULL, \
NULL, \
NULL, \
SocketType::LINKABLE, \
##__VA_ARGS__)
#define SOCKET_OUT_BOOLEAN(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::BOOLEAN); \
}
#define SOCKET_OUT_INT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::INT); \
}
#define SOCKET_OUT_FLOAT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::FLOAT); \
}
#define SOCKET_OUT_COLOR(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::COLOR); \
}
#define SOCKET_OUT_VECTOR(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::VECTOR); \
}
#define SOCKET_OUT_POINT(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::POINT); \
}
#define SOCKET_OUT_NORMAL(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::NORMAL); \
}
#define SOCKET_OUT_CLOSURE(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::CLOSURE); \
}
#define SOCKET_OUT_STRING(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::STRING); \
}
#define SOCKET_OUT_ENUM(name, ui_name) \
{ type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); }
{ \
type->register_output(ustring(#name), ustring(ui_name), SocketType::ENUM); \
}
CCL_NAMESPACE_END

View File

@@ -72,15 +72,12 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
continue;
}
switch(socket.type)
{
case SocketType::BOOLEAN:
{
switch (socket.type) {
case SocketType::BOOLEAN: {
node->set(socket, xml_read_boolean(attr.value()));
break;
}
case SocketType::BOOLEAN_ARRAY:
{
case SocketType::BOOLEAN_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
@@ -91,30 +88,25 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
node->set(socket, value);
break;
}
case SocketType::FLOAT:
{
case SocketType::FLOAT: {
node->set(socket, (float)atof(attr.value()));
break;
}
case SocketType::FLOAT_ARRAY:
{
case SocketType::FLOAT_ARRAY: {
array<float> value;
xml_read_float_array<1>(value, attr);
node->set(socket, value);
break;
}
case SocketType::INT:
{
case SocketType::INT: {
node->set(socket, (int)atoi(attr.value()));
break;
}
case SocketType::UINT:
{
case SocketType::UINT: {
node->set(socket, (uint)atoi(attr.value()));
break;
}
case SocketType::INT_ARRAY:
{
case SocketType::INT_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
@@ -129,8 +121,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::COLOR:
case SocketType::VECTOR:
case SocketType::POINT:
case SocketType::NORMAL:
{
case SocketType::NORMAL: {
array<float3> value;
xml_read_float_array<3>(value, attr);
if (value.size() == 1) {
@@ -141,15 +132,13 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
case SocketType::COLOR_ARRAY:
case SocketType::VECTOR_ARRAY:
case SocketType::POINT_ARRAY:
case SocketType::NORMAL_ARRAY:
{
case SocketType::NORMAL_ARRAY: {
array<float3> value;
xml_read_float_array<3>(value, attr);
node->set(socket, value);
break;
}
case SocketType::POINT2:
{
case SocketType::POINT2: {
array<float2> value;
xml_read_float_array<2>(value, attr);
if (value.size() == 1) {
@@ -157,31 +146,30 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
}
break;
}
case SocketType::POINT2_ARRAY:
{
case SocketType::POINT2_ARRAY: {
array<float2> value;
xml_read_float_array<2>(value, attr);
node->set(socket, value);
break;
}
case SocketType::STRING:
{
case SocketType::STRING: {
node->set(socket, attr.value());
break;
}
case SocketType::ENUM:
{
case SocketType::ENUM: {
ustring value(attr.value());
if (socket.enum_values->exists(value)) {
node->set(socket, value);
}
else {
fprintf(stderr, "Unknown value \"%s\" for attribute \"%s\".\n", value.c_str(), socket.name.c_str());
fprintf(stderr,
"Unknown value \"%s\" for attribute \"%s\".\n",
value.c_str(),
socket.name.c_str());
}
break;
}
case SocketType::STRING_ARRAY:
{
case SocketType::STRING_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
@@ -193,8 +181,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
node->set(socket, value);
break;
}
case SocketType::TRANSFORM:
{
case SocketType::TRANSFORM: {
array<Transform> value;
xml_read_float_array<12>(value, attr);
if (value.size() == 1) {
@@ -202,42 +189,35 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
}
break;
}
case SocketType::TRANSFORM_ARRAY:
{
case SocketType::TRANSFORM_ARRAY: {
array<Transform> value;
xml_read_float_array<12>(value, attr);
node->set(socket, value);
break;
}
case SocketType::NODE:
{
case SocketType::NODE: {
ustring value(attr.value());
map<ustring, Node *>::iterator it = reader.node_map.find(value);
if(it != reader.node_map.end())
{
if (it != reader.node_map.end()) {
Node *value_node = it->second;
if (value_node->type == *(socket.node_type))
node->set(socket, it->second);
}
break;
}
case SocketType::NODE_ARRAY:
{
case SocketType::NODE_ARRAY: {
vector<string> tokens;
string_split(tokens, attr.value());
array<Node *> value;
value.resize(tokens.size());
for(size_t i = 0; i < value.size(); i++)
{
for (size_t i = 0; i < value.size(); i++) {
map<ustring, Node *>::iterator it = reader.node_map.find(ustring(tokens[i]));
if(it != reader.node_map.end())
{
if (it != reader.node_map.end()) {
Node *value_node = it->second;
value[i] = (value_node->type == *(socket.node_type)) ? value_node : NULL;
}
else
{
else {
value[i] = NULL;
}
}
@@ -273,15 +253,12 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
xml_attribute attr = xml_node.append_attribute(socket.name.c_str());
switch(socket.type)
{
case SocketType::BOOLEAN:
{
switch (socket.type) {
case SocketType::BOOLEAN: {
attr = xml_write_boolean(node->get_bool(socket));
break;
}
case SocketType::BOOLEAN_ARRAY:
{
case SocketType::BOOLEAN_ARRAY: {
std::stringstream ss;
const array<bool> &value = node->get_bool_array(socket);
for (size_t i = 0; i < value.size(); i++) {
@@ -292,13 +269,11 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
attr = ss.str().c_str();
break;
}
case SocketType::FLOAT:
{
case SocketType::FLOAT: {
attr = (double)node->get_float(socket);
break;
}
case SocketType::FLOAT_ARRAY:
{
case SocketType::FLOAT_ARRAY: {
std::stringstream ss;
const array<float> &value = node->get_float_array(socket);
for (size_t i = 0; i < value.size(); i++) {
@@ -310,18 +285,15 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
attr = ss.str().c_str();
break;
}
case SocketType::INT:
{
case SocketType::INT: {
attr = node->get_int(socket);
break;
}
case SocketType::UINT:
{
case SocketType::UINT: {
attr = node->get_uint(socket);
break;
}
case SocketType::INT_ARRAY:
{
case SocketType::INT_ARRAY: {
std::stringstream ss;
const array<int> &value = node->get_int_array(socket);
for (size_t i = 0; i < value.size(); i++) {
@@ -336,21 +308,21 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
case SocketType::COLOR:
case SocketType::VECTOR:
case SocketType::POINT:
case SocketType::NORMAL:
{
case SocketType::NORMAL: {
float3 value = node->get_float3(socket);
attr = string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
attr =
string_printf("%g %g %g", (double)value.x, (double)value.y, (double)value.z).c_str();
break;
}
case SocketType::COLOR_ARRAY:
case SocketType::VECTOR_ARRAY:
case SocketType::POINT_ARRAY:
case SocketType::NORMAL_ARRAY:
{
case SocketType::NORMAL_ARRAY: {
std::stringstream ss;
const array<float3> &value = node->get_float3_array(socket);
for (size_t i = 0; i < value.size(); i++) {
ss << string_printf("%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
ss << string_printf(
"%g %g %g", (double)value[i].x, (double)value[i].y, (double)value[i].z);
if (i != value.size() - 1) {
ss << " ";
}
@@ -358,14 +330,12 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
attr = ss.str().c_str();
break;
}
case SocketType::POINT2:
{
case SocketType::POINT2: {
float2 value = node->get_float2(socket);
attr = string_printf("%g %g", (double)value.x, (double)value.y).c_str();
break;
}
case SocketType::POINT2_ARRAY:
{
case SocketType::POINT2_ARRAY: {
std::stringstream ss;
const array<float2> &value = node->get_float2_array(socket);
for (size_t i = 0; i < value.size(); i++) {
@@ -378,13 +348,11 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
break;
}
case SocketType::STRING:
case SocketType::ENUM:
{
case SocketType::ENUM: {
attr = node->get_string(socket).c_str();
break;
}
case SocketType::STRING_ARRAY:
{
case SocketType::STRING_ARRAY: {
std::stringstream ss;
const array<ustring> &value = node->get_string_array(socket);
for (size_t i = 0; i < value.size(); i++) {
@@ -396,26 +364,32 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
attr = ss.str().c_str();
break;
}
case SocketType::TRANSFORM:
{
case SocketType::TRANSFORM: {
Transform tfm = node->get_transform(socket);
std::stringstream ss;
for (int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
ss << string_printf("%g %g %g %g ",
(double)tfm[i][0],
(double)tfm[i][1],
(double)tfm[i][2],
(double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
attr = ss.str().c_str();
break;
}
case SocketType::TRANSFORM_ARRAY:
{
case SocketType::TRANSFORM_ARRAY: {
std::stringstream ss;
const array<Transform> &value = node->get_transform_array(socket);
for (size_t j = 0; j < value.size(); j++) {
const Transform &tfm = value[j];
for (int i = 0; i < 3; i++) {
ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
ss << string_printf("%g %g %g %g ",
(double)tfm[i][0],
(double)tfm[i][1],
(double)tfm[i][2],
(double)tfm[i][3]);
}
ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
if (j != value.size() - 1) {
@@ -425,16 +399,14 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
attr = ss.str().c_str();
break;
}
case SocketType::NODE:
{
case SocketType::NODE: {
Node *value = node->get_node(socket);
if (value) {
attr = value->name.c_str();
}
break;
}
case SocketType::NODE_ARRAY:
{
case SocketType::NODE_ARRAY: {
std::stringstream ss;
const array<Node *> &value = node->get_node_array(socket);
for (size_t i = 0; i < value.size(); i++) {

View File

@@ -199,7 +199,8 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
RTCRayHit ray_hit;
kernel_embree_setup_rayhit(ray, ray_hit, visibility);
rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
return true;
}
@@ -277,14 +278,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
const int object_flag = kernel_tex_fetch(__object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
Transform ob_itfm;
rtc_ray.tfar = bvh_instance_motion_push(kg,
local_object,
&ray,
&P,
&dir,
&idir,
ray.t,
&ob_itfm);
rtc_ray.tfar = bvh_instance_motion_push(
kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
/* bvh_instance_motion_push() returns the inverse transform but
* it's not needed here. */
(void)ob_itfm;
@@ -307,20 +302,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
# endif /* __EMBREE__ */
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_local_motion(kg,
&ray,
local_isect,
local_object,
lcg_state,
max_hits);
return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg,
&ray,
local_isect,
local_object,
lcg_state,
max_hits);
return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
}
#endif
@@ -360,52 +345,27 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg,
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair_motion(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __HAIR__ */
return bvh_intersect_shadow_all_motion(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
return bvh_intersect_shadow_all_hair(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __HAIR__ */
# ifdef __INSTANCING__
if (kernel_data.bvh.have_instancing) {
return bvh_intersect_shadow_all_instancing(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits);
}
# endif /* __INSTANCING__ */
return bvh_intersect_shadow_all(kg,
ray,
isect,
visibility,
max_hits,
num_hits);
return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits);
}
#endif /* __SHADOW_RECORD_ALL__ */
@@ -479,7 +439,6 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
}
#endif /* __VOLUME_RECORD_ALL__ */
/* Ray offset to avoid self intersection.
*
* This function should be used to compute a modified ray start position for

View File

@@ -59,8 +59,7 @@ struct CCLIntersectContext {
}
};
class IntersectContext
{
class IntersectContext {
public:
IntersectContext(CCLIntersectContext *ctx)
{
@@ -71,7 +70,9 @@ public:
CCLIntersectContext *userRayExt;
};
ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility)
ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
RTCRay &rtc_ray,
const uint visibility)
{
rtc_ray.org_x = ray.P.x;
rtc_ray.org_y = ray.P.y;
@@ -85,14 +86,19 @@ ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray,
rtc_ray.mask = visibility;
}
ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility)
ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
RTCRayHit &rayhit,
const uint visibility)
{
kernel_embree_setup_ray(ray, rayhit.ray, visibility);
rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID;
}
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect)
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect)
{
bool is_hair = hit->geomID & 1;
isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u;
@@ -100,25 +106,36 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
kernel_tex_fetch(__object_node, hit->instID[0] / 2);
isect->object = hit->instID[0] / 2;
}
else {
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, hit->geomID));
isect->object = OBJECT_NONE;
}
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}
ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id)
ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect,
int local_object_id)
{
isect->u = 1.0f - hit->v - hit->u;
isect->v = hit->u;
isect->t = ray->tfar;
isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id);
RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2));
isect->prim = hit->primID +
(intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) +
kernel_tex_fetch(__object_node, local_object_id);
isect->object = local_object_id;
isect->type = kernel_tex_fetch(__prim_type, isect->prim);
}

View File

@@ -81,14 +81,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -257,29 +250,14 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
local_isect,
local_object,
lcg_state,
max_hits);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
kernel_assert(!"Should not happen");
return false;

View File

@@ -71,8 +71,7 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
# else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
# endif
}
@@ -133,13 +132,11 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
(((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
# else
return ((c0max >= c0min)? 1: 0) |
((c1max >= c1min)? 2: 0);
return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
# endif
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
KernelGlobals *kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float t,
@@ -165,8 +162,7 @@ ccl_device_forceinline bool bvh_unaligned_node_intersect_child(
return tnear <= tfar;
}
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(
KernelGlobals *kg,
ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg,
const float3 P,
const float3 dir,
const float t,
@@ -274,23 +270,10 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
idir,
t,
node_addr,
visibility,
dist);
return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg,
P,
idir,
t,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
}
}
@@ -307,33 +290,17 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
dir,
idir,
t,
difl,
extmax,
node_addr,
visibility,
dist);
return bvh_unaligned_node_intersect_robust(
kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect_robust(kg,
P,
idir,
t,
difl,
extmax,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect_robust(
kg, P, idir, t, difl, extmax, node_addr, visibility, dist);
}
}
#else /* !defined(__KERNEL_SSE2__) */
int ccl_device_forceinline bvh_aligned_node_intersect(
KernelGlobals *kg,
int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
const float3 &P,
const float3 &dir,
const ssef &tsplat,
@@ -376,8 +343,7 @@ int ccl_device_forceinline bvh_aligned_node_intersect(
# endif
}
ccl_device_forceinline int bvh_aligned_node_intersect_robust(
KernelGlobals *kg,
ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
const float3 &P,
const float3 &dir,
const ssef &tsplat,
@@ -454,22 +420,13 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
@@ -516,22 +473,13 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg
float3 aligned_dir0 = transform_direction(&space0, dir),
aligned_dir1 = transform_direction(&space1, dir);
float3 aligned_P0 = transform_point(&space0, P),
aligned_P1 = transform_point(&space1, P);
float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
nrdir1 = -bvh_inverse_direction(aligned_dir1);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x,
aligned_P1.x * nrdir1.x,
0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y,
aligned_P1.y * nrdir1.y,
0.0f,
0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z,
aligned_P1.z * nrdir1.z,
0.0f,
0.0f);
ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
@@ -587,26 +535,12 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect(kg,
P,
dir,
isect_near,
isect_far,
node_addr,
visibility,
dist);
return bvh_unaligned_node_intersect(
kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect(kg,
P,
dir,
tsplat,
Psplat,
idirsplat,
shufflexyz,
node_addr,
visibility,
dist);
return bvh_aligned_node_intersect(
kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
}
}
@@ -627,15 +561,8 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg,
{
float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
return bvh_unaligned_node_intersect_robust(kg,
P,
dir,
isect_near,
isect_far,
difl,
node_addr,
visibility,
dist);
return bvh_unaligned_node_intersect_robust(
kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist);
}
else {
return bvh_aligned_node_intersect_robust(kg,

View File

@@ -196,25 +196,13 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
switch (p_type) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg,
isect_array,
P,
dir,
visibility,
object,
prim_addr);
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
break;
}
#endif
@@ -233,7 +221,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
else {
hit = curve_intersect(kg,
@@ -246,7 +235,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
break;
}
@@ -400,29 +390,14 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect_array,
visibility,
max_hits,
num_hits);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
}
kernel_assert(!"Should not happen");
return false;

View File

@@ -47,7 +47,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
@@ -236,14 +237,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr))
{
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -265,15 +259,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr))
{
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
/* shadow ray early termination */
# if defined(__KERNEL_SSE2__)
if (visibility & PATH_RAY_SHADOW_OPAQUE)
@@ -353,7 +340,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
@@ -422,7 +410,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
@@ -436,7 +425,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
# endif
@@ -449,7 +439,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
# if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
# endif
@@ -461,7 +452,8 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
isect,
visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
, lcg_state,
,
lcg_state,
difl,
extmax
#endif

View File

@@ -187,18 +187,14 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr);
triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
}
break;
}
@@ -208,19 +204,15 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -237,7 +229,8 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
@@ -314,23 +307,14 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect,
visibility);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
}
kernel_assert(!"Should not happen");
return false;

View File

@@ -192,18 +192,14 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = triangle_intersect(kg,
isect_array,
P,
dir,
visibility,
object,
prim_addr);
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -218,7 +214,8 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -238,19 +235,15 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* only primitives from volume object */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -265,7 +258,8 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -292,7 +286,8 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
@@ -390,26 +385,14 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
switch (kernel_data.bvh.bvh_layout) {
#ifdef __KERNEL_AVX2__
case BVH_LAYOUT_BVH8:
return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
#ifdef __QBVH__
case BVH_LAYOUT_BVH4:
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
#endif
case BVH_LAYOUT_BVH2:
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
isect_array,
max_hits,
visibility);
return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
}
kernel_assert(!"Should not happen");
return 0;

View File

@@ -58,14 +58,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -89,9 +82,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -112,8 +103,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -363,8 +358,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr,
isect_t,
lcg_state,
max_hits))
{
max_hits)) {
return true;
}
}
@@ -385,8 +379,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr,
isect_t,
lcg_state,
max_hits))
{
max_hits)) {
return true;
}
}

View File

@@ -31,9 +31,12 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
*near_x = 0; *far_x = 1;
*near_y = 2; *far_y = 3;
*near_z = 4; *far_z = 5;
*near_x = 0;
*far_x = 1;
*near_y = 2;
*far_y = 3;
*near_z = 4;
*far_z = 5;
const size_t mask = movemask(ssef(idir.m128));
@@ -41,18 +44,41 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir,
const int mask_y = (mask & 2) >> 1;
const int mask_z = (mask & 4) >> 2;
*near_x += mask_x; *far_x -= mask_x;
*near_y += mask_y; *far_y -= mask_y;
*near_z += mask_z; *far_z -= mask_z;
*near_x += mask_x;
*far_x -= mask_x;
*near_y += mask_y;
*far_y -= mask_y;
*near_z += mask_z;
*far_z -= mask_z;
#else
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
if (idir.x >= 0.0f) {
*near_x = 0;
*far_x = 1;
}
else {
*near_x = 1;
*far_x = 0;
}
if (idir.y >= 0.0f) {
*near_y = 2;
*far_y = 3;
}
else {
*near_y = 3;
*far_y = 2;
}
if (idir.z >= 0.0f) {
*near_z = 4;
*far_z = 5;
}
else {
*near_z = 5;
*far_z = 4;
}
#endif
}
ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a,
OBVHStackItem *ccl_restrict b)
ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
{
OBVHStackItem tmp = *a;
*a = *b;
@@ -63,9 +89,15 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s2,
OBVHStackItem *ccl_restrict s3)
{
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
if (s3->dist < s2->dist) {
obvh_item_swap(s3, s2);
}
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -73,11 +105,21 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
OBVHStackItem *ccl_restrict s3,
OBVHStackItem *ccl_restrict s4)
{
if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); }
if(s4->dist < s3->dist) { obvh_item_swap(s4, s3); }
if(s3->dist < s1->dist) { obvh_item_swap(s3, s1); }
if(s4->dist < s2->dist) { obvh_item_swap(s4, s2); }
if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); }
if (s2->dist < s1->dist) {
obvh_item_swap(s2, s1);
}
if (s4->dist < s3->dist) {
obvh_item_swap(s4, s3);
}
if (s3->dist < s1->dist) {
obvh_item_swap(s3, s1);
}
if (s4->dist < s2->dist) {
obvh_item_swap(s4, s2);
}
if (s3->dist < s2->dist) {
obvh_item_swap(s3, s2);
}
}
ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
@@ -210,12 +252,18 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
{
const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_x*2), idir.x, org_idir.x);
const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_y*2), idir.y, org_idir.y);
const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_z*2), idir.z, org_idir.z);
const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_x*2), idir.x, org_idir.x);
const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_y*2), idir.y, org_idir.y);
const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_z*2), idir.z, org_idir.z);
const avxf tnear_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
const avxf tnear_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
const avxf tnear_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
const avxf tfar_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
const avxf tfar_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
const avxf tfar_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
@@ -228,8 +276,7 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg
#endif
}
ccl_device_inline int obvh_aligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
@@ -250,12 +297,18 @@ ccl_device_inline int obvh_aligned_node_intersect_robust(
{
const int offset = node_addr + 2;
#ifdef __KERNEL_AVX2__
const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
const avxf tnear_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x);
const avxf tfar_x = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x);
const avxf tnear_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y);
const avxf tfar_y = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y);
const avxf tnear_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z);
const avxf tfar_z = msub(
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z);
const float round_down = 1.0f - difl;
const float round_up = 1.0f + difl;
@@ -272,8 +325,7 @@ ccl_device_inline int obvh_aligned_node_intersect_robust(
/* Unaligned nodes intersection */
ccl_device_inline int obvh_unaligned_node_intersect(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
@@ -317,16 +369,13 @@ ccl_device_inline int obvh_unaligned_node_intersect(
aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
const avxf neg_one(-1.0f);
const avxf nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const avxf tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const avxf tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const avxf tnear_x = min(tlower_x, tupper_x);
@@ -342,8 +391,7 @@ ccl_device_inline int obvh_unaligned_node_intersect(
return movemask(vmask);
}
ccl_device_inline int obvh_unaligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
@@ -388,16 +436,13 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
const avxf neg_one(-1.0f);
const avxf nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const avxf tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const avxf tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const float round_down = 1.0f - difl;
@@ -422,8 +467,7 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
ccl_device_inline int obvh_node_intersect(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
@@ -453,8 +497,12 @@ ccl_device_inline int obvh_node_intersect(
org,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
@@ -468,15 +516,18 @@ ccl_device_inline int obvh_node_intersect(
org,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
}
ccl_device_inline int obvh_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const avxf &isect_near,
const avxf &isect_far,
#ifdef __KERNEL_AVX2__
@@ -507,8 +558,12 @@ ccl_device_inline int obvh_node_intersect_robust(
P,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);
@@ -523,8 +578,12 @@ ccl_device_inline int obvh_node_intersect_robust(
P,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);

View File

@@ -86,9 +86,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -103,8 +101,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
|| ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
) {
/* Pop. */
@@ -128,8 +125,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -383,14 +384,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int prim_count = prim_addr2 - prim_addr;
if (prim_count < 3) {
while (prim_addr < prim_addr2) {
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
int hit = triangle_intersect(kg,
isect_array,
P,
dir,
PATH_RAY_SHADOW,
object,
prim_addr);
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
p_type);
int hit = triangle_intersect(
kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
/* Shadow ray early termination. */
if (hit) {
/* detect if this surface has a shader with transparent shadows */
@@ -435,8 +432,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
prim_addr++;
} //while
} else {
kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == p_type);
}
else {
kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
p_type);
#if BVH_FEATURE(BVH_INSTANCING)
int *nhiptr = &num_hits_in_instance;
@@ -467,9 +466,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
#ifdef __SHADOW_TRICKS__
uint tri_object = (object == OBJECT_NONE)
? kernel_tex_fetch(__prim_object, prim_addr)
: object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
if (tri_object == skip_object) {
++prim_addr;
continue;
@@ -486,14 +485,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
PATH_RAY_SHADOW,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
break;
}
#endif
@@ -512,7 +505,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
else {
hit = curve_intersect(kg,
@@ -525,7 +519,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
break;
}
@@ -596,9 +591,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
num_hits_in_instance = 0;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
@@ -617,7 +610,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
@@ -651,9 +643,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));

View File

@@ -37,7 +37,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
,uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
@@ -87,9 +88,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
do {
@@ -100,14 +99,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
if (UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0
#endif
)
{
) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
@@ -140,8 +137,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
&dist);
@@ -162,8 +163,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
}
@@ -397,7 +402,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
continue;
}
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
--stack_ptr;
@@ -408,8 +412,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((node_dist > isect->t) ||
((__float_as_uint(leaf.z) & visibility) == 0)))
if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
#else
if (UNLIKELY((node_dist > isect->t)))
#endif
@@ -441,14 +444,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr))
{
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
tfar = avxf(isect->t);
/* Shadow ray early termination. */
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
@@ -470,8 +466,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
0,
0,
NULL,
0.0f))
{
0.0f)) {
tfar = avxf(isect->t);
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
return true;
@@ -485,15 +480,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr))
{
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
tfar = avxf(isect->t);
/* Shadow ray early termination. */
if (visibility == PATH_RAY_SHADOW_OPAQUE) {
@@ -559,14 +547,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
qbvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
@@ -604,9 +591,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));

View File

@@ -74,9 +74,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -108,8 +106,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -363,7 +365,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
@@ -378,13 +382,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -398,14 +405,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
@@ -447,9 +453,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));

View File

@@ -78,9 +78,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -112,8 +110,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -368,7 +370,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
@@ -406,13 +410,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -449,14 +456,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
# if BVH_FEATURE(BVH_HAIR)
@@ -518,9 +524,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
obvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = avxf(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));

View File

@@ -67,14 +67,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
isect_t = bvh_instance_motion_push(kg,
local_object,
ray,
&P,
&dir,
&idir,
isect_t,
&ob_itfm);
isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
#else
isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
#endif
@@ -98,9 +91,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -121,8 +112,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);

View File

@@ -31,9 +31,12 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
{
#ifdef __KERNEL_SSE__
*near_x = 0; *far_x = 1;
*near_y = 2; *far_y = 3;
*near_z = 4; *far_z = 5;
*near_x = 0;
*far_x = 1;
*near_y = 2;
*far_y = 3;
*near_z = 4;
*far_z = 5;
const size_t mask = movemask(ssef(idir.m128));
@@ -41,21 +44,44 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir,
const int mask_y = (mask & 2) >> 1;
const int mask_z = (mask & 4) >> 2;
*near_x += mask_x; *far_x -= mask_x;
*near_y += mask_y; *far_y -= mask_y;
*near_z += mask_z; *far_z -= mask_z;
*near_x += mask_x;
*far_x -= mask_x;
*near_y += mask_y;
*far_y -= mask_y;
*near_z += mask_z;
*far_z -= mask_z;
#else
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; }
if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; }
if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; }
if (idir.x >= 0.0f) {
*near_x = 0;
*far_x = 1;
}
else {
*near_x = 1;
*far_x = 0;
}
if (idir.y >= 0.0f) {
*near_y = 2;
*far_y = 3;
}
else {
*near_y = 3;
*far_y = 2;
}
if (idir.z >= 0.0f) {
*near_z = 4;
*far_z = 5;
}
else {
*near_z = 5;
*far_z = 4;
}
#endif
}
/* TODO(sergey): Investigate if using intrinsics helps for both
* stack item swap and float comparison.
*/
ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
QBVHStackItem *ccl_restrict b)
ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
{
QBVHStackItem tmp = *a;
*a = *b;
@@ -66,9 +92,15 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s2,
QBVHStackItem *ccl_restrict s3)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
if (s3->dist < s2->dist) {
qbvh_item_swap(s3, s2);
}
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
}
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
@@ -76,11 +108,21 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
QBVHStackItem *ccl_restrict s3,
QBVHStackItem *ccl_restrict s4)
{
if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
if (s2->dist < s1->dist) {
qbvh_item_swap(s2, s1);
}
if (s4->dist < s3->dist) {
qbvh_item_swap(s4, s3);
}
if (s3->dist < s1->dist) {
qbvh_item_swap(s3, s1);
}
if (s4->dist < s2->dist) {
qbvh_item_swap(s4, s2);
}
if (s3->dist < s2->dist) {
qbvh_item_swap(s3, s2);
}
}
/* Axis-aligned nodes intersection */
@@ -106,9 +148,12 @@ static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
{
const int offset = node_addr + 1;
#ifdef __KERNEL_AVX2__
const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
const ssef tnear_x = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
const ssef tnear_y = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
const ssef tnear_z = msub(
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
@@ -136,8 +181,7 @@ static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
return mask;
}
ccl_device_inline int qbvh_aligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
@@ -184,8 +228,7 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
/* Unaligned nodes intersection */
ccl_device_inline int qbvh_unaligned_node_intersect(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
@@ -229,16 +272,13 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
const ssef nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const ssef tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const ssef tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
#ifdef __KERNEL_SSE41__
@@ -268,8 +308,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
#endif
}
ccl_device_inline int qbvh_unaligned_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
@@ -314,16 +353,13 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z;
const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
const ssef nrdir_x = neg_one / aligned_dir_x,
nrdir_y = neg_one / aligned_dir_y,
const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
nrdir_z = neg_one / aligned_dir_z;
const ssef tlower_x = aligned_P_x * nrdir_x,
tlower_y = aligned_P_y * nrdir_y,
const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
tlower_z = aligned_P_z * nrdir_z;
const ssef tupper_x = tlower_x - nrdir_x,
tupper_y = tlower_y - nrdir_y,
const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
tupper_z = tlower_z - nrdir_z;
const float round_down = 1.0f - difl;
@@ -356,8 +392,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
* They'll check node type and call appropriate intersection code.
*/
ccl_device_inline int qbvh_node_intersect(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
@@ -387,8 +422,12 @@ ccl_device_inline int qbvh_node_intersect(
org,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
@@ -402,15 +441,18 @@ ccl_device_inline int qbvh_node_intersect(
org,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
dist);
}
}
ccl_device_inline int qbvh_node_intersect_robust(
KernelGlobals *ccl_restrict kg,
ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg,
const ssef &isect_near,
const ssef &isect_far,
#ifdef __KERNEL_AVX2__
@@ -441,8 +483,12 @@ ccl_device_inline int qbvh_node_intersect_robust(
P,
dir,
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);
@@ -457,8 +503,12 @@ ccl_device_inline int qbvh_node_intersect_robust(
P,
#endif
idir,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
dist);

View File

@@ -65,7 +65,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*num_hits = 0;
isect_array->t = tmax;
#if BVH_FEATURE(BVH_INSTANCING)
int num_hits_in_instance = 0;
#endif
@@ -87,9 +86,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -104,8 +101,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
|| ((__float_as_uint(inodes.x) & visibility) == 0)
#endif
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
) {
/* Pop. */
@@ -128,8 +124,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -270,25 +270,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
switch (p_type) {
case PRIMITIVE_TRIANGLE: {
hit = triangle_intersect(kg,
isect_array,
P,
dir,
visibility,
object,
prim_addr);
hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
case PRIMITIVE_MOTION_TRIANGLE: {
hit = motion_triangle_intersect(kg,
isect_array,
P,
dir,
ray->time,
visibility,
object,
prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
break;
}
#endif
@@ -307,7 +295,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
else {
hit = curve_intersect(kg,
@@ -320,7 +309,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
ray->time,
curve_type,
NULL,
0, 0);
0,
0);
}
break;
}
@@ -390,9 +380,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
num_hits_in_instance = 0;
isect_array->t = isect_t;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -411,7 +399,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
node_addr = kernel_tex_fetch(__object_node, object);
}
}
#endif /* FEATURE(BVH_INSTANCING) */
@@ -445,9 +432,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));

View File

@@ -37,7 +37,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
Intersection *isect,
const uint visibility
#if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
,uint *lcg_state,
,
uint *lcg_state,
float difl,
float extmax
#endif
@@ -95,9 +96,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -109,14 +108,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if (UNLIKELY(node_dist > isect->t)
#if BVH_FEATURE(BVH_MOTION)
|| UNLIKELY(ray->time < inodes.y)
|| UNLIKELY(ray->time > inodes.z)
|| UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
#endif
#ifdef __VISIBILITY_FLAG__
|| (__float_as_uint(inodes.x) & visibility) == 0
#endif
)
{
) {
/* Pop. */
node_addr = traversal_stack[stack_ptr].addr;
node_dist = traversal_stack[stack_ptr].dist;
@@ -149,8 +146,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
# endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
difl,
&dist);
@@ -171,8 +172,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
}
@@ -292,8 +297,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
#ifdef __VISIBILITY_FLAG__
if(UNLIKELY((node_dist > isect->t) ||
((__float_as_uint(leaf.z) & visibility) == 0)))
if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
#else
if (UNLIKELY((node_dist > isect->t)))
#endif
@@ -324,13 +328,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(triangle_intersect(kg,
isect,
P,
dir,
visibility,
object,
prim_addr)) {
if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
@@ -345,14 +343,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
BVH_DEBUG_NEXT_INTERSECTION();
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
if(motion_triangle_intersect(kg,
isect,
P,
dir,
ray->time,
visibility,
object,
prim_addr)) {
if (motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
tfar = ssef(isect->t);
/* Shadow ray early termination. */
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
@@ -418,14 +410,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
# if BVH_FEATURE(BVH_MOTION)
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
qbvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
# else
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
# endif
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -463,9 +454,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));

View File

@@ -80,9 +80,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -114,8 +112,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -250,7 +252,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
@@ -265,13 +269,16 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr);
motion_triangle_intersect(
kg, isect, P, dir, ray->time, visibility, object, prim_addr);
}
break;
}
@@ -285,14 +292,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
isect->t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
# else
isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
@@ -334,9 +340,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
# endif
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect->t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));

View File

@@ -84,9 +84,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Offsets to select the side that becomes the lower or upper bound. */
int near_x, near_y, near_z;
int far_x, far_y, far_z;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
/* Traversal loop. */
do {
@@ -118,8 +116,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
dir4,
#endif
idir4,
near_x, near_y, near_z,
far_x, far_y, far_z,
near_x,
near_y,
near_z,
far_x,
far_y,
far_z,
node_addr,
&dist);
@@ -255,7 +257,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
@@ -276,7 +280,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -295,13 +300,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
for (; prim_addr < prim_addr2; prim_addr++) {
kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
/* Only primitives from volume object. */
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
uint tri_object = (object == OBJECT_NONE) ?
kernel_tex_fetch(__prim_object, prim_addr) :
object;
int object_flag = kernel_tex_fetch(__object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
continue;
}
/* Intersect ray against primitive. */
hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
hit = motion_triangle_intersect(
kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
if (hit) {
/* Move on to next entry in intersections array. */
isect_array++;
@@ -316,7 +324,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
# if BVH_FEATURE(BVH_MOTION)
float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
# else
Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
Transform itfm = object_fetch_transform(
kg, object, OBJECT_INVERSE_TRANSFORM);
float t_fac = 1.0f / len(transform_direction(&itfm, dir));
# endif
for (int i = 0; i < num_hits_in_instance; i++) {
@@ -340,14 +349,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int object_flag = kernel_tex_fetch(__object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
# if BVH_FEATURE(BVH_MOTION)
isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
isect_t = bvh_instance_motion_push(
kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
# else
isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
# endif
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
# if BVH_FEATURE(BVH_HAIR)
@@ -409,9 +417,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
isect_t = tmax;
isect_array->t = isect_t;
qbvh_near_far_idx_calc(idir,
&near_x, &near_y, &near_z,
&far_x, &far_y, &far_z);
qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
tfar = ssef(isect_t);
# if BVH_FEATURE(BVH_HAIR)
dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));

View File

@@ -68,7 +68,10 @@ ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 wei
}
#ifdef __OSL__
ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data)
ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
int size,
float3 weight,
void *data)
{
ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);

Some files were not shown because too many files have changed in this diff Show More