Merge branch 'master' into blender2.8
@@ -43,7 +43,7 @@ if(WIN32)
set(OPENSUBDIV_EXTRA_ARGS
${OPENSUBDIV_EXTRA_ARGS}
-DNO_CUDA=${OPENSUBDIV_CUDA}
-DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/cl
-DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/CL
-DCLEW_LIBRARY=${LIBDIR}/clew/lib/clew${LIBEXT}
-DCUEW_INCLUDE_DIR=${LIBDIR}/cuew/include
-DCUEW_LIBRARY=${LIBDIR}/cuew/lib/cuew${LIBEXT}
@@ -54,6 +54,7 @@ else()
${OPENSUBDIV_EXTRA_ARGS}
-DNO_CUDA=ON
-DCUEW_INCLUDE_DIR=${LIBDIR}/cuew/include
-DCLEW_INCLUDE_DIR=${LIBDIR}/clew/include/CL
-DCLEW_LIBRARY=${LIBDIR}/clew/lib/static/${LIBPREFIX}clew${LIBEXT}
)
endif()
@@ -23,6 +23,14 @@ set(TBB_EXTRA_ARGS
-DTBB_BUILD_STATIC=On
)

if(TBB_VERSION MATCHES 2018)
set(TBB_VS_VERSION vs2013)
elseif(TBB_VERSION MATCHES 2017)
set(TBB_VS_VERSION vs2012)
else()
set(TBB_VS_VERSION vs2010)
endif()

# CMake script for TBB from https://github.com/wjakob/tbb/blob/master/CMakeLists.txt
ExternalProject_Add(external_tbb
URL ${TBB_URI}
@@ -30,7 +38,7 @@ ExternalProject_Add(external_tbb
URL_HASH MD5=${TBB_HASH}
PREFIX ${BUILD_DIR}/tbb
PATCH_COMMAND COMMAND ${CMAKE_COMMAND} -E copy ${PATCH_DIR}/cmakelists_tbb.txt ${BUILD_DIR}/tbb/src/external_tbb/CMakeLists.txt &&
${CMAKE_COMMAND} -E copy ${BUILD_DIR}/tbb/src/external_tbb/build/vs2010/version_string.ver ${BUILD_DIR}/tbb/src/external_tbb/src/tbb/version_string.ver
${CMAKE_COMMAND} -E copy ${BUILD_DIR}/tbb/src/external_tbb/build/${TBB_VS_VERSION}/version_string.ver ${BUILD_DIR}/tbb/src/external_tbb/src/tbb/version_string.ver
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/tbb ${DEFAULT_CMAKE_FLAGS} ${TBB_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/tbb
)
@@ -137,9 +137,16 @@ set(PYTHON_SHORT_VERSION_NO_DOTS 36)
set(PYTHON_URI https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz)
set(PYTHON_HASH 2c68846471994897278364fc18730dd9)

set(TBB_VERSION 44_20160128)
set(TBB_URI https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb${TBB_VERSION}oss_src_0.tgz)
set(TBB_HASH 9d8a4cdf43496f1b3f7c473a5248e5cc)
if(UNIX AND NOT APPLE)
# Needed to be compatible with GCC 7, other platforms can upgrade later
set(TBB_VERSION 2017_U7)
set(TBB_URI https://github.com/01org/tbb/archive/${TBB_VERSION}.tar.gz)
set(TBB_HASH 364f2a4b80e978f38a69cbf7c466b898)
else()
set(TBB_VERSION 44_20160128)
set(TBB_URI https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb${TBB_VERSION}oss_src_0.tgz)
set(TBB_HASH 9d8a4cdf43496f1b3f7c473a5248e5cc)
endif()

set(OPENVDB_VERSION 3.1.0)
set(OPENVDB_URI https://github.com/dreamworksanimation/openvdb/archive/v${OPENVDB_VERSION}.tar.gz)
@@ -753,8 +753,8 @@ OIIO_SOURCE=( "https://github.com/OpenImageIO/oiio/archive/Release-$OIIO_VERSION
OIIO_SOURCE_REPO=( "https://github.com/OpenImageIO/oiio.git" )
OIIO_SOURCE_REPO_UID="c9e67275a0b248ead96152f6d2221cc0c0f278a4"

LLVM_SOURCE=( "http://llvm.org/releases/$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.gz" )
LLVM_CLANG_SOURCE=( "http://llvm.org/releases/$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.gz" "http://llvm.org/releases/$LLVM_VERSION/cfe-$LLVM_VERSION.src.tar.gz" )
LLVM_SOURCE=( "http://releases.llvm.org/$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.gz" )
LLVM_CLANG_SOURCE=( "http://releases.llvm.org/$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.gz" "http://llvm.org/releases/$LLVM_VERSION/cfe-$LLVM_VERSION.src.tar.gz" )

OSL_USE_REPO=false
OSL_SOURCE=( "https://github.com/imageworks/OpenShadingLanguage/archive/Release-$OSL_VERSION.tar.gz" )
@@ -37,6 +37,7 @@
# include <cudaGL.h>
#endif
#include "util/util_debug.h"
#include "util/util_foreach.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_md5.h"
@@ -128,6 +129,12 @@ public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int cuDevId;
int cuDevArchitecture;
bool first_error;
@@ -135,12 +142,15 @@ public:

struct CUDAMem {
CUDAMem()
: texobject(0), array(0) {}
: texobject(0), array(0), map_host_pointer(0), free_map_host(false) {}

CUtexObject texobject;
CUarray array;
void *map_host_pointer;
bool free_map_host;
};
map<device_memory*, CUDAMem> cuda_mem_map;
typedef map<device_memory*, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;

struct PixelMem {
GLuint cuPBO;
@@ -240,6 +250,13 @@ public:

need_texture_info = false;

device_texture_headroom = 0;
device_working_headroom = 0;
move_texture_to_host = false;
map_host_limit = 0;
map_host_used = 0;
can_map_host = 0;

/* Intialize CUDA. */
if(cuda_error(cuInit(0)))
return;
@@ -248,9 +265,16 @@ public:
if(cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
return;

/* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
* so we can predict which memory to map to host. */
cuda_assert(cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));

unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
if(can_map_host) {
ctx_flags |= CU_CTX_MAP_HOST;
init_host_memory();
}

/* Create context. */
CUresult result;
@@ -611,6 +635,50 @@ public:
VLOG(1) << "Local memory reserved "
<< string_human_readable_number(free_before - free_after) << " bytes. ("
<< string_human_readable_size(free_before - free_after) << ")";

#if 0
/* For testing mapped host memory, fill up device memory. */
const size_t keep_mb = 1024;

while(free_after > keep_mb * 1024 * 1024LL) {
CUdeviceptr tmp;
cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
cuMemGetInfo(&free_after, &total);
}
#endif
}

void init_host_memory()
{
/* Limit amount of host mapped memory, because allocating too much can
* cause system instability. Leave at least half or 4 GB of system
* memory free, whichever is smaller. */
size_t default_limit = 4 * 1024 * 1024 * 1024LL;
size_t system_ram = system_physical_ram();

if(system_ram > 0) {
if(system_ram / 2 > default_limit) {
map_host_limit = system_ram - default_limit;
}
else {
map_host_limit = system_ram / 2;
}
}
else {
VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
map_host_limit = 0;
}

/* Amount of device memory to keep free after texture memory
* and working memory allocations respectively. We set the working
* memory limit headroom lower so that some space is left after all
* texture memory allocations. */
device_working_headroom = 32 * 1024 * 1024LL; // 32MB
device_texture_headroom = 128 * 1024 * 1024LL; // 128MB

VLOG(1) << "Mapped host memory limit set to "
<< string_human_readable_number(map_host_limit) << " bytes. ("
<< string_human_readable_size(map_host_limit) << ")";
}

void load_texture_info()
@@ -621,20 +689,167 @@ public:
}
}

CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
void move_textures_to_host(size_t size, bool for_texture)
{
/* Signal to reallocate textures in host memory only. */
move_texture_to_host = true;

while(size > 0) {
/* Find suitable memory allocation to move. */
device_memory *max_mem = NULL;
size_t max_size = 0;
bool max_is_image = false;

foreach(CUDAMemMap::value_type& pair, cuda_mem_map) {
device_memory& mem = *pair.first;
CUDAMem *cmem = &pair.second;

bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);

/* Can't move this type of memory. */
if(!is_texture || cmem->array) {
continue;
}

/* Already in host memory. */
if(cmem->map_host_pointer) {
continue;
}

/* For other textures, only move image textures. */
if(for_texture && !is_image) {
continue;
}

/* Try to move largest allocation, prefer moving images. */
if(is_image > max_is_image ||
(is_image == max_is_image && mem.device_size > max_size)) {
max_is_image = is_image;
max_size = mem.device_size;
max_mem = &mem;
}
}

/* Move to host memory. This part is mutex protected since
* multiple CUDA devices could be moving the memory. The
* first one will do it, and the rest will adopt the pointer. */
if(max_mem) {
VLOG(1) << "Move memory from device to host: " << max_mem->name;

static thread_mutex move_mutex;
thread_scoped_lock lock(move_mutex);

/* Preserve the original device pointer, in case of multi device
* we can't change it because the pointer mapping would break. */
device_ptr prev_pointer = max_mem->device_pointer;
size_t prev_size = max_mem->device_size;

tex_free(*max_mem);
tex_alloc(*max_mem);
size = (max_size >= size)? 0: size - max_size;

max_mem->device_pointer = prev_pointer;
max_mem->device_size = prev_size;
}
else {
break;
}
}

/* Update texture info array with new pointers. */
load_texture_info();

move_texture_to_host = false;
}

CUDAMem *generic_alloc(device_memory& mem, size_t pitch_padding = 0)
{
CUDAContextScope scope(this);

CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size() + pitch_padding;

CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
const char *status = "";

/* First try allocating in device memory, respecting headroom. We make
* an exception for texture info. It is small and frequently accessed,
* so treat it as working memory.
*
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);

size_t headroom = (is_texture)? device_texture_headroom:
device_working_headroom;

size_t total = 0, free = 0;
cuMemGetInfo(&free, &total);

/* Move textures to host memory if needed. */
if(!move_texture_to_host && !is_image && (size + headroom) >= free) {
move_textures_to_host(size + headroom - free, is_texture);
cuMemGetInfo(&free, &total);
}

/* Allocate in device memory. */
if(!move_texture_to_host && (size + headroom) < free) {
mem_alloc_result = cuMemAlloc(&device_pointer, size);
if(mem_alloc_result == CUDA_SUCCESS) {
status = " in device memory";
}
}

/* Fall back to mapped host memory if needed and possible. */
void *map_host_pointer = 0;
bool free_map_host = false;

if(mem_alloc_result != CUDA_SUCCESS && can_map_host &&
map_host_used + size < map_host_limit) {
if(mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
map_host_pointer = mem.shared_pointer;
}
else {
/* Allocate host memory ourselves. */
mem_alloc_result = cuMemHostAlloc(&map_host_pointer, size,
CU_MEMHOSTALLOC_DEVICEMAP |
CU_MEMHOSTALLOC_WRITECOMBINED);
mem.shared_pointer = map_host_pointer;
free_map_host = true;
}

if(mem_alloc_result == CUDA_SUCCESS) {
cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, mem.shared_pointer, 0));
map_host_used += size;
status = " in host memory";

/* Replace host pointer with our host allocation. Only works if
* CUDA memory layout is the same and has no pitch padding. */
if(pitch_padding == 0 && mem.host_pointer && mem.host_pointer != mem.shared_pointer) {
memcpy(mem.shared_pointer, mem.host_pointer, size);
mem.host_free();
mem.host_pointer = mem.shared_pointer;
}
}
}

if(mem_alloc_result != CUDA_SUCCESS) {
cuda_assert(mem_alloc_result);
status = " failed, out of memory";
}

if(mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
<< string_human_readable_size(mem.memory_size()) << ")"
<< status;
}

/* Allocate memory on device. */
CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size();
cuda_assert(cuMemAlloc(&device_pointer, size + padding));
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
@@ -645,14 +860,21 @@ public:

/* Insert into map of allocations. */
CUDAMem *cmem = &cuda_mem_map[&mem];
cmem->map_host_pointer = map_host_pointer;
cmem->free_map_host = free_map_host;
return cmem;
}

void generic_copy_to(device_memory& mem)
{
if(mem.device_pointer) {
if(mem.host_pointer && mem.device_pointer) {
CUDAContextScope scope(this);
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));

if(mem.host_pointer != mem.shared_pointer) {
cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer),
mem.host_pointer,
mem.memory_size()));
}
}
}

@@ -660,8 +882,24 @@ public:
{
if(mem.device_pointer) {
CUDAContextScope scope(this);
const CUDAMem& cmem = cuda_mem_map[&mem];

cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
if(cmem.map_host_pointer) {
/* Free host memory. */
if(cmem.free_map_host) {
cuMemFreeHost(cmem.map_host_pointer);
if(mem.host_pointer == mem.shared_pointer) {
mem.host_pointer = 0;
}
mem.shared_pointer = 0;
}

map_host_used -= mem.device_size;
}
else {
/* Free device memory. */
cuMemFree(mem.device_pointer);
}

stats.mem_free(mem.device_size);
mem.device_pointer = 0;
@@ -715,11 +953,11 @@ public:
size_t offset = elem*y*w;
size_t size = elem*w*h;

if(mem.device_pointer) {
if(mem.host_pointer && mem.device_pointer) {
cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
(CUdeviceptr)(mem.device_pointer + offset), size));
}
else {
else if(mem.host_pointer) {
memset((char*)mem.host_pointer + offset, 0, size);
}
}
@@ -735,7 +973,8 @@ public:
memset(mem.host_pointer, 0, mem.memory_size());
}

if(mem.device_pointer) {
if(mem.device_pointer &&
(!mem.host_pointer || mem.host_pointer != mem.shared_pointer)) {
CUDAContextScope scope(this);
cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
}
@@ -774,10 +1013,6 @@ public:
{
CUDAContextScope scope(this);

VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";

/* Check if we are on sm_30 or above, for bindless textures. */
bool has_fermi_limits = info.has_fermi_limits;
@@ -881,6 +1116,10 @@ public:
desc.NumChannels = mem.data_elements;
desc.Flags = 0;

VLOG(1) << "Array 3D allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";

cuda_assert(cuArray3DCreate(&array_3d, &desc));

if(!array_3d) {
@@ -1118,13 +1357,17 @@ public:

int shift_stride = stride*h;
int num_shifts = (2*r+1)*(2*r+1);
int mem_size = sizeof(float)*shift_stride*2*num_shifts;
int mem_size = sizeof(float)*shift_stride*num_shifts;
int channel_offset = 0;

CUdeviceptr temporary_mem;
cuda_assert(cuMemAlloc(&temporary_mem, mem_size));
CUdeviceptr difference = temporary_mem;
CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts;
device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
temporary_mem.alloc_to_device(2*mem_size);

if(have_error())
return false;

CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
CUdeviceptr blurDifference = difference + mem_size;

CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr;
cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride));
@@ -1156,7 +1399,7 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
}

cuMemFree(temporary_mem);
temporary_mem.free();

{
CUfunction cuNLMNormalize;
@@ -1225,10 +1468,14 @@ public:
int num_shifts = (2*r+1)*(2*r+1);
int mem_size = sizeof(float)*shift_stride*num_shifts;

CUdeviceptr temporary_mem;
cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size));
CUdeviceptr difference = temporary_mem;
CUdeviceptr blurDifference = temporary_mem + mem_size;
device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
temporary_mem.alloc_to_device(2*mem_size);

if(have_error())
return false;

CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer);
CUdeviceptr blurDifference = difference + mem_size;

{
CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1268,7 +1515,7 @@ public:
CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
}

cuMemFree(temporary_mem);
temporary_mem.free();

{
CUfunction cuFinalize;
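The host-mapped fallback introduced in the CUDA device changes above comes down to two rules: cap host-mapped memory so that at least half of system RAM or 4 GB stays free (whichever is smaller), and only reach for cuMemHostAlloc once a device allocation plus its headroom no longer fits. A minimal standalone sketch of that sizing rule follows; it uses hypothetical names and inputs and is not the Cycles implementation itself.

#include <cstdint>
#include <cstdio>

/* Same rule as init_host_memory() in the diff above: leave at least half of
 * system RAM or 4 GB free, whichever is smaller; unknown RAM disables mapping. */
static uint64_t compute_map_host_limit(uint64_t system_ram)
{
	const uint64_t default_limit = 4ull * 1024 * 1024 * 1024;
	if (system_ram == 0) {
		return 0;
	}
	return (system_ram / 2 > default_limit) ? system_ram - default_limit
	                                        : system_ram / 2;
}

int main()
{
	/* Hypothetical machines: 16 GiB keeps 4 GiB free, 4 GiB keeps half free. */
	const uint64_t gib = 1024ull * 1024 * 1024;
	printf("16 GiB RAM -> limit %llu GiB\n",
	       (unsigned long long)(compute_map_host_limit(16 * gib) / gib));
	printf(" 4 GiB RAM -> limit %llu GiB\n",
	       (unsigned long long)(compute_map_host_limit(4 * gib) / gib));
	return 0;
}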
@@ -35,7 +35,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
extension(EXTENSION_REPEAT),
device(device),
device_pointer(0),
host_pointer(0)
host_pointer(0),
shared_pointer(0)
{
}

@@ -86,7 +87,7 @@ void device_memory::device_free()

void device_memory::device_copy_to()
{
if(data_size) {
if(host_pointer) {
device->mem_copy_to(*this);
}
}

@@ -197,10 +197,13 @@ public:
Device *device;
device_ptr device_pointer;
void *host_pointer;
void *shared_pointer;

virtual ~device_memory();

protected:
friend class CUDADevice;

/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
@@ -48,11 +48,17 @@ public:
MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_), unique_key(1)
{
Device *device;

foreach(DeviceInfo& subinfo, info.multi_devices) {
device = Device::create(subinfo, sub_stats_, background);
devices.push_back(SubDevice(device));
Device *device = Device::create(subinfo, sub_stats_, background);

/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
if(subinfo.type == DEVICE_CPU) {
devices.push_back(SubDevice(device));
}
else {
devices.push_front(SubDevice(device));
}
}

#ifdef WITH_NETWORK
@@ -63,7 +69,7 @@ public:
vector<string> servers = discovery.get_server_list();

foreach(string& server, servers) {
device = device_network_create(info, stats, server.c_str());
Device *device = device_network_create(info, stats, server.c_str());
if(device)
devices.push_back(SubDevice(device));
}
@@ -211,9 +211,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
break;
}
case NODE_CLOSURE_BSDF:
if(type == SHADER_TYPE_SURFACE) {
svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset);
}
svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset);
break;
case NODE_CLOSURE_EMISSION:
svm_node_closure_emission(sd, stack, node);
@@ -331,9 +329,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a
break;
# if NODES_FEATURE(NODE_FEATURE_VOLUME)
case NODE_CLOSURE_VOLUME:
if(type == SHADER_TYPE_VOLUME) {
svm_node_closure_volume(kg, sd, stack, node, path_flag);
}
svm_node_closure_volume(kg, sd, stack, node, type, path_flag);
break;
# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */
# ifdef __EXTRA_NODES__

@@ -56,7 +56,7 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int t
}
}

ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset)
ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset)
{
uint type, param1_offset, param2_offset;

@@ -67,8 +67,18 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
/* note we read this extra node before weight check, so offset is added */
uint4 data_node = read_node(kg, offset);

if(mix_weight == 0.0f)
/* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */
if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) {
if(type == CLOSURE_BSDF_PRINCIPLED_ID) {
/* Read all principled BSDF extra data to get the right offset. */
read_node(kg, offset);
read_node(kg, offset);
read_node(kg, offset);
read_node(kg, offset);
}

return;
}

float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;

@@ -835,9 +845,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
}
}

ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag)
ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag)
{
#ifdef __VOLUME__
/* Only sum extinction for volumes, variable is shared with surface transparency. */
if(shader_type != SHADER_TYPE_VOLUME) {
return;
}

uint type, param1_offset, param2_offset;

uint mix_weight_offset;
@@ -151,6 +151,10 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
progress.reset_sample();
progress.set_total_pixel_samples(total_pixel_samples);

/* needs to be up to date for baking specific AA samples */
dscene->data.integrator.aa_samples = num_samples;
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));

for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);

@@ -175,9 +179,6 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
d_output.zero_to_device();
d_input.copy_to_device();

/* needs to be up to data for attribute access */
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));

DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer;
task.shader_output = d_output.device_pointer;
@@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device()

bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
{
if(buffer.data() == NULL) {
return false;
}

float invsample = 1.0f/sample;
float scale = invsample;
bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
@@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp

bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
{
if(buffer.data() == NULL) {
return false;
}

int pass_offset = 0;

for(size_t j = 0; j < params.passes.size(); j++) {
@@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device,

/* Slot assignment */
int flat_slot = type_index_to_flattened_slot(slot, type);
string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);

/* Free previous texture in slot. */
if(img->mem) {
@@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device,
/* Create new texture. */
if(type == IMAGE_DATA_TYPE_FLOAT4) {
device_vector<float4> *tex_img
= new device_vector<float4>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::FLOAT, float>(img,
type,
@@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_FLOAT) {
device_vector<float> *tex_img
= new device_vector<float>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::FLOAT, float>(img,
type,
@@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_BYTE4) {
device_vector<uchar4> *tex_img
= new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::UINT8, uchar>(img,
type,
@@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_BYTE) {
device_vector<uchar> *tex_img
= new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::UINT8, uchar>(img,
type,
@@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_HALF4) {
device_vector<half4> *tex_img
= new device_vector<half4>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::HALF, half>(img,
type,
@@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device,
}
else if(type == IMAGE_DATA_TYPE_HALF) {
device_vector<half> *tex_img
= new device_vector<half>(device, name.c_str(), MEM_TEXTURE);
= new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE);

if(!file_load_image<TypeDesc::HALF, half>(img,
type,

@@ -111,6 +111,7 @@ public:
InterpolationType interpolation;
ExtensionType extension;

string mem_name;
device_memory *mem;

int users;
@@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *,

void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
{
if(scene->objects.size() == 0) {
if(dscene->objects.size() == 0) {
return;
}
@@ -292,5 +292,26 @@ bool system_cpu_support_avx2()

#endif

size_t system_physical_ram()
{
#ifdef _WIN32
MEMORYSTATUSEX ram;
ram.dwLength = sizeof (ram);
GlobalMemoryStatusEx(&ram);
return ram.ullTotalPhys * 1024;
#elif defined(__APPLE__)
uint64_t ram = 0;
size_t len = sizeof(ram);
if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) {
return ram;
}
return 0;
#else
size_t ps = sysconf(_SC_PAGESIZE);
size_t pn = sysconf(_SC_PHYS_PAGES);
return ps * pn;
#endif
}

CCL_NAMESPACE_END

@@ -42,6 +42,8 @@ bool system_cpu_support_sse41();
bool system_cpu_support_avx();
bool system_cpu_support_avx2();

size_t system_physical_ram();

CCL_NAMESPACE_END

#endif /* __UTIL_SYSTEM_H__ */
@@ -1067,6 +1067,7 @@ class SmartProject(Operator):
island_margin = FloatProperty(
name="Island Margin",
description="Margin to reduce bleed from adjacent islands",
unit='LENGTH', subtype='DISTANCE',
min=0.0, max=1.0,
default=0.0,
)

@@ -581,10 +581,6 @@ class PARTICLE_PT_physics(ParticleButtonsPanel, Panel):
layout.row().prop(part, "physics_type", expand=True)

row = layout.row()
col = row.column(align=True)
col.prop(part, "particle_size")
col.prop(part, "size_random", slider=True)

if part.physics_type != 'NO':
col = row.column(align=True)
col.prop(part, "mass")
@@ -1088,7 +1084,8 @@ class PARTICLE_PT_render(ParticleButtonsPanel, Panel):
col = row.column()
col.label(text="")

if part.render_type in {'OBJECT', 'GROUP'} and not part.use_advanced_hair:
if part.type == 'EMITTER' or \
(part.render_type in {'OBJECT', 'GROUP'} and part.type == 'HAIR' and not part.use_advanced_hair):
row = layout.row(align=True)
row.prop(part, "particle_size")
row.prop(part, "size_random", slider=True)
@@ -34,6 +34,9 @@
/* struct DerivedMesh is used directly */
#include "BKE_DerivedMesh.h"

/* Thread sync primitives used directly. */
#include "BLI_threads.h"

struct CCGElem;
struct DMFlagMat;
struct DMGridAdjacency;
@@ -140,6 +143,9 @@ typedef struct CCGDerivedMesh {
} multires;

struct EdgeHash *ehash;

ThreadRWMutex loops_cache_rwlock;
ThreadRWMutex origindex_cache_rwlock;
} CCGDerivedMesh;

#ifdef WITH_OPENSUBDIV
@@ -173,10 +173,11 @@ typedef struct MeshCalcNormalsData {
const MLoop *mloop;
MVert *mverts;
float (*pnors)[3];
float (*lnors_weighted)[3];
float (*vnors)[3];
} MeshCalcNormalsData;

static void mesh_calc_normals_poly_task_cb(void *userdata, const int pidx)
static void mesh_calc_normals_poly_cb(void *userdata, const int pidx)
{
MeshCalcNormalsData *data = userdata;
const MPoly *mp = &data->mpolys[pidx];
@@ -184,7 +185,7 @@ static void mesh_calc_normals_poly_task_cb(void *userdata, const int pidx)
BKE_mesh_calc_poly_normal(mp, data->mloop + mp->loopstart, data->mverts, data->pnors[pidx]);
}

static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx)
static void mesh_calc_normals_poly_prepare_cb(void *userdata, const int pidx)
{
MeshCalcNormalsData *data = userdata;
const MPoly *mp = &data->mpolys[pidx];
@@ -193,7 +194,7 @@ static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx)

float pnor_temp[3];
float *pnor = data->pnors ? data->pnors[pidx] : pnor_temp;
float (*vnors)[3] = data->vnors;
float (*lnors_weighted)[3] = data->lnors_weighted;

const int nverts = mp->totloop;
float (*edgevecbuf)[3] = BLI_array_alloca(edgevecbuf, (size_t)nverts);
@@ -220,42 +221,62 @@ static void mesh_calc_normals_poly_accum_task_cb(void *userdata, const int pidx)
v_prev = v_curr;
}
if (UNLIKELY(normalize_v3(pnor) == 0.0f)) {
pnor[2] = 1.0f; /* other axis set to 0.0 */
pnor[2] = 1.0f; /* other axes set to 0.0 */
}
}

/* accumulate angle weighted face normal */
/* inline version of #accumulate_vertex_normals_poly_v3 */
/* inline version of #accumulate_vertex_normals_poly_v3,
* split between this threaded callback and #mesh_calc_normals_poly_accum_cb. */
{
const float *prev_edge = edgevecbuf[nverts - 1];

for (i = 0; i < nverts; i++) {
const int lidx = mp->loopstart + i;
const float *cur_edge = edgevecbuf[i];

/* calculate angle between the two poly edges incident on
* this vertex */
const float fac = saacos(-dot_v3v3(cur_edge, prev_edge));

/* accumulate */
for (int k = 3; k--; ) {
atomic_add_and_fetch_fl(&vnors[ml[i].v][k], pnor[k] * fac);
}
/* Store for later accumulation */
mul_v3_v3fl(lnors_weighted[lidx], pnor, fac);

prev_edge = cur_edge;
}
}
}

static void mesh_calc_normals_poly_accum_cb(void *userdata, const int lidx)
{
MeshCalcNormalsData *data = userdata;

add_v3_v3(data->vnors[data->mloop[lidx].v], data->lnors_weighted[lidx]);
}

static void mesh_calc_normals_poly_finalize_cb(void *userdata, const int vidx)
{
MeshCalcNormalsData *data = userdata;

MVert *mv = &data->mverts[vidx];
float *no = data->vnors[vidx];

if (UNLIKELY(normalize_v3(no) == 0.0f)) {
/* following Mesh convention; we use vertex coordinate itself for normal in this case */
normalize_v3_v3(no, mv->co);
}

normal_float_to_short_v3(mv->no, no);
}

void BKE_mesh_calc_normals_poly(
MVert *mverts, float (*r_vertnors)[3], int numVerts,
const MLoop *mloop, const MPoly *mpolys,
int UNUSED(numLoops), int numPolys, float (*r_polynors)[3],
int numLoops, int numPolys, float (*r_polynors)[3],
const bool only_face_normals)
{
const bool do_threaded = (numPolys > BKE_MESH_OMP_LIMIT);
float (*pnors)[3] = r_polynors;
float (*vnors)[3] = r_vertnors;
bool free_vnors = false;
int i;

if (only_face_normals) {
BLI_assert((pnors != NULL) || (numPolys == 0));
@@ -265,10 +286,14 @@ void BKE_mesh_calc_normals_poly(
.mpolys = mpolys, .mloop = mloop, .mverts = mverts, .pnors = pnors,
};

BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_task_cb, (numPolys > BKE_MESH_OMP_LIMIT));
BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_cb, do_threaded);
return;
}

float (*vnors)[3] = r_vertnors;
float (*lnors_weighted)[3] = MEM_mallocN(sizeof(*lnors_weighted) * (size_t)numLoops, __func__);
bool free_vnors = false;

/* first go through and calculate normals for all the polys */
if (vnors == NULL) {
vnors = MEM_callocN(sizeof(*vnors) * (size_t)numVerts, __func__);
@@ -279,26 +304,23 @@ void BKE_mesh_calc_normals_poly(
}

MeshCalcNormalsData data = {
.mpolys = mpolys, .mloop = mloop, .mverts = mverts, .pnors = pnors, .vnors = vnors,
.mpolys = mpolys, .mloop = mloop, .mverts = mverts,
.pnors = pnors, .lnors_weighted = lnors_weighted, .vnors = vnors
};

BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_accum_task_cb, (numPolys > BKE_MESH_OMP_LIMIT));
/* Compute poly normals, and prepare weighted loop normals. */
BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, do_threaded);

for (i = 0; i < numVerts; i++) {
MVert *mv = &mverts[i];
float *no = vnors[i];
/* Actually accumulate weighted loop normals into vertex ones. */
BLI_task_parallel_range(0, numLoops, &data, mesh_calc_normals_poly_accum_cb, do_threaded);

if (UNLIKELY(normalize_v3(no) == 0.0f)) {
/* following Mesh convention; we use vertex coordinate itself for normal in this case */
normalize_v3_v3(no, mv->co);
}

normal_float_to_short_v3(mv->no, no);
}
/* Normalize and validate computed vertex normals. */
BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, do_threaded);

if (free_vnors) {
MEM_freeN(vnors);
}
MEM_freeN(lnors_weighted);
}

void BKE_mesh_calc_normals(Mesh *mesh)
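The mesh.c change above drops the per-component atomic adds and splits vertex-normal computation into three passes: a prepare pass that stores one angle-weighted normal per loop (so each thread writes only its own slots), an accumulate pass over loops, and a finalize pass over vertices. A rough serial sketch of the last two passes, with simplified types that are not Blender's own API:

#include <cmath>
#include <vector>

struct V3 { float x = 0.0f, y = 0.0f, z = 0.0f; };

/* lnors_weighted[l] is the angle-weighted poly normal stored for loop l in the
 * prepare pass; loop_vert[l] is the vertex index that loop l belongs to. */
static void accumulate_and_finalize(const std::vector<V3> &lnors_weighted,
                                    const std::vector<int> &loop_vert,
                                    std::vector<V3> &vnors)
{
	/* Accum pass: per loop, add its weighted normal into its vertex. */
	for (size_t l = 0; l < lnors_weighted.size(); l++) {
		V3 &v = vnors[loop_vert[l]];
		v.x += lnors_weighted[l].x;
		v.y += lnors_weighted[l].y;
		v.z += lnors_weighted[l].z;
	}
	/* Finalize pass: per vertex, normalize the accumulated normal. */
	for (V3 &v : vnors) {
		const float len = std::sqrt(v.x * v.x + v.y * v.y + v.z * v.z);
		if (len > 0.0f) {
			v.x /= len; v.y /= len; v.z /= len;
		}
	}
}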
@@ -4501,8 +4501,10 @@ Sequence *BKE_sequencer_foreground_frame_get(Scene *scene, int frame)
for (seq = ed->seqbasep->first; seq; seq = seq->next) {
if (seq->flag & SEQ_MUTE || seq->startdisp > frame || seq->enddisp <= frame)
continue;
/* only use elements you can see - not */
if (ELEM(seq->type, SEQ_TYPE_IMAGE, SEQ_TYPE_META, SEQ_TYPE_SCENE, SEQ_TYPE_MOVIE, SEQ_TYPE_COLOR)) {
/* Only use strips that generate an image, not ones that combine
* other strips or apply some effect. */
if (ELEM(seq->type, SEQ_TYPE_IMAGE, SEQ_TYPE_META, SEQ_TYPE_SCENE,
SEQ_TYPE_MOVIE, SEQ_TYPE_COLOR, SEQ_TYPE_TEXT)) {
if (seq->machine > best_machine) {
best_seq = seq;
best_machine = seq->machine;
@@ -90,9 +90,6 @@
/* assumes MLoop's are layed out 4 for each poly, in order */
#define USE_LOOP_LAYOUT_FAST

static ThreadRWMutex loops_cache_rwlock = BLI_RWLOCK_INITIALIZER;
static ThreadRWMutex origindex_cache_rwlock = BLI_RWLOCK_INITIALIZER;

static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss,
int drawInteriorEdges,
int useSubsurfUv,
@@ -1492,21 +1489,24 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop)
/* DMFlagMat *faceFlags = ccgdm->faceFlags; */ /* UNUSED */

if (!ccgdm->ehash) {
BLI_rw_mutex_lock(&loops_cache_rwlock, THREAD_LOCK_WRITE);
BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_WRITE);
if (!ccgdm->ehash) {
MEdge *medge;
EdgeHash *ehash;

ccgdm->ehash = BLI_edgehash_new_ex(__func__, ccgdm->dm.numEdgeData);
ehash = BLI_edgehash_new_ex(__func__, ccgdm->dm.numEdgeData);
medge = ccgdm->dm.getEdgeArray((DerivedMesh *)ccgdm);

for (i = 0; i < ccgdm->dm.numEdgeData; i++) {
BLI_edgehash_insert(ccgdm->ehash, medge[i].v1, medge[i].v2, SET_INT_IN_POINTER(i));
BLI_edgehash_insert(ehash, medge[i].v1, medge[i].v2, SET_INT_IN_POINTER(i));
}

atomic_cas_ptr((void**)&ccgdm->ehash, ccgdm->ehash, ehash);
}
BLI_rw_mutex_unlock(&loops_cache_rwlock);
BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock);
}

BLI_rw_mutex_lock(&loops_cache_rwlock, THREAD_LOCK_READ);
BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_READ);
totface = ccgSubSurf_getNumFaces(ss);
mv = mloop;
for (index = 0; index < totface; index++) {
@@ -1549,7 +1549,7 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop)
}
}
}
BLI_rw_mutex_unlock(&loops_cache_rwlock);
BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock);
}

static void ccgDM_copyFinalPolyArray(DerivedMesh *dm, MPoly *mpoly)
@@ -3796,6 +3796,10 @@ static void ccgDM_release(DerivedMesh *dm)
MEM_freeN(ccgdm->edgeMap);
MEM_freeN(ccgdm->faceMap);
}

BLI_rw_mutex_end(&ccgdm->loops_cache_rwlock);
BLI_rw_mutex_end(&ccgdm->origindex_cache_rwlock);

MEM_freeN(ccgdm);
}
}
@@ -3810,14 +3814,14 @@ static void *ccgDM_get_vert_data_layer(DerivedMesh *dm, int type)
int a, index, totnone, totorig;

/* Avoid re-creation if the layer exists already */
BLI_rw_mutex_lock(&origindex_cache_rwlock, THREAD_LOCK_READ);
BLI_rw_mutex_lock(&ccgdm->origindex_cache_rwlock, THREAD_LOCK_READ);
origindex = DM_get_vert_data_layer(dm, CD_ORIGINDEX);
BLI_rw_mutex_unlock(&origindex_cache_rwlock);
BLI_rw_mutex_unlock(&ccgdm->origindex_cache_rwlock);
if (origindex) {
return origindex;
}

BLI_rw_mutex_lock(&origindex_cache_rwlock, THREAD_LOCK_WRITE);
BLI_rw_mutex_lock(&ccgdm->origindex_cache_rwlock, THREAD_LOCK_WRITE);
DM_add_vert_layer(dm, CD_ORIGINDEX, CD_CALLOC, NULL);
origindex = DM_get_vert_data_layer(dm, CD_ORIGINDEX);

@@ -3832,7 +3836,7 @@ static void *ccgDM_get_vert_data_layer(DerivedMesh *dm, int type)
CCGVert *v = ccgdm->vertMap[index].vert;
origindex[a] = ccgDM_getVertMapIndex(ccgdm->ss, v);
}
BLI_rw_mutex_unlock(&origindex_cache_rwlock);
BLI_rw_mutex_unlock(&ccgdm->origindex_cache_rwlock);

return origindex;
}
@@ -4784,6 +4788,9 @@ static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss,
ccgdm->dm.numLoopData = ccgdm->dm.numPolyData * 4;
ccgdm->dm.numTessFaceData = 0;

BLI_rw_mutex_init(&ccgdm->loops_cache_rwlock);
BLI_rw_mutex_init(&ccgdm->origindex_cache_rwlock);

return ccgdm;
}
@@ -1101,7 +1101,7 @@ static void task_parallel_range_ex(
}

task_scheduler = BLI_task_scheduler_get();
task_pool = BLI_task_pool_create(task_scheduler, &state);
task_pool = BLI_task_pool_create_suspended(task_scheduler, &state);
num_threads = BLI_task_scheduler_num_threads(task_scheduler);

/* The idea here is to prevent creating task for each of the loop iterations
@@ -1124,6 +1124,9 @@ static void task_parallel_range_ex(
}

num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size);

/* NOTE: This way we are adding a memory barrier and ensure all worker
* threads can read and modify the value, without any locks. */
atomic_fetch_and_add_int32(&state.iter, 0);

if (use_userdata_chunk) {
@@ -1325,7 +1328,7 @@ void BLI_task_parallel_listbase(
}

task_scheduler = BLI_task_scheduler_get();
task_pool = BLI_task_pool_create(task_scheduler, &state);
task_pool = BLI_task_pool_create_suspended(task_scheduler, &state);
num_threads = BLI_task_scheduler_num_threads(task_scheduler);

/* The idea here is to prevent creating task for each of the loop iterations
@@ -1413,7 +1416,7 @@ void BLI_task_parallel_mempool(
}

task_scheduler = BLI_task_scheduler_get();
task_pool = BLI_task_pool_create(task_scheduler, &state);
task_pool = BLI_task_pool_create_suspended(task_scheduler, &state);
num_threads = BLI_task_scheduler_num_threads(task_scheduler);

/* The idea here is to prevent creating task for each of the loop iterations
@@ -985,7 +985,7 @@ void DepsgraphNodeBuilder::build_obdata_geom(Object *object)

// TODO: "Done" operation

/* Cloyth modifier. */
/* Cloth modifier. */
LINKLIST_FOREACH (ModifierData *, md, &object->modifiers) {
if (md->type == eModifierType_Cloth) {
build_cloth(object);

@@ -923,7 +923,7 @@ void DepsgraphRelationBuilder::build_animdata(ID *id)
/* Animation curves and NLA. */
build_animdata_curves(id);
/* Drivers. */
build_animdata_drievrs(id);
build_animdata_drivers(id);
}

void DepsgraphRelationBuilder::build_animdata_curves(ID *id)
@@ -992,7 +992,7 @@ void DepsgraphRelationBuilder::build_animdata_curves_targets(ID *id)
}
}

void DepsgraphRelationBuilder::build_animdata_drievrs(ID *id)
void DepsgraphRelationBuilder::build_animdata_drivers(ID *id)
{
AnimData *adt = BKE_animdata_from_id(id);
if (adt == NULL) {
@@ -1922,7 +1922,8 @@ void DepsgraphRelationBuilder::build_gpencil(bGPdata *gpd)
// TODO: parent object (when that feature is implemented)
}

void DepsgraphRelationBuilder::build_cachefile(CacheFile *cache_file) {
void DepsgraphRelationBuilder::build_cachefile(CacheFile *cache_file)
{
/* Animation. */
build_animdata(&cache_file->id);
}

@@ -206,7 +206,7 @@ struct DepsgraphRelationBuilder
void build_animdata(ID *id);
void build_animdata_curves(ID *id);
void build_animdata_curves_targets(ID *id);
void build_animdata_drievrs(ID *id);
void build_animdata_drivers(ID *id);
void build_driver(ID *id, FCurve *fcurve);
void build_driver_data(ID *id, FCurve *fcurve);
void build_driver_variables(ID *id, FCurve *fcurve);

@@ -126,7 +126,7 @@ typedef struct tJoinArmature_AdtFixData {
GHash *names_map;
} tJoinArmature_AdtFixData;

/* Callback to pass to void BKE_animdata_main_cb() for fixing driver ID's to point to the new ID */
/* Callback to pass to BKE_animdata_main_cb() for fixing driver ID's to point to the new ID */
/* FIXME: For now, we only care about drivers here. When editing rigs, it's very rare to have animation
* on the rigs being edited already, so it should be safe to skip these.
*/

@@ -1711,7 +1711,7 @@ void ui_but_add_search(uiBut *but, PointerRNA *ptr, PropertyRNA *prop, PointerRN
}
else if (but->type == UI_BTYPE_SEARCH_MENU) {
/* In case we fail to find proper searchprop, so other code might have already set but->type to search menu... */
but->type = UI_BTYPE_LABEL;
but->flag |= UI_BUT_DISABLED;
}
}