Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552
|
@ -79,7 +79,7 @@ BVHLayout BVHParams::best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask s
|
|||
BVH::BVH(const BVHParams ¶ms_,
|
||||
const vector<Geometry *> &geometry_,
|
||||
const vector<Object *> &objects_)
|
||||
: params(params_), geometry(geometry_), objects(objects_)
|
||||
: params(params_), geometry(geometry_), objects(objects_), built(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,7 @@ BVH *BVH::create(const BVHParams ¶ms,
|
|||
case BVH_LAYOUT_EMBREE:
|
||||
case BVH_LAYOUT_EMBREEGPU:
|
||||
#ifdef WITH_EMBREE
|
||||
return new BVHEmbree(params, geometry, objects);
|
||||
return new BVHEmbree(params, geometry, objects, device);
|
||||
#else
|
||||
break;
|
||||
#endif
|
||||
|
@ -127,7 +127,7 @@ BVH *BVH::create(const BVHParams ¶ms,
|
|||
case BVH_LAYOUT_MULTI_METAL_EMBREE:
|
||||
case BVH_LAYOUT_MULTI_HIPRT_EMBREE:
|
||||
case BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE:
|
||||
return new BVHMulti(params, geometry, objects);
|
||||
return new BVHMulti(params, geometry, objects, device);
|
||||
case BVH_LAYOUT_NONE:
|
||||
case BVH_LAYOUT_ALL:
|
||||
break;
|
||||
|
|
|
@ -65,18 +65,26 @@ class BVH {
|
|||
BVHParams params;
|
||||
vector<Geometry *> geometry;
|
||||
vector<Object *> objects;
|
||||
|
||||
bool built = false;
|
||||
|
||||
static BVH *create(const BVHParams ¶ms,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects,
|
||||
Device *device);
|
||||
virtual ~BVH() {}
|
||||
|
||||
virtual BVH *get_device_bvh(const Device *)
|
||||
{
|
||||
return this;
|
||||
};
|
||||
virtual void set_device_bvh(const Device *, BVH *){};
|
||||
|
||||
virtual void replace_geometry(const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects)
|
||||
{
|
||||
this->geometry = geometry;
|
||||
this->objects = objects;
|
||||
this->built = false;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
@ -34,7 +34,7 @@ BVH2::BVH2(const BVHParams ¶ms_,
|
|||
|
||||
void BVH2::build(Progress &progress, Stats *)
|
||||
{
|
||||
progress.set_substatus("Building BVH");
|
||||
progress.set_substatus("Building BVH2 BVH");
|
||||
|
||||
/* build nodes */
|
||||
BVHBuild bvh_build(objects,
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
|
||||
#include "util/types.h"
|
||||
#include "util/vector.h"
|
||||
#include "util/thread.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -32,6 +35,12 @@ struct BVHStackEntry {
|
|||
*/
|
||||
class BVH2 : public BVH {
|
||||
public:
|
||||
/* The BVH2 needs to be built only once these
|
||||
are used to ensure that it the case. */
|
||||
thread_mutex build_mutex;
|
||||
thread_condition_variable build_cv;
|
||||
std::atomic<int> building{0};
|
||||
|
||||
void build(Progress &progress, Stats *stats);
|
||||
void refit(Progress &progress);
|
||||
|
||||
|
|
|
@ -95,11 +95,13 @@ static bool rtc_progress_func(void *user_ptr, const double n)
|
|||
|
||||
BVHEmbree::BVHEmbree(const BVHParams ¶ms_,
|
||||
const vector<Geometry *> &geometry_,
|
||||
const vector<Object *> &objects_)
|
||||
const vector<Object *> &objects_,
|
||||
const Device *device_)
|
||||
: BVH(params_, geometry_, objects_),
|
||||
scene(NULL),
|
||||
rtc_device(NULL),
|
||||
build_quality(RTC_BUILD_QUALITY_REFIT)
|
||||
build_quality(RTC_BUILD_QUALITY_REFIT),
|
||||
device(device_)
|
||||
{
|
||||
SIMD_SET_FLUSH_TO_ZERO;
|
||||
}
|
||||
|
@ -203,7 +205,7 @@ void BVHEmbree::add_object(Object *ob, int i)
|
|||
|
||||
void BVHEmbree::add_instance(Object *ob, int i)
|
||||
{
|
||||
BVHEmbree *instance_bvh = (BVHEmbree *)(ob->get_geometry()->bvh);
|
||||
BVHEmbree *instance_bvh = (BVHEmbree *)(ob->get_geometry()->bvh->get_device_bvh(device));
|
||||
assert(instance_bvh != NULL);
|
||||
|
||||
const size_t num_object_motion_steps = ob->use_motion() ? ob->get_motion().size() : 1;
|
||||
|
|
|
@ -26,6 +26,7 @@ CCL_NAMESPACE_BEGIN
|
|||
class Hair;
|
||||
class Mesh;
|
||||
class PointCloud;
|
||||
class Device;
|
||||
|
||||
class BVHEmbree : public BVH {
|
||||
public:
|
||||
|
@ -41,7 +42,8 @@ class BVHEmbree : public BVH {
|
|||
friend class BVH;
|
||||
BVHEmbree(const BVHParams ¶ms,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects);
|
||||
const vector<Object *> &objects,
|
||||
const Device *device);
|
||||
virtual ~BVHEmbree();
|
||||
|
||||
void add_object(Object *ob, int i);
|
||||
|
@ -60,6 +62,7 @@ class BVHEmbree : public BVH {
|
|||
RTCDevice rtc_device;
|
||||
bool rtc_device_is_sycl;
|
||||
enum RTCBuildQuality build_quality;
|
||||
const Device *device;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -2,29 +2,63 @@
|
|||
* Copyright 2020-2022 Blender Foundation. */
|
||||
|
||||
#include "bvh/multi.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "util/foreach.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BVHMulti::BVHMulti(const BVHParams ¶ms_,
|
||||
const vector<Geometry *> &geometry_,
|
||||
const vector<Object *> &objects_)
|
||||
: BVH(params_, geometry_, objects_)
|
||||
const vector<Object *> &objects_,
|
||||
const Device *device_)
|
||||
: BVH(params_, geometry_, objects_), device(device_)
|
||||
{
|
||||
// Resize the sub-bvh container to match the number of devices
|
||||
int n = device->get_num_devices();
|
||||
sub_bvhs.resize(n);
|
||||
}
|
||||
|
||||
BVHMulti::~BVHMulti()
|
||||
BVHMulti::~BVHMulti() { }
|
||||
|
||||
BVH *BVHMulti::get_device_bvh(const Device *subdevice)
|
||||
{
|
||||
foreach (BVH *bvh, sub_bvhs) {
|
||||
delete bvh;
|
||||
BVH *bvh = NULL;
|
||||
if (subdevice == device) {
|
||||
bvh = this;
|
||||
}
|
||||
else {
|
||||
int id = device->device_number(subdevice);
|
||||
assert(id != -1);
|
||||
if (id != -1) {
|
||||
resize_sub_bvhs_if_needed(id);
|
||||
bvh = sub_bvhs[id].get();
|
||||
}
|
||||
}
|
||||
return bvh;
|
||||
}
|
||||
|
||||
void BVHMulti::set_device_bvh(const Device *subdevice, BVH *bvh)
|
||||
{
|
||||
int id = device->device_number(subdevice);
|
||||
resize_sub_bvhs_if_needed(id);
|
||||
sub_bvhs[id] = unique_ptr<BVH>(bvh);
|
||||
};
|
||||
|
||||
/**
|
||||
* Resize the sub_bvh array if it is not big enough
|
||||
* to hold a device with the given id.
|
||||
*/
|
||||
void BVHMulti::resize_sub_bvhs_if_needed(int id)
|
||||
{
|
||||
if ((id != -1) && (id >= sub_bvhs.size())) {
|
||||
sub_bvhs.resize(id + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void BVHMulti::replace_geometry(const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects)
|
||||
{
|
||||
foreach (BVH *bvh, sub_bvhs) {
|
||||
foreach (auto &bvh, sub_bvhs) {
|
||||
bvh->replace_geometry(geometry, objects);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,21 +7,30 @@
|
|||
#include "bvh/bvh.h"
|
||||
#include "bvh/params.h"
|
||||
|
||||
#include "util/vector.h"
|
||||
#include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BVHMulti : public BVH {
|
||||
public:
|
||||
vector<BVH *> sub_bvhs;
|
||||
vector<unique_ptr<BVH>> sub_bvhs;
|
||||
|
||||
virtual BVH *get_device_bvh(const Device *device) override;
|
||||
virtual void set_device_bvh(const Device *sub_device, BVH *bvh) override;
|
||||
protected:
|
||||
friend class BVH;
|
||||
BVHMulti(const BVHParams ¶ms,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects);
|
||||
const vector<Object *> &objects,
|
||||
const Device *device);
|
||||
virtual ~BVHMulti();
|
||||
|
||||
const Device *device;
|
||||
void resize_sub_bvhs_if_needed(int id);
|
||||
|
||||
virtual void replace_geometry(const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects);
|
||||
const vector<Object *> &objects) override;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -134,23 +134,27 @@ void CPUDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_to(device_memory &mem)
|
||||
void CPUDevice::mem_copy_to(device_memory &mem, size_t /* size */, size_t /* offset */)
|
||||
{
|
||||
/* size (2n param) or offset are not used as this does not actually copy anything
|
||||
* as the original host memory is used as is. The device
|
||||
* memory is the same memory.
|
||||
*/
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
mem_alloc(mem);
|
||||
}
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
mem_alloc(mem);
|
||||
}
|
||||
|
||||
/* copy is no-op */
|
||||
}
|
||||
/* copy is no-op */
|
||||
}
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_from(
|
||||
|
@ -260,7 +264,7 @@ void CPUDevice::tex_free(device_texture &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
void CPUDevice::build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit)
|
||||
{
|
||||
#ifdef WITH_EMBREE
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
|
||||
|
@ -282,7 +286,7 @@ void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
}
|
||||
else
|
||||
#endif
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
Device::build_bvh(bvh, dscene, progress, refit);
|
||||
}
|
||||
|
||||
void *CPUDevice::get_guiding_device() const
|
||||
|
|
|
@ -63,7 +63,7 @@ class CPUDevice : public Device {
|
|||
bool load_texture_info();
|
||||
|
||||
virtual void mem_alloc(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
virtual void mem_copy_from(
|
||||
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
virtual void mem_zero(device_memory &mem) override;
|
||||
|
@ -80,7 +80,7 @@ class CPUDevice : public Device {
|
|||
void tex_alloc(device_texture &mem);
|
||||
void tex_free(device_texture &mem);
|
||||
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override;
|
||||
|
||||
void *get_guiding_device() const override;
|
||||
|
||||
|
@ -88,6 +88,7 @@ class CPUDevice : public Device {
|
|||
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
|
||||
virtual void *get_cpu_osl_memory() override;
|
||||
|
||||
//virtual void upload_changed(vector<device_memory *> ) override {} ;
|
||||
protected:
|
||||
virtual bool load_kernels(uint /*kernel_features*/) override;
|
||||
};
|
||||
|
|
|
@ -544,13 +544,30 @@ void CUDADevice::transform_host_pointer(void *&device_pointer, void *&shared_poi
|
|||
cuda_assert(cuMemHostGetDevicePointer_v2((CUdeviceptr *)&device_pointer, shared_pointer, 0));
|
||||
}
|
||||
|
||||
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
void CUDADevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset)
|
||||
{
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemcpyHtoD((CUdeviceptr)device_pointer, host_pointer, size));
|
||||
cuda_assert(cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_pointer) + offset,
|
||||
reinterpret_cast<unsigned char *>(host_pointer) + offset,
|
||||
size));
|
||||
}
|
||||
|
||||
#ifdef USE_DEVICE_PINNED_MEMORY
|
||||
void *CUDADevice::host_mem_alloc(size_t size, int aligment) {
|
||||
void *p_mem = NULL;
|
||||
CUDAContextScope scope(this);
|
||||
cuda_assert(cuMemAllocHost(&p_mem, size));
|
||||
|
||||
return p_mem;
|
||||
}
|
||||
|
||||
void CUDADevice::host_mem_free(void *p_mem) {
|
||||
CUDAContextScope scope(this);
|
||||
cuMemFreeHost(p_mem);
|
||||
}
|
||||
#endif
|
||||
|
||||
void CUDADevice::mem_alloc(device_memory &mem)
|
||||
{
|
||||
if (mem.type == MEM_TEXTURE) {
|
||||
|
@ -564,7 +581,7 @@ void CUDADevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_copy_to(device_memory &mem)
|
||||
void CUDADevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
|
@ -578,7 +595,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
|
|||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
generic_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -69,11 +69,16 @@ class CUDADevice : public GPUDevice {
|
|||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) override;
|
||||
|
||||
#ifdef USE_DEVICE_PINNED_MEMORY
|
||||
void *host_mem_alloc(size_t size, int aligment) override;
|
||||
void host_mem_free(void *p_mem) override;
|
||||
#endif
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
|
|
|
@ -48,16 +48,57 @@ uint Device::devices_initialized_mask = 0;
|
|||
|
||||
Device::~Device() noexcept(false) {}
|
||||
|
||||
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
void Device::build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit)
|
||||
{
|
||||
assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
|
||||
|
||||
BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
|
||||
if (refit) {
|
||||
bvh2->refit(progress);
|
||||
/* Top level BVH2 build must wait on all other BVH2 builds to finish
|
||||
otherwise the top level BVH2 will not have all the correct data
|
||||
*/
|
||||
if(bvh2->params.top_level) {
|
||||
/*
|
||||
This is used to make sure all workers have reached this point.
|
||||
device_init_update_bvh increments the counter.
|
||||
*/
|
||||
bvh2->building--;
|
||||
thread_scoped_lock build_wait_lock(bvh2->build_mutex);
|
||||
/* Wait for other BVH2 builds to complete before proceeding */
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Waiting on other BVH2 builds.";
|
||||
bvh2->build_cv.wait(build_wait_lock, [=]() { return (bvh2->building == 0); });
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Done waiting on other BVH2 builds";
|
||||
}
|
||||
thread_scoped_lock build_lock(bvh2->build_mutex, std::try_to_lock);
|
||||
if (build_lock) {
|
||||
/* Has the BVH already been built? */
|
||||
if (!bvh->built) {
|
||||
/* Build the BVH */
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Performing BVH2 build.";
|
||||
if (refit) {
|
||||
bvh2->refit(progress);
|
||||
}
|
||||
else {
|
||||
bvh2->build(progress, &stats);
|
||||
}
|
||||
bvh->built = true;
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Done building BVH2";
|
||||
}
|
||||
else {
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " BVH2 Already built";
|
||||
}
|
||||
bvh2->build_cv.notify_all();
|
||||
}
|
||||
else {
|
||||
bvh2->build(progress, &stats);
|
||||
/* Only need to wait for the top level BVH otherwise
|
||||
this thread can skip on to the next object */
|
||||
if (bvh2->params.top_level) {
|
||||
thread_scoped_lock build_wait_lock(bvh2->build_mutex);
|
||||
/* wait for BVH build to complete before proceeding */
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Waiting on BVH2 build.";
|
||||
bvh2->build_cv.wait(build_wait_lock, [=]() { return (bvh2->built); });
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Done waiting on BVH2 build";
|
||||
} else {
|
||||
VLOG_INFO << std::this_thread::get_id() << ":" << bvh2 << " Skipping BVH2 build.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -459,6 +500,14 @@ void *Device::get_cpu_osl_memory()
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void *Device::host_mem_alloc(size_t size, int alignment) {
|
||||
return util_aligned_malloc(size, alignment);
|
||||
}
|
||||
|
||||
void Device::host_mem_free(void *p_mem) {
|
||||
util_aligned_free(p_mem);
|
||||
}
|
||||
|
||||
GPUDevice::~GPUDevice() noexcept(false) {}
|
||||
|
||||
bool GPUDevice::load_texture_info()
|
||||
|
@ -596,6 +645,17 @@ void GPUDevice::move_textures_to_host(size_t size, bool for_texture)
|
|||
load_texture_info();
|
||||
}
|
||||
|
||||
void Device::upload_changed(vector<device_memory *> buffers)
|
||||
{
|
||||
for (const auto &buffer : buffers) {
|
||||
VLOG_INFO << "Checking " << buffer->name;
|
||||
if (buffer->modified && (buffer->data_size > 0)) {
|
||||
VLOG_INFO << "Uploading to " << buffer->name;
|
||||
this->mem_copy_to(*buffer, buffer->memory_size(), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GPUDevice::Mem *GPUDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
|
||||
{
|
||||
void *device_pointer = 0;
|
||||
|
@ -757,18 +817,19 @@ void GPUDevice::generic_free(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void GPUDevice::generic_copy_to(device_memory &mem)
|
||||
void GPUDevice::generic_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
|
||||
* copy data from mem.host_pointer. */
|
||||
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size());
|
||||
size = ((size == -1) ? mem.memory_size() : size);
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "util/types.h"
|
||||
#include "util/unique_ptr.h"
|
||||
#include "util/vector.h"
|
||||
#include "util/set.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -30,6 +31,7 @@ class Progress;
|
|||
class CPUKernels;
|
||||
class CPUKernelThreadGlobals;
|
||||
class Scene;
|
||||
class DeviceScene;
|
||||
|
||||
/* Device Types */
|
||||
|
||||
|
@ -119,8 +121,9 @@ class DeviceInfo {
|
|||
|
||||
class Device {
|
||||
friend class device_sub_ptr;
|
||||
|
||||
protected:
|
||||
//thread_mutex device_buffer_mutex;
|
||||
//set<device_memory *> device_buffers;
|
||||
protected:
|
||||
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
|
||||
: info(info_), stats(stats_), profiler(profiler_)
|
||||
{
|
||||
|
@ -140,6 +143,11 @@ class Device {
|
|||
/* noexcept needed to silence TBB warning. */
|
||||
virtual ~Device() noexcept(false);
|
||||
|
||||
virtual int get_num_devices() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* info */
|
||||
DeviceInfo info;
|
||||
virtual const string &error_message()
|
||||
|
@ -160,6 +168,28 @@ class Device {
|
|||
}
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const = 0;
|
||||
|
||||
BVHLayout get_bvh_layout(Device *device, BVHLayout layout)
|
||||
{
|
||||
if (layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||
layout = BVH_LAYOUT_OPTIX;
|
||||
else if (layout == BVH_LAYOUT_MULTI_METAL)
|
||||
layout = BVH_LAYOUT_METAL;
|
||||
else if (layout == BVH_LAYOUT_MULTI_HIPRT)
|
||||
layout = BVH_LAYOUT_HIPRT;
|
||||
else if (layout == BVH_LAYOUT_MULTI_EMBREEGPU)
|
||||
layout = BVH_LAYOUT_EMBREEGPU;
|
||||
else if (layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||
layout = device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX : BVH_LAYOUT_EMBREE;
|
||||
else if (layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||
layout = device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL : BVH_LAYOUT_EMBREE;
|
||||
else if (layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE)
|
||||
layout = device->info.type == DEVICE_HIPRT ? BVH_LAYOUT_HIPRT : BVH_LAYOUT_EMBREE;
|
||||
else if (layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE)
|
||||
layout = device->info.type == DEVICE_ONEAPI ? BVH_LAYOUT_EMBREEGPU :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
return layout;
|
||||
}
|
||||
|
||||
/* statistics */
|
||||
Stats &stats;
|
||||
Profiler &profiler;
|
||||
|
@ -205,17 +235,23 @@ class Device {
|
|||
virtual void *get_cpu_osl_memory();
|
||||
|
||||
/* acceleration structure building */
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
|
||||
virtual void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit);
|
||||
|
||||
/* OptiX specific destructor. */
|
||||
virtual void release_optix_bvh(BVH * /*bvh*/){};
|
||||
|
||||
/* multi device */
|
||||
virtual int device_number(Device * /*sub_device*/)
|
||||
virtual int device_number(const Device * /*sub_device*/) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This gets the memory pointer for the given device given the virtual device pointer */
|
||||
virtual device_ptr find_matching_mem(device_ptr key, Device * /*sub*/)
|
||||
{
|
||||
return key;
|
||||
}
|
||||
|
||||
/* Called after kernel texture setup, and prior to integrator state setup. */
|
||||
virtual void optimize_for_scene(Scene * /*scene*/) {}
|
||||
|
||||
|
@ -281,14 +317,21 @@ class Device {
|
|||
|
||||
static void free_memory();
|
||||
|
||||
/*
|
||||
* Upload to the device any buffers that have changed
|
||||
*/
|
||||
virtual void upload_changed(vector<device_memory *> buffers);
|
||||
|
||||
protected:
|
||||
/* Memory allocation, only accessed through device_memory. */
|
||||
friend class MultiDevice;
|
||||
friend class DeviceServer;
|
||||
friend class device_memory;
|
||||
|
||||
virtual void *host_mem_alloc(size_t size, int alignment);
|
||||
virtual void host_mem_free(void *p_mem);
|
||||
virtual void mem_alloc(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
|
||||
virtual void mem_zero(device_memory &mem) = 0;
|
||||
virtual void mem_free(device_memory &mem) = 0;
|
||||
|
@ -303,7 +346,7 @@ class Device {
|
|||
static vector<DeviceInfo> hip_devices;
|
||||
static vector<DeviceInfo> metal_devices;
|
||||
static vector<DeviceInfo> oneapi_devices;
|
||||
static uint devices_initialized_mask;
|
||||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
/* Device, which is GPU, with some common functionality for GPU back-ends. */
|
||||
|
@ -334,7 +377,6 @@ class GPUDevice : public Device {
|
|||
/* Returns true if the texture info was copied to the device (meaning, some more
|
||||
* re-initialization might be needed). */
|
||||
virtual bool load_texture_info();
|
||||
|
||||
protected:
|
||||
/* Memory allocation, only accessed through device_memory. */
|
||||
friend class device_memory;
|
||||
|
@ -370,7 +412,7 @@ class GPUDevice : public Device {
|
|||
* support of device/host allocations. */
|
||||
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
virtual void generic_free(device_memory &mem);
|
||||
virtual void generic_copy_to(device_memory &mem);
|
||||
virtual void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
|
||||
|
||||
/* total - amount of device memory, free - amount of available device memory */
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
|
||||
|
@ -386,9 +428,9 @@ class GPUDevice : public Device {
|
|||
/* This function should return device pointer corresponding to shared pointer, which
|
||||
* is host buffer, allocated in `alloc_host`. The function should `true`, if such
|
||||
* address transformation is possible and `false` otherwise. */
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
|
||||
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) = 0;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) = 0;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -27,7 +27,7 @@ class DummyDevice : public Device {
|
|||
|
||||
virtual void mem_alloc(device_memory &) override {}
|
||||
|
||||
virtual void mem_copy_to(device_memory &) override {}
|
||||
virtual void mem_copy_to(device_memory &, size_t, size_t) override {}
|
||||
|
||||
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override {}
|
||||
|
||||
|
@ -36,6 +36,7 @@ class DummyDevice : public Device {
|
|||
virtual void mem_free(device_memory &) override {}
|
||||
|
||||
virtual void const_copy_to(const char *, void *, size_t) override {}
|
||||
virtual void upload_changed(vector<device_memory *>) override {}
|
||||
};
|
||||
|
||||
Device *device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
|
|
@ -506,11 +506,16 @@ void HIPDevice::transform_host_pointer(void *&device_pointer, void *&shared_poin
|
|||
hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
|
||||
}
|
||||
|
||||
void HIPDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
void HIPDevice::copy_host_to_device(void *device_pointer,
|
||||
void *host_pointer,
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
const HIPContextScope scope(this);
|
||||
|
||||
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
|
||||
hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer + offset,
|
||||
reinterpret_cast<unsigned char *>(host_pointer) + offset,
|
||||
size));
|
||||
}
|
||||
|
||||
void HIPDevice::mem_alloc(device_memory &mem)
|
||||
|
@ -526,11 +531,16 @@ void HIPDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void HIPDevice::mem_copy_to(device_memory &mem)
|
||||
void HIPDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
if ((mem.device_size != mem.memory_size()) || (!mem.device_pointer)) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else {
|
||||
generic_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
|
@ -540,7 +550,7 @@ void HIPDevice::mem_copy_to(device_memory &mem)
|
|||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
generic_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -66,11 +66,11 @@ class HIPDevice : public GPUDevice {
|
|||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) override;
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
|
|
|
@ -27,7 +27,8 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
|||
original_device_size(0),
|
||||
original_device(0),
|
||||
need_realloc_(false),
|
||||
modified(false)
|
||||
modified(false),
|
||||
shared_mem(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -43,7 +44,7 @@ void *device_memory::host_alloc(size_t size)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
|
||||
void *ptr = device->host_mem_alloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES);
|
||||
|
||||
if (ptr) {
|
||||
util_guarded_mem_alloc(size);
|
||||
|
@ -57,10 +58,9 @@ void *device_memory::host_alloc(size_t size)
|
|||
|
||||
void device_memory::host_free()
|
||||
{
|
||||
if (host_pointer) {
|
||||
if ((host_pointer != 0) && !shared_mem) {
|
||||
util_guarded_mem_free(memory_size());
|
||||
util_aligned_free((void *)host_pointer);
|
||||
host_pointer = 0;
|
||||
device->host_mem_free(host_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,6 +70,11 @@ void device_memory::device_alloc()
|
|||
device->mem_alloc(*this);
|
||||
}
|
||||
|
||||
device_ptr device_memory::get_device_ptr(Device *dev) const
|
||||
{
|
||||
return device->find_matching_mem(device_pointer, dev);
|
||||
}
|
||||
|
||||
void device_memory::device_free()
|
||||
{
|
||||
if (device_pointer) {
|
||||
|
@ -77,10 +82,10 @@ void device_memory::device_free()
|
|||
}
|
||||
}
|
||||
|
||||
void device_memory::device_copy_to()
|
||||
void device_memory::device_copy_to(size_t size, size_t offset)
|
||||
{
|
||||
if (host_pointer) {
|
||||
device->mem_copy_to(*this);
|
||||
device->mem_copy_to(*this, memory_elements_size(size), memory_elements_size(offset));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -170,6 +170,12 @@ template<> struct device_type_traits<packed_float3> {
|
|||
static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<packed_uint3> {
|
||||
static const DataType data_type = TYPE_UINT;
|
||||
static const size_t num_elements = 3;
|
||||
static_assert(sizeof(packed_float3) == num_elements * datatype_size(data_type));
|
||||
};
|
||||
|
||||
template<> struct device_type_traits<float4> {
|
||||
static const DataType data_type = TYPE_FLOAT;
|
||||
static const size_t num_elements = 4;
|
||||
|
@ -240,7 +246,8 @@ class device_memory {
|
|||
void *shared_pointer;
|
||||
/* reference counter for shared_pointer */
|
||||
int shared_counter;
|
||||
|
||||
bool modified;
|
||||
|
||||
virtual ~device_memory();
|
||||
|
||||
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr);
|
||||
|
@ -248,6 +255,8 @@ class device_memory {
|
|||
|
||||
bool is_resident(Device *sub_device) const;
|
||||
|
||||
device_ptr get_device_ptr(Device *dev) const;
|
||||
|
||||
protected:
|
||||
friend class Device;
|
||||
friend class GPUDevice;
|
||||
|
@ -278,7 +287,7 @@ class device_memory {
|
|||
/* Device memory allocation and copying. */
|
||||
void device_alloc();
|
||||
void device_free();
|
||||
void device_copy_to();
|
||||
void device_copy_to(size_t size = -1, size_t offset = 0);
|
||||
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
|
||||
void device_zero();
|
||||
|
||||
|
@ -288,7 +297,7 @@ class device_memory {
|
|||
size_t original_device_size;
|
||||
Device *original_device;
|
||||
bool need_realloc_;
|
||||
bool modified;
|
||||
bool shared_mem;
|
||||
};
|
||||
|
||||
/* Device Only Memory
|
||||
|
@ -372,6 +381,49 @@ template<typename T> class device_vector : public device_memory {
|
|||
free();
|
||||
}
|
||||
|
||||
/* Host memory assignment. */
|
||||
T *assign_mem(const device_vector<T> &src)
|
||||
{
|
||||
return assign_mem(src.host_pointer, src.data_width, src.data_height, src.data_depth);
|
||||
}
|
||||
|
||||
T *assign_mem(const device_vector<T> *p_src)
|
||||
{
|
||||
return assign_mem(
|
||||
p_src->host_pointer, p_src->data_width, p_src->data_height, p_src->data_depth);
|
||||
}
|
||||
|
||||
T *assign_mem(array<T> &src)
|
||||
{
|
||||
return assign_mem(src.data(), src.size());
|
||||
}
|
||||
|
||||
T *assign_mem(array<T> *p_src)
|
||||
{
|
||||
return assign_mem(
|
||||
p_src->data(), p_src->size());
|
||||
}
|
||||
|
||||
T *assign_mem(void *p_mem, size_t width, size_t height = 0, size_t depth = 0)
|
||||
{
|
||||
size_t new_size = size(width, height, depth);
|
||||
|
||||
host_free();
|
||||
if (new_size > data_size) {
|
||||
device_free();
|
||||
modified = true;
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
host_pointer = p_mem;
|
||||
shared_mem = true;
|
||||
data_size = new_size;
|
||||
data_width = width;
|
||||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
return data();
|
||||
}
|
||||
|
||||
/* Host memory allocation. */
|
||||
T *alloc(size_t width, size_t height = 0, size_t depth = 0)
|
||||
{
|
||||
|
@ -513,20 +565,22 @@ template<typename T> class device_vector : public device_memory {
|
|||
return data()[i];
|
||||
}
|
||||
|
||||
void copy_to_device()
|
||||
void copy_to_device(size_t size = -1, size_t offset = 0)
|
||||
{
|
||||
if (data_size != 0) {
|
||||
device_copy_to();
|
||||
size = ((size == -1) ? data_size : size);
|
||||
if (data_size != 0) {
|
||||
assert((size + offset) <= data_size);
|
||||
device_copy_to(size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_to_device_if_modified()
|
||||
void copy_to_device_if_modified(size_t size = -1, size_t offset = 0)
|
||||
{
|
||||
if (!modified) {
|
||||
return;
|
||||
}
|
||||
|
||||
copy_to_device();
|
||||
copy_to_device(size, offset);
|
||||
}
|
||||
|
||||
void clear_modified()
|
||||
|
|
|
@ -10,8 +10,10 @@
|
|||
|
||||
# include "util/progress.h"
|
||||
|
||||
# include "device/device.h"
|
||||
# include "device/metal/bvh.h"
|
||||
# include "device/metal/util.h"
|
||||
# include "device/device.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
|
|
@ -134,7 +134,7 @@ class MetalDevice : public Device {
|
|||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
virtual void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override;
|
||||
|
||||
virtual void optimize_for_scene(Scene *scene) override;
|
||||
|
||||
|
@ -147,13 +147,13 @@ class MetalDevice : public Device {
|
|||
|
||||
MetalMem *generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem)
|
||||
{
|
||||
|
|
|
@ -756,7 +756,8 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
|
|||
return mmem;
|
||||
}
|
||||
|
||||
void MetalDevice::generic_copy_to(device_memory &mem)
|
||||
void MetalDevice::generic_copy_to(device_memory &mem, size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
|
@ -764,10 +765,11 @@ void MetalDevice::generic_copy_to(device_memory &mem)
|
|||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
|
||||
size = ((size == -1) ? mem.memory_size() : size);
|
||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
||||
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
|
||||
memcpy( reinterpret_cast<unsigned char *>(mmem.hostPtr) + offset, reinterpret_cast<unsigned char *>(mem.host_pointer) + offset, size);
|
||||
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
|
||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())];
|
||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(offset, size)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -829,7 +831,7 @@ void MetalDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void MetalDevice::mem_copy_to(device_memory &mem)
|
||||
void MetalDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
|
@ -843,7 +845,7 @@ void MetalDevice::mem_copy_to(device_memory &mem)
|
|||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
generic_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1344,10 +1346,10 @@ void MetalDevice::flush_delayed_free_list()
|
|||
delayed_free_list.clear();
|
||||
}
|
||||
|
||||
void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
void MetalDevice::build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit)
|
||||
{
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) {
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
Device::build_bvh(bvh, dscene, progress, refit);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -805,7 +805,8 @@ void MetalDeviceQueue::copy_to_device(device_memory &mem)
|
|||
size:mmem.size];
|
||||
}
|
||||
else {
|
||||
metal_device_->mem_copy_to(mem);
|
||||
const size_t size = mem.memory_size();
|
||||
metal_device_->mem_copy_to(mem, size, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "util/log.h"
|
||||
#include "util/map.h"
|
||||
#include "util/time.h"
|
||||
#include "util/tbb.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -25,43 +26,87 @@ class MultiDevice : public Device {
|
|||
public:
|
||||
struct SubDevice {
|
||||
Stats stats;
|
||||
Device *device;
|
||||
unique_ptr<Device> device;
|
||||
map<device_ptr, device_ptr> ptr_map;
|
||||
int peer_island_index = -1;
|
||||
};
|
||||
|
||||
list<SubDevice> devices;
|
||||
class device_memory_clone : public device_texture {
|
||||
public:
|
||||
device_memory_clone(const device_memory &mem, Device *sub_device, device_ptr sub_device_pointer)
|
||||
: device_texture(sub_device, mem.name, 0, IMAGE_DATA_TYPE_FLOAT,INTERPOLATION_NONE,EXTENSION_REPEAT) //mem.type)
|
||||
{
|
||||
data_type = mem.data_type;
|
||||
data_elements = mem.data_elements;
|
||||
data_size = mem.data_size;
|
||||
device_size = mem.device_size;
|
||||
data_width = mem.data_width;
|
||||
data_height = mem.data_height;
|
||||
data_depth = mem.data_depth;
|
||||
type = mem.type;
|
||||
name = mem.name;
|
||||
|
||||
/* Pointers. */
|
||||
device = sub_device;
|
||||
device_pointer = sub_device_pointer;
|
||||
|
||||
host_pointer = mem.host_pointer;
|
||||
shared_pointer = mem.shared_pointer;
|
||||
/* reference counter for shared_pointer */
|
||||
shared_counter = mem.shared_counter;
|
||||
modified = mem.modified;
|
||||
|
||||
if(type == MEM_TEXTURE) {
|
||||
const device_texture *p_tex = static_cast<const device_texture *>(&mem);
|
||||
memcpy(&info, &(p_tex->info), sizeof(TextureInfo));
|
||||
slot = p_tex->slot;
|
||||
}
|
||||
}
|
||||
|
||||
~device_memory_clone() {
|
||||
// Don't free anything
|
||||
host_pointer = 0;
|
||||
device_pointer = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/* Switch from list to a vector to make the parallel_for easily map to the integer id.
|
||||
Also id now could be used to access the real device pointer more quickly. Also, since
|
||||
the vector reallocates the memory on resize the sub-devices are stored as pointers. */
|
||||
vector<unique_ptr<SubDevice>> devices;
|
||||
device_ptr unique_key;
|
||||
vector<vector<SubDevice *>> peer_islands;
|
||||
|
||||
MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler), unique_key(1)
|
||||
{
|
||||
int cpu_device_idx = -1;
|
||||
foreach (const DeviceInfo &subinfo, info.multi_devices) {
|
||||
/* Always add CPU devices at the back since GPU devices can change
|
||||
* host memory pointers, which CPU uses as device pointer. */
|
||||
SubDevice *sub;
|
||||
unique_ptr<SubDevice> sub = make_unique<SubDevice>();
|
||||
if (subinfo.type == DEVICE_CPU) {
|
||||
devices.emplace_back();
|
||||
sub = &devices.back();
|
||||
assert(cpu_device_idx == -1);
|
||||
cpu_device_idx = devices.size();
|
||||
}
|
||||
else {
|
||||
devices.emplace_front();
|
||||
sub = &devices.front();
|
||||
}
|
||||
|
||||
/* The pointer to 'sub->stats' will stay valid even after new devices
|
||||
* are added, since 'devices' is a linked list. */
|
||||
sub->device = Device::create(subinfo, sub->stats, profiler);
|
||||
sub->device = std::unique_ptr<Device>(Device::create(subinfo, sub->stats, profiler));
|
||||
devices.emplace_back(std::move(sub));
|
||||
}
|
||||
|
||||
/* Swap the CPU device with the last device to ensure the CPU device is the last */
|
||||
{
|
||||
int last = devices.size() - 1;
|
||||
if ((cpu_device_idx != -1) && (cpu_device_idx != last)) {
|
||||
std::swap(devices[last], devices[cpu_device_idx]);
|
||||
}
|
||||
}
|
||||
/* Build a list of peer islands for the available render devices */
|
||||
foreach (SubDevice &sub, devices) {
|
||||
foreach (auto &sub, devices) {
|
||||
/* First ensure that every device is in at least once peer island */
|
||||
if (sub.peer_island_index < 0) {
|
||||
if (sub->peer_island_index < 0) {
|
||||
peer_islands.emplace_back();
|
||||
sub.peer_island_index = (int)peer_islands.size() - 1;
|
||||
peer_islands[sub.peer_island_index].push_back(&sub);
|
||||
sub->peer_island_index = (int)peer_islands.size() - 1;
|
||||
peer_islands[sub->peer_island_index].push_back(sub.get());
|
||||
}
|
||||
|
||||
if (!info.has_peer_memory) {
|
||||
|
@ -69,30 +114,31 @@ class MultiDevice : public Device {
|
|||
}
|
||||
|
||||
/* Second check peer access between devices and fill up the islands accordingly */
|
||||
foreach (SubDevice &peer_sub, devices) {
|
||||
if (peer_sub.peer_island_index < 0 &&
|
||||
peer_sub.device->info.type == sub.device->info.type &&
|
||||
peer_sub.device->check_peer_access(sub.device))
|
||||
{
|
||||
peer_sub.peer_island_index = sub.peer_island_index;
|
||||
peer_islands[sub.peer_island_index].push_back(&peer_sub);
|
||||
foreach (auto &peer_sub, devices) {
|
||||
if (peer_sub->peer_island_index < 0 &&
|
||||
peer_sub->device->info.type == sub->device->info.type &&
|
||||
peer_sub->device->check_peer_access(sub->device.get()))
|
||||
{
|
||||
peer_sub->peer_island_index = sub->peer_island_index;
|
||||
peer_islands[sub->peer_island_index].push_back(peer_sub.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~MultiDevice()
|
||||
~MultiDevice() {}
|
||||
|
||||
int get_num_devices() const override
|
||||
{
|
||||
foreach (SubDevice &sub, devices)
|
||||
delete sub.device;
|
||||
return devices.size();
|
||||
}
|
||||
|
||||
const string &error_message() override
|
||||
{
|
||||
error_msg.clear();
|
||||
|
||||
foreach (SubDevice &sub, devices)
|
||||
error_msg += sub.device->error_message();
|
||||
foreach (auto &sub, devices)
|
||||
error_msg += sub->device->error_message();
|
||||
|
||||
return error_msg;
|
||||
}
|
||||
|
@ -101,8 +147,8 @@ class MultiDevice : public Device {
|
|||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
|
||||
foreach (const SubDevice &sub_device, devices) {
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask(
|
||||
foreach (const auto &sub_device, devices) {
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device->device->get_bvh_layout_mask(
|
||||
kernel_features);
|
||||
bvh_layout_mask &= device_bvh_layout_mask;
|
||||
bvh_layout_mask_all |= device_bvh_layout_mask;
|
||||
|
@ -146,8 +192,8 @@ class MultiDevice : public Device {
|
|||
|
||||
bool load_kernels(const uint kernel_features) override
|
||||
{
|
||||
foreach (SubDevice &sub, devices)
|
||||
if (!sub.device->load_kernels(kernel_features))
|
||||
foreach (auto &sub, devices)
|
||||
if (!sub->device->load_kernels(kernel_features))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -155,18 +201,18 @@ class MultiDevice : public Device {
|
|||
|
||||
bool load_osl_kernels() override
|
||||
{
|
||||
foreach (SubDevice &sub, devices)
|
||||
if (!sub.device->load_osl_kernels())
|
||||
foreach (auto &sub, devices)
|
||||
if (!sub->device->load_osl_kernels())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override
|
||||
{
|
||||
/* Try to build and share a single acceleration structure, if possible */
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
|
||||
devices.back().device->build_bvh(bvh, progress, refit);
|
||||
devices.back()->device->build_bvh(bvh, dscene, progress, refit);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -182,94 +228,66 @@ class MultiDevice : public Device {
|
|||
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
|
||||
bvh_multi->sub_bvhs.resize(devices.size());
|
||||
|
||||
vector<BVHMulti *> geom_bvhs;
|
||||
geom_bvhs.reserve(bvh->geometry.size());
|
||||
foreach (Geometry *geom, bvh->geometry) {
|
||||
geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
|
||||
}
|
||||
|
||||
/* Broadcast acceleration structure build to all render devices */
|
||||
size_t i = 0;
|
||||
foreach (SubDevice &sub, devices) {
|
||||
/* Change geometry BVH pointers to the sub BVH */
|
||||
for (size_t k = 0; k < bvh->geometry.size(); ++k) {
|
||||
bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
|
||||
}
|
||||
parallel_for(
|
||||
size_t(0), devices.size(), [this, &bvh_multi, &dscene, refit, &progress](size_t id) {
|
||||
/* Pointer translation is removed as it is not thread safe. Instead a new method is added
|
||||
to retrieve the real device pointer. */
|
||||
auto &sub = devices[id];
|
||||
|
||||
if (!bvh_multi->sub_bvhs[i]) {
|
||||
BVHParams params = bvh->params;
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
|
||||
params.bvh_layout = BVH_LAYOUT_METAL;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT)
|
||||
params.bvh_layout = BVH_LAYOUT_HIPRT;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU)
|
||||
params.bvh_layout = BVH_LAYOUT_EMBREEGPU;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_HIPRT ? BVH_LAYOUT_HIPRT :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE)
|
||||
params.bvh_layout = sub.device->info.type == DEVICE_ONEAPI ? BVH_LAYOUT_EMBREEGPU :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
|
||||
* (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||
if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
|
||||
!bvh->geometry[0]->is_instanced())
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if (!bvh_multi->sub_bvhs[id]) {
|
||||
BVHParams params = bvh_multi->params;
|
||||
params.bvh_layout = get_bvh_layout(sub->device.get(), bvh_multi->params.bvh_layout);
|
||||
|
||||
bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
|
||||
}
|
||||
|
||||
sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
|
||||
i++;
|
||||
}
|
||||
|
||||
/* Change geometry BVH pointers back to the multi BVH. */
|
||||
for (size_t k = 0; k < bvh->geometry.size(); ++k) {
|
||||
bvh->geometry[k]->bvh = geom_bvhs[k];
|
||||
}
|
||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on
|
||||
* Embree (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||
if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
|
||||
!bvh_multi->geometry[0]->is_instanced())
|
||||
{
|
||||
}
|
||||
else {
|
||||
bvh_multi->sub_bvhs[id] = std::unique_ptr<BVH>(BVH::create(
|
||||
params, bvh_multi->geometry, bvh_multi->objects, sub->device.get()));
|
||||
}
|
||||
}
|
||||
if (bvh_multi->sub_bvhs[id]) {
|
||||
sub->device->build_bvh(bvh_multi->sub_bvhs[id].get(), dscene, progress, refit);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
virtual void *get_cpu_osl_memory() override
|
||||
{
|
||||
/* Always return the OSL memory of the CPU device (this works since the constructor above
|
||||
* guarantees that CPU devices are always added to the back). */
|
||||
if (devices.size() > 1 && devices.back().device->info.type != DEVICE_CPU) {
|
||||
if (devices.size() > 1 && devices.back()->device->info.type != DEVICE_CPU) {
|
||||
return NULL;
|
||||
}
|
||||
return devices.back().device->get_cpu_osl_memory();
|
||||
|
||||
return devices.back()->device->get_cpu_osl_memory();
|
||||
}
|
||||
|
||||
bool is_resident(device_ptr key, Device *sub_device) override
|
||||
{
|
||||
foreach (SubDevice &sub, devices) {
|
||||
if (sub.device == sub_device) {
|
||||
return find_matching_mem_device(key, sub)->device == sub_device;
|
||||
foreach (auto &sub, devices) {
|
||||
if (sub->device.get() == sub_device) {
|
||||
return find_matching_mem_device(key, sub.get())->device.get() == sub_device;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
|
||||
SubDevice *find_matching_mem_device(device_ptr key, SubDevice *sub)
|
||||
{
|
||||
assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
|
||||
assert(key != 0 &&
|
||||
(sub->peer_island_index >= 0 || sub->ptr_map.find(key) != sub->ptr_map.end()));
|
||||
|
||||
/* Get the memory owner of this key (first try current device, then peer devices) */
|
||||
SubDevice *owner_sub = ⊂
|
||||
SubDevice *owner_sub = sub;
|
||||
if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
|
||||
foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
|
||||
foreach (SubDevice *island_sub, peer_islands[sub->peer_island_index]) {
|
||||
if (island_sub != owner_sub && island_sub->ptr_map.find(key) != island_sub->ptr_map.end())
|
||||
{
|
||||
{
|
||||
owner_sub = island_sub;
|
||||
}
|
||||
}
|
||||
|
@ -293,7 +311,18 @@ class MultiDevice : public Device {
|
|||
return owner_sub;
|
||||
}
|
||||
|
||||
inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
|
||||
inline device_ptr find_matching_mem(device_ptr key, Device *dev) override
|
||||
{
|
||||
device_ptr ptr = 0;
|
||||
foreach (auto &sub, devices) {
|
||||
if (sub->device.get() == dev) {
|
||||
return find_matching_mem_device(key, sub.get())->ptr_map[key];
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
inline device_ptr find_matching_mem(device_ptr key, SubDevice *sub)
|
||||
{
|
||||
return find_matching_mem_device(key, sub)->ptr_map[key];
|
||||
}
|
||||
|
@ -306,7 +335,7 @@ class MultiDevice : public Device {
|
|||
/* The remaining memory types can be distributed across devices */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
|
@ -319,7 +348,7 @@ class MultiDevice : public Device {
|
|||
stats.mem_alloc(mem.device_size);
|
||||
}
|
||||
|
||||
void mem_copy_to(device_memory &mem) override
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override
|
||||
{
|
||||
device_ptr existing_key = mem.device_pointer;
|
||||
device_ptr key = (existing_key) ? existing_key : unique_key++;
|
||||
|
@ -327,26 +356,25 @@ class MultiDevice : public Device {
|
|||
|
||||
/* The tile buffers are allocated on each device (see below), so copy to all of them */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
//parallel_for_each (peer_islands.begin(), peer_islands.end(), [&](const vector<SubDevice *> &island) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
owner_sub->device->mem_copy_to(mem);
|
||||
owner_sub->ptr_map[key] = mem.device_pointer;
|
||||
Device *sub_device = owner_sub->device.get();
|
||||
device_ptr sub_device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
device_memory_clone sub_mem(mem, sub_device, sub_device_pointer);
|
||||
owner_sub->device->mem_copy_to(sub_mem, size, offset);
|
||||
owner_sub->ptr_map[key] = sub_mem.device_pointer;
|
||||
|
||||
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
|
||||
/* Need to create texture objects and update pointer in kernel globals on all devices */
|
||||
foreach (SubDevice *island_sub, island) {
|
||||
if (island_sub != owner_sub) {
|
||||
island_sub->device->mem_copy_to(mem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Need to create texture objects and update pointer in kernel globals on all devices */
|
||||
foreach (SubDevice *island_sub, island) {
|
||||
if (island_sub != owner_sub) {
|
||||
island_sub->device->mem_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}//);
|
||||
|
||||
mem.device = this;
|
||||
mem.device_pointer = key;
|
||||
stats.mem_alloc(mem.device_size - existing_size);
|
||||
}
|
||||
|
||||
|
@ -355,12 +383,12 @@ class MultiDevice : public Device {
|
|||
device_ptr key = mem.device_pointer;
|
||||
size_t i = 0, sub_h = h / devices.size();
|
||||
|
||||
foreach (SubDevice &sub, devices) {
|
||||
foreach (auto &sub, devices) {
|
||||
size_t sy = y + i * sub_h;
|
||||
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub);
|
||||
mem.device = owner_sub->device;
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub.get());
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = owner_sub->ptr_map[key];
|
||||
|
||||
owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
|
||||
|
@ -379,7 +407,7 @@ class MultiDevice : public Device {
|
|||
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
|
@ -395,46 +423,51 @@ class MultiDevice : public Device {
|
|||
void mem_free(device_memory &mem) override
|
||||
{
|
||||
device_ptr key = mem.device_pointer;
|
||||
size_t existing_size = mem.device_size;
|
||||
/* key is zero if the pointer is NULL */
|
||||
if (key != 0) {
|
||||
size_t existing_size = mem.device_size;
|
||||
|
||||
/* Free memory that was allocated for all devices (see above) on each device */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
|
||||
mem.device = owner_sub->device;
|
||||
mem.device_pointer = owner_sub->ptr_map[key];
|
||||
mem.device_size = existing_size;
|
||||
/* Free memory that was allocated for all devices (see above) on each device */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, island.front());
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = owner_sub->ptr_map[key];
|
||||
mem.device_size = existing_size;
|
||||
owner_sub->device->mem_free(mem);
|
||||
owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
|
||||
|
||||
owner_sub->device->mem_free(mem);
|
||||
owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
|
||||
|
||||
if (mem.type == MEM_TEXTURE) {
|
||||
/* Free texture objects on all devices */
|
||||
foreach (SubDevice *island_sub, island) {
|
||||
if (island_sub != owner_sub) {
|
||||
island_sub->device->mem_free(mem);
|
||||
if (mem.type == MEM_TEXTURE) {
|
||||
/* Free texture objects on all devices */
|
||||
foreach (SubDevice *island_sub, island) {
|
||||
if (island_sub != owner_sub) {
|
||||
island_sub->device->mem_free(mem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* restore the device */
|
||||
mem.device = this;
|
||||
|
||||
/* NULL the pointer and size and update the memory tracking */
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
stats.mem_free(existing_size);
|
||||
}
|
||||
|
||||
mem.device = this;
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
stats.mem_free(existing_size);
|
||||
}
|
||||
|
||||
|
||||
void const_copy_to(const char *name, void *host, size_t size) override
|
||||
{
|
||||
foreach (SubDevice &sub, devices)
|
||||
sub.device->const_copy_to(name, host, size);
|
||||
foreach (auto &sub, devices)
|
||||
sub->device->const_copy_to(name, host, size);
|
||||
}
|
||||
|
||||
int device_number(Device *sub_device) override
|
||||
int device_number(const Device *sub_device) const override
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
foreach (SubDevice &sub, devices) {
|
||||
if (sub.device == sub_device)
|
||||
for (const auto &sub : devices) {
|
||||
if (sub->device.get() == sub_device)
|
||||
return i;
|
||||
i++;
|
||||
}
|
||||
|
@ -444,10 +477,47 @@ class MultiDevice : public Device {
|
|||
|
||||
virtual void foreach_device(const function<void(Device *)> &callback) override
|
||||
{
|
||||
foreach (SubDevice &sub, devices) {
|
||||
sub.device->foreach_device(callback);
|
||||
foreach (auto &sub, devices) {
|
||||
sub->device->foreach_device(callback);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void upload_changed(vector<device_memory *> buffers) override
|
||||
{
|
||||
// foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
parallel_for(size_t(0), peer_islands.size(), [&](const size_t idx) {
|
||||
vector<SubDevice *> &island = peer_islands[idx];
|
||||
for (const device_memory *buffer : buffers) {
|
||||
VLOG_INFO << "Checking " << buffer->name << " on " << this;
|
||||
if (buffer->modified && buffer->data_size > 0) {
|
||||
device_ptr existing_key = buffer->device_pointer;
|
||||
device_ptr key = (existing_key) ? existing_key : unique_key++;
|
||||
size_t existing_size = buffer->device_size;
|
||||
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
Device *sub_device = owner_sub->device.get();
|
||||
device_ptr sub_device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] :
|
||||
0;
|
||||
device_memory_clone sub_mem(*buffer, sub_device, sub_device_pointer);
|
||||
|
||||
VLOG_INFO << "Uploading to " << buffer->name;
|
||||
owner_sub->device->mem_copy_to(sub_mem, existing_size, 0);
|
||||
owner_sub->ptr_map[key] = sub_mem.device_pointer;
|
||||
|
||||
// if (sub_mem.type == MEM_GLOBAL || sub_mem.type == MEM_TEXTURE) {
|
||||
// /* Need to create texture objects and update pointer in kernel globals on all
|
||||
// * devices */
|
||||
// foreach (SubDevice *island_sub, island) {
|
||||
// if (island_sub != owner_sub) {
|
||||
// island_sub->device->mem_copy_to(sub_mem, existing_size, 0);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
stats.mem_alloc(sub_mem.device_size - existing_size);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
Device *device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
|
|
|
@ -141,7 +141,7 @@ BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
|
|||
}
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
void OneapiDevice::build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit)
|
||||
{
|
||||
if (embree_device && bvh->params.bvh_layout == BVH_LAYOUT_EMBREEGPU) {
|
||||
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
|
||||
|
@ -156,7 +156,7 @@ void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
}
|
||||
}
|
||||
else {
|
||||
Device::build_bvh(bvh, progress, refit);
|
||||
Device::build_bvh(bvh, dscene, progress, refit);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
@ -239,17 +239,17 @@ void OneapiDevice::generic_alloc(device_memory &mem)
|
|||
stats.mem_alloc(memory_size);
|
||||
}
|
||||
|
||||
void OneapiDevice::generic_copy_to(device_memory &mem)
|
||||
void OneapiDevice::generic_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (!mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
size_t memory_size = mem.memory_size();
|
||||
|
||||
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
|
||||
assert(mem.host_pointer);
|
||||
assert(device_queue_);
|
||||
usm_memcpy(device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
|
||||
size = ((size == -1) ? mem.memory_size() : size);
|
||||
usm_memcpy(device_queue_, reinterpret_cast<unsigned char *>(mem.device_pointer) + offset, reinterpret_cast<unsigned char *>(mem.host_pointer) + offset, size);
|
||||
}
|
||||
|
||||
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
|
||||
|
@ -305,12 +305,12 @@ void OneapiDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void OneapiDevice::mem_copy_to(device_memory &mem)
|
||||
void OneapiDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.name) {
|
||||
VLOG_DEBUG << "OneapiDevice::mem_copy_to: \"" << mem.name << "\", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
<< string_human_readable_number(size) << " bytes. ("
|
||||
<< string_human_readable_size(size) << ")";
|
||||
}
|
||||
|
||||
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
|
||||
|
@ -331,7 +331,7 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
|
|||
if (!mem.device_pointer)
|
||||
mem_alloc(mem);
|
||||
|
||||
generic_copy_to(mem);
|
||||
generic_copy_to(mem, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ class OneapiDevice : public Device {
|
|||
|
||||
virtual ~OneapiDevice();
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override;
|
||||
# endif
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
|
@ -56,7 +56,7 @@ class OneapiDevice : public Device {
|
|||
|
||||
void generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
|
@ -68,7 +68,7 @@ class OneapiDevice : public Device {
|
|||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ void OneapiDeviceQueue::zero_to_device(device_memory &mem)
|
|||
|
||||
void OneapiDeviceQueue::copy_to_device(device_memory &mem)
|
||||
{
|
||||
oneapi_device_->mem_copy_to(mem);
|
||||
oneapi_device_->mem_copy_to(mem, mem.memory_size(),0);
|
||||
}
|
||||
|
||||
void OneapiDeviceQueue::copy_from_device(device_memory &mem)
|
||||
|
|
|
@ -97,6 +97,11 @@ OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
|||
|
||||
/* Allocate launch parameter buffer memory on device. */
|
||||
launch_params.alloc_to_device(1);
|
||||
|
||||
optixDeviceContextGetProperty(context,
|
||||
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
|
||||
&max_num_instances,
|
||||
sizeof(max_num_instances));
|
||||
}
|
||||
|
||||
OptiXDevice::~OptiXDevice()
|
||||
|
@ -1014,9 +1019,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||
* otherwise happen on the same CUDA stream anyway. */
|
||||
static thread_mutex mutex;
|
||||
thread_scoped_lock lock(mutex);
|
||||
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
|
||||
|
||||
/* Compute memory usage. */
|
||||
|
@ -1094,7 +1097,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||
if (use_fast_trace_bvh) {
|
||||
uint64_t compacted_size = sizes.outputSizeInBytes;
|
||||
cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size)));
|
||||
|
||||
|
||||
/* Temporary memory is no longer needed, so free it now to make space. */
|
||||
temp_mem.free();
|
||||
|
||||
|
@ -1125,7 +1128,7 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
|
|||
return !have_error();
|
||||
}
|
||||
|
||||
void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
void OptiXDevice::build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit)
|
||||
{
|
||||
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
|
||||
|
||||
|
@ -1457,18 +1460,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
}
|
||||
else {
|
||||
unsigned int num_instances = 0;
|
||||
unsigned int max_num_instances = 0xFFFFFFFF;
|
||||
|
||||
bvh_optix->as_data->free();
|
||||
bvh_optix->traversable_handle = 0;
|
||||
bvh_optix->motion_transform_data->free();
|
||||
|
||||
optixDeviceContextGetProperty(context,
|
||||
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
|
||||
&max_num_instances,
|
||||
sizeof(max_num_instances));
|
||||
/* Do not count first bit, which is used to distinguish instanced and non-instanced objects. */
|
||||
max_num_instances >>= 1;
|
||||
if (bvh->objects.size() > max_num_instances) {
|
||||
progress.set_error(
|
||||
"Failed to build OptiX acceleration structure because there are too many instances");
|
||||
|
@ -1504,7 +1500,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|||
continue;
|
||||
}
|
||||
|
||||
BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh);
|
||||
BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh->get_device_bvh(this));
|
||||
OptixTraversableHandle handle = blas->traversable_handle;
|
||||
if (handle == 0) {
|
||||
continue;
|
||||
|
@ -1708,7 +1704,7 @@ void OptiXDevice::update_launch_params(size_t offset, void *data, size_t data_si
|
|||
{
|
||||
const CUDAContextScope scope(this);
|
||||
|
||||
cuda_assert(cuMemcpyHtoD(launch_params.device_pointer + offset, data, data_size));
|
||||
cuda_assert(cuMemcpyHtoD(launch_params.get_device_ptr(this) + offset, data, data_size));
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -86,6 +86,7 @@ class OptiXDevice : public CUDADevice {
|
|||
vector<unique_ptr<device_only_memory<char>>> delayed_free_bvh_memory;
|
||||
thread_mutex delayed_free_bvh_mutex;
|
||||
|
||||
unsigned int max_num_instances = 0xFFFFFFFF;
|
||||
public:
|
||||
OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
~OptiXDevice();
|
||||
|
@ -103,7 +104,7 @@ class OptiXDevice : public CUDADevice {
|
|||
const OptixBuildInput &build_input,
|
||||
uint16_t num_motion_steps);
|
||||
|
||||
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override;
|
||||
|
||||
void release_optix_bvh(BVH *bvh) override;
|
||||
void free_bvh_memory_delayed();
|
||||
|
|
|
@ -46,6 +46,8 @@
|
|||
|
||||
#include "kernel/util/color.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* RenderServices implementation */
|
||||
|
|
|
@ -1,10 +1,16 @@
|
|||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2011-2022 Blender Foundation */
|
||||
|
||||
#include "bvh/bvh2.h"
|
||||
|
||||
#include "scene/devicescene.h"
|
||||
#include "scene/scene.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "device/memory.h"
|
||||
|
||||
#include "util/progress.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
DeviceScene::DeviceScene(Device *device)
|
||||
|
@ -59,6 +65,288 @@ DeviceScene::DeviceScene(Device *device)
|
|||
ies_lights(device, "ies", MEM_GLOBAL)
|
||||
{
|
||||
memset((void *)&data, 0, sizeof(data));
|
||||
|
||||
/* Set up the buffers to upload */
|
||||
/* Geometry buffers */
|
||||
geom_buffers.push_back(&tri_verts);
|
||||
geom_buffers.push_back(&tri_shader);
|
||||
geom_buffers.push_back(&tri_vnormal);
|
||||
geom_buffers.push_back(&tri_vindex);
|
||||
geom_buffers.push_back(&tri_patch);
|
||||
geom_buffers.push_back(&tri_patch_uv);
|
||||
|
||||
geom_buffers.push_back(&curve_keys);
|
||||
geom_buffers.push_back(&curves);
|
||||
geom_buffers.push_back(&curve_segments);
|
||||
|
||||
geom_buffers.push_back(&points);
|
||||
geom_buffers.push_back(&points_shader);
|
||||
|
||||
geom_buffers.push_back(&patches);
|
||||
|
||||
/* Attribute buffers */
|
||||
geom_buffers.push_back(&attributes_map);
|
||||
geom_buffers.push_back(&attributes_float);
|
||||
geom_buffers.push_back(&attributes_float2);
|
||||
geom_buffers.push_back(&attributes_float3);
|
||||
geom_buffers.push_back(&attributes_float4);
|
||||
geom_buffers.push_back(&attributes_uchar4);
|
||||
|
||||
geom_buffers.push_back(&objects);
|
||||
}
|
||||
|
||||
void DeviceScene::device_free_geometry(bool force_free)
|
||||
{
|
||||
bvh_nodes.free_if_need_realloc(force_free);
|
||||
bvh_leaf_nodes.free_if_need_realloc(force_free);
|
||||
object_node.free_if_need_realloc(force_free);
|
||||
prim_type.free_if_need_realloc(force_free);
|
||||
prim_visibility.free_if_need_realloc(force_free);
|
||||
prim_index.free_if_need_realloc(force_free);
|
||||
prim_object.free_if_need_realloc(force_free);
|
||||
prim_time.free_if_need_realloc(force_free);
|
||||
tri_verts.free_if_need_realloc(force_free);
|
||||
tri_shader.free_if_need_realloc(force_free);
|
||||
tri_vnormal.free_if_need_realloc(force_free);
|
||||
tri_vindex.free_if_need_realloc(force_free);
|
||||
tri_patch.free_if_need_realloc(force_free);
|
||||
tri_patch_uv.free_if_need_realloc(force_free);
|
||||
curves.free_if_need_realloc(force_free);
|
||||
curve_keys.free_if_need_realloc(force_free);
|
||||
curve_segments.free_if_need_realloc(force_free);
|
||||
points.free_if_need_realloc(force_free);
|
||||
points_shader.free_if_need_realloc(force_free);
|
||||
patches.free_if_need_realloc(force_free);
|
||||
attributes_map.free_if_need_realloc(force_free);
|
||||
attributes_float.free_if_need_realloc(force_free);
|
||||
attributes_float2.free_if_need_realloc(force_free);
|
||||
attributes_float3.free_if_need_realloc(force_free);
|
||||
attributes_float4.free_if_need_realloc(force_free);
|
||||
attributes_uchar4.free_if_need_realloc(force_free);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clears the modified tags for all elements of the device scene
|
||||
*/
|
||||
void DeviceScene::device_scene_clear_modified()
|
||||
{
|
||||
bvh_nodes.clear_modified();
|
||||
bvh_leaf_nodes.clear_modified();
|
||||
object_node.clear_modified();
|
||||
prim_type.clear_modified();
|
||||
prim_visibility.clear_modified();
|
||||
prim_index.clear_modified();
|
||||
prim_object.clear_modified();
|
||||
prim_time.clear_modified();
|
||||
tri_verts.clear_modified();
|
||||
tri_shader.clear_modified();
|
||||
tri_vindex.clear_modified();
|
||||
tri_patch.clear_modified();
|
||||
tri_vnormal.clear_modified();
|
||||
tri_patch_uv.clear_modified();
|
||||
curves.clear_modified();
|
||||
curve_keys.clear_modified();
|
||||
curve_segments.clear_modified();
|
||||
points.clear_modified();
|
||||
points_shader.clear_modified();
|
||||
patches.clear_modified();
|
||||
attributes_map.clear_modified();
|
||||
attributes_float.clear_modified();
|
||||
attributes_float2.clear_modified();
|
||||
attributes_float3.clear_modified();
|
||||
attributes_float4.clear_modified();
|
||||
attributes_uchar4.clear_modified();
|
||||
objects.clear_modified();
|
||||
attributes_map.clear_modified();
|
||||
}
|
||||
|
||||
void DeviceScene::device_update_host_pointers(Device *device,
|
||||
DeviceScene *dscene,
|
||||
const GeometrySizes *p_sizes)
|
||||
{
|
||||
if (dscene->tri_verts.size() > 0) {
|
||||
tri_verts.assign_mem(dscene->tri_verts);
|
||||
tri_verts.tag_modified();
|
||||
|
||||
if (dscene->tri_shader.is_modified()) {
|
||||
tri_shader.assign_mem(dscene->tri_shader);
|
||||
tri_shader.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->tri_vnormal.is_modified()) {
|
||||
tri_vnormal.assign_mem(dscene->tri_vnormal);
|
||||
tri_vnormal.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->tri_vindex.is_modified()) {
|
||||
tri_vindex.assign_mem(dscene->tri_vindex);
|
||||
tri_vindex.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->tri_patch.is_modified()) {
|
||||
tri_patch.assign_mem(dscene->tri_patch);
|
||||
tri_patch.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->tri_patch_uv.is_modified()) {
|
||||
tri_patch_uv.assign_mem(dscene->tri_patch_uv);
|
||||
tri_patch_uv.tag_modified();
|
||||
}
|
||||
}
|
||||
|
||||
if (dscene->curve_segments.size() > 0) {
|
||||
if (dscene->curve_keys.is_modified()) {
|
||||
curve_keys.assign_mem(dscene->curve_keys);
|
||||
curve_keys.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->curves.is_modified()) {
|
||||
curves.assign_mem(dscene->curves);
|
||||
curves.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->curve_segments.is_modified()) {
|
||||
curve_segments.assign_mem(dscene->curve_segments);
|
||||
curve_segments.tag_modified();
|
||||
}
|
||||
}
|
||||
|
||||
if (dscene->points.size() > 0) {
|
||||
points.assign_mem(dscene->points);
|
||||
points.tag_modified();
|
||||
|
||||
points_shader.assign_mem(dscene->points_shader);
|
||||
points_shader.tag_modified();
|
||||
}
|
||||
|
||||
if (dscene->patches.is_modified()) {
|
||||
patches.assign_mem(dscene->patches);
|
||||
patches.tag_modified();
|
||||
}
|
||||
|
||||
// Update the Attributes
|
||||
if (dscene->attributes_map.is_modified()) {
|
||||
attributes_map.assign_mem(dscene->attributes_map);
|
||||
attributes_map.tag_modified();
|
||||
}
|
||||
if (dscene->attributes_float.is_modified()) {
|
||||
attributes_float.assign_mem(dscene->attributes_float);
|
||||
attributes_float.tag_modified();
|
||||
}
|
||||
if (dscene->attributes_float2.is_modified()) {
|
||||
attributes_float2.assign_mem(dscene->attributes_float2);
|
||||
attributes_float2.tag_modified();
|
||||
}
|
||||
if (dscene->attributes_float3.is_modified()) {
|
||||
attributes_float3.assign_mem(dscene->attributes_float3);
|
||||
attributes_float3.tag_modified();
|
||||
}
|
||||
if (dscene->attributes_float4.is_modified()) {
|
||||
attributes_float4.assign_mem(dscene->attributes_float4);
|
||||
attributes_float4.tag_modified();
|
||||
}
|
||||
if (dscene->attributes_uchar4.is_modified()) {
|
||||
attributes_uchar4.assign_mem(dscene->attributes_uchar4);
|
||||
attributes_uchar4.tag_modified();
|
||||
}
|
||||
if (dscene->objects.is_modified()) {
|
||||
objects.assign_mem(dscene->objects);
|
||||
objects.tag_modified();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This copies the data to the devices if they have been modified
|
||||
*/
|
||||
void DeviceScene::device_update_mesh(Device *device,
|
||||
const GeometrySizes *p_sizes,
|
||||
Progress &progress)
|
||||
{
|
||||
progress.set_status("Updating Mesh", "Copying Mesh to device");
|
||||
if (tri_verts.size() > 0) {
|
||||
tri_verts.copy_to_device_if_modified(p_sizes->vert_size, 0);
|
||||
tri_shader.copy_to_device_if_modified(p_sizes->tri_size, 0);
|
||||
tri_vnormal.copy_to_device_if_modified(p_sizes->vert_size, 0);
|
||||
tri_vindex.copy_to_device_if_modified(p_sizes->tri_size, 0);
|
||||
tri_patch.copy_to_device_if_modified(p_sizes->tri_size, 0);
|
||||
tri_patch_uv.copy_to_device_if_modified(p_sizes->vert_size, 0);
|
||||
}
|
||||
|
||||
if (curve_segments.size() > 0) {
|
||||
curve_keys.copy_to_device_if_modified(p_sizes->curve_key_size, 0);
|
||||
curves.copy_to_device_if_modified(p_sizes->curve_size, 0);
|
||||
curve_segments.copy_to_device_if_modified(p_sizes->curve_segment_size, 0);
|
||||
}
|
||||
|
||||
if (points.size() > 0) {
|
||||
points.copy_to_device_if_modified(p_sizes->point_size, 0);
|
||||
points_shader.copy_to_device_if_modified(p_sizes->point_size, 0);
|
||||
}
|
||||
|
||||
patches.copy_to_device_if_modified(p_sizes->patch_size, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Copies the attribute buffer data to the devices
|
||||
*/
|
||||
void DeviceScene::device_update_attributes(Device *device,
|
||||
const AttributeSizes *sizes,
|
||||
Progress &progress)
|
||||
{
|
||||
progress.set_status("Updating Mesh", "Copying Attributes to device");
|
||||
/* copy svm attributes to device */
|
||||
attributes_map.copy_to_device_if_modified();
|
||||
attributes_float.copy_to_device_if_modified(sizes->attr_float_size, 0);
|
||||
attributes_float2.copy_to_device_if_modified(sizes->attr_float2_size, 0);
|
||||
attributes_float3.copy_to_device_if_modified(sizes->attr_float3_size, 0);
|
||||
attributes_float4.copy_to_device_if_modified(sizes->attr_float4_size, 0);
|
||||
attributes_uchar4.copy_to_device_if_modified(sizes->attr_uchar4_size, 0);
|
||||
objects.copy_to_device_if_modified();
|
||||
}
|
||||
|
||||
void DeviceScene::device_update_bvh2(Device *device,
|
||||
BVH *bvh,
|
||||
Progress &progress)
|
||||
{
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2) {
|
||||
BVH2 *bvh2 = static_cast<BVH2 *>(bvh);
|
||||
data.bvh.root = bvh2->pack.root_index;
|
||||
/* When using BVH2, we always have to copy/update the data as its layout is dependent on
|
||||
* the BVH's leaf nodes which may be different when the objects or vertices move. */
|
||||
|
||||
if (bvh2->pack.nodes.size()) {
|
||||
bvh_nodes.assign_mem(bvh2->pack.nodes);
|
||||
bvh_nodes.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.leaf_nodes.size()) {
|
||||
bvh_leaf_nodes.assign_mem(bvh2->pack.leaf_nodes);
|
||||
bvh_leaf_nodes.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.object_node.size()) {
|
||||
object_node.assign_mem(bvh2->pack.object_node);
|
||||
object_node.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.prim_type.size()) {
|
||||
prim_type.assign_mem(bvh2->pack.prim_type);
|
||||
prim_type.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.prim_visibility.size()) {
|
||||
prim_visibility.assign_mem(bvh2->pack.prim_visibility);
|
||||
prim_visibility.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.prim_index.size()) {
|
||||
prim_index.assign_mem(bvh2->pack.prim_index);
|
||||
prim_index.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.prim_object.size()) {
|
||||
prim_object.assign_mem(bvh2->pack.prim_object);
|
||||
prim_object.copy_to_device();
|
||||
}
|
||||
if (bvh2->pack.prim_time.size()) {
|
||||
prim_time.assign_mem(bvh2->pack.prim_time);
|
||||
prim_time.copy_to_device();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -4,14 +4,23 @@
|
|||
#ifndef __DEVICESCENE_H__
|
||||
#define __DEVICESCENE_H__
|
||||
|
||||
#include "device/device.h"
|
||||
#include "device/memory.h"
|
||||
|
||||
#include "util/types.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
#include "kernel/types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BVH;
|
||||
class Device;
|
||||
class Progress;
|
||||
struct GeometrySizes;
|
||||
struct AttributeSizes;
|
||||
|
||||
/* Scene Device Data */
|
||||
|
||||
class DeviceScene {
|
||||
public:
|
||||
/* BVH */
|
||||
|
@ -91,9 +100,20 @@ class DeviceScene {
|
|||
/* IES lights */
|
||||
device_vector<float> ies_lights;
|
||||
|
||||
vector<device_memory *> geom_buffers;
|
||||
KernelData data;
|
||||
|
||||
DeviceScene(Device *device);
|
||||
|
||||
void device_free_geometry(bool force_free);
|
||||
void device_scene_clear_modified();
|
||||
void device_update_host_pointers(Device *device,
|
||||
DeviceScene *dscene,
|
||||
const GeometrySizes *p_sizes);
|
||||
void device_update_mesh(Device *device, const GeometrySizes *p_sizes, Progress &progress);
|
||||
|
||||
void device_update_attributes(Device *device, const AttributeSizes *sizes, Progress &progress);
|
||||
void device_update_bvh2(Device *device, BVH *bvh, Progress &progress);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -25,9 +25,13 @@ class Mesh;
|
|||
class Progress;
|
||||
class RenderStats;
|
||||
class Scene;
|
||||
struct SceneTimes;
|
||||
struct GeometrySizes;
|
||||
struct AttributeSizes;
|
||||
class SceneParams;
|
||||
class Shader;
|
||||
class Volume;
|
||||
class Object;
|
||||
struct PackedBVH;
|
||||
|
||||
/* Set of flags used to help determining what data has been modified or needs reallocation, so we
|
||||
|
@ -102,6 +106,7 @@ class Geometry : public Node {
|
|||
BVH *bvh;
|
||||
size_t attr_map_offset;
|
||||
size_t prim_offset;
|
||||
size_t motion_key_offset;
|
||||
|
||||
/* Shader Properties */
|
||||
bool has_volume; /* Set in the device_update_flags(). */
|
||||
|
@ -138,6 +143,10 @@ class Geometry : public Node {
|
|||
int motion_step(float time) const;
|
||||
|
||||
/* BVH */
|
||||
bool create_new_bvh_if_needed(Object *object,
|
||||
Device *device,
|
||||
DeviceScene *dscene,
|
||||
SceneParams *params);
|
||||
void compute_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
SceneParams *params,
|
||||
|
@ -245,6 +254,37 @@ class GeometryManager {
|
|||
/* Statistics */
|
||||
void collect_statistics(const Scene *scene, RenderStats *stats);
|
||||
|
||||
size_t create_object_bvhs(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
const BVHLayout bvh_layout,
|
||||
bool &need_update_scene_bvh);
|
||||
void clear_shader_update_tags(Scene *scene);
|
||||
void clear_geometry_update_and_modified_tags(Scene *scene);
|
||||
void device_data_xfer_and_bvh_update(SceneTimes *times,
|
||||
Scene *scene,
|
||||
Device *device,
|
||||
DeviceScene *dscene,
|
||||
const BVHLayout bvh_layout,
|
||||
size_t num_bvh,
|
||||
bool can_refit,
|
||||
bool need_update_scene_bvh,
|
||||
Progress &progress);
|
||||
void update_object_bounds(Scene *scene);
|
||||
void tesselate(Scene *scene, size_t total_tess_needed, Progress &progress);
|
||||
void pretess_disp_normal_and_vertices_setup(Device *device,
|
||||
Scene *scene,
|
||||
bool &true_displacement_used,
|
||||
bool &curve_shadow_transparency_used,
|
||||
size_t &total_tess_needed);
|
||||
static void device_update_sub_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
BVH *bvh,
|
||||
BVH *sub_bvh,
|
||||
bool can_refit,
|
||||
size_t n,
|
||||
size_t total,
|
||||
Progress *progress);
|
||||
protected:
|
||||
bool displace(Device *device, Scene *scene, Mesh *mesh, Progress &progress);
|
||||
|
||||
|
@ -259,24 +299,58 @@ class GeometryManager {
|
|||
vector<AttributeRequestSet> &object_attributes);
|
||||
|
||||
/* Compute verts/triangles/curves offsets in global arrays. */
|
||||
void geom_calc_offset(Scene *scene, BVHLayout bvh_layout);
|
||||
void geom_calc_offset(Scene *scene);
|
||||
void attrib_calc_sizes(Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values);
|
||||
|
||||
void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
|
||||
|
||||
void device_update_mesh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
|
||||
void device_update_mesh_preprocess(
|
||||
Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
|
||||
bool displacement_and_curve_shadow_transparency(Scene *scene,
|
||||
Device *device,
|
||||
DeviceScene *dscene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values,
|
||||
Progress &progress);
|
||||
|
||||
void device_update_attributes(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress);
|
||||
|
||||
void device_update_bvh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
|
||||
void gather_attributes(Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values);
|
||||
bool device_update_attributes_preprocess(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values,
|
||||
Progress &progress);
|
||||
bool device_update_bvh_preprocess(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress);
|
||||
void device_init_update_bvh(Scene *scene);
|
||||
void device_update_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
bool can_refit,
|
||||
size_t n,
|
||||
size_t total,
|
||||
Progress &progress);
|
||||
void device_update_bvh_postprocess(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress);
|
||||
|
||||
void device_update_displacement_images(Device *device, Scene *scene, Progress &progress);
|
||||
|
||||
void device_update_volume_images(Device *device, Scene *scene, Progress &progress);
|
||||
|
||||
private:
|
||||
vector<Object> object_pool;
|
||||
static void update_attribute_element_offset(Geometry *geom,
|
||||
device_vector<float> &attr_float,
|
||||
size_t &attr_float_offset,
|
||||
|
|
|
@ -253,9 +253,14 @@ void GeometryManager::update_svm_attributes(Device *,
|
|||
}
|
||||
|
||||
/* copy to device */
|
||||
dscene->attributes_map.copy_to_device();
|
||||
/* Copy moved to device_update_attributes */
|
||||
dscene->attributes_map.tag_modified();
|
||||
}
|
||||
|
||||
/*
|
||||
* Copies the attribute data into the buffers and records
|
||||
* the offsets
|
||||
*/
|
||||
void GeometryManager::update_attribute_element_offset(Geometry *geom,
|
||||
device_vector<float> &attr_float,
|
||||
size_t &attr_float_offset,
|
||||
|
@ -358,6 +363,13 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom,
|
|||
float3 *data = mattr->data_float3();
|
||||
offset = attr_float3_offset;
|
||||
|
||||
// Records where the motion vertices are in the attribute array
|
||||
// so that they can be used later to reference the data when building
|
||||
// the BVHs.
|
||||
if (mattr->std == ATTR_STD_MOTION_VERTEX_POSITION) {
|
||||
geom->motion_key_offset = offset;
|
||||
}
|
||||
|
||||
assert(attr_float3.size() >= offset + size);
|
||||
if (mattr->modified) {
|
||||
for (size_t k = 0; k < size; k++) {
|
||||
|
@ -417,6 +429,9 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Records the sizes of the attribute buffers
|
||||
*/
|
||||
static void update_attribute_element_size(Geometry *geom,
|
||||
Attribute *mattr,
|
||||
AttributePrimitive prim,
|
||||
|
@ -453,132 +468,33 @@ static void update_attribute_element_size(Geometry *geom,
|
|||
}
|
||||
}
|
||||
|
||||
void GeometryManager::device_update_attributes(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
/*
|
||||
* Packs the attribute buffers and records the sizes and offsets using
|
||||
* the attribute sets
|
||||
*/
|
||||
bool GeometryManager::device_update_attributes_preprocess(
|
||||
Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values,
|
||||
Progress &progress)
|
||||
{
|
||||
bool update_obj_offsets = false;
|
||||
|
||||
progress.set_status("Updating Mesh", "Computing attributes");
|
||||
|
||||
/* gather per mesh requested attributes. as meshes may have multiple
|
||||
* shaders assigned, this merges the requested attributes that have
|
||||
* been set per shader by the shader manager */
|
||||
vector<AttributeRequestSet> geom_attributes(scene->geometry.size());
|
||||
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
|
||||
geom->index = i;
|
||||
scene->need_global_attributes(geom_attributes[i]);
|
||||
|
||||
foreach (Node *node, geom->get_used_shaders()) {
|
||||
Shader *shader = static_cast<Shader *>(node);
|
||||
geom_attributes[i].add(shader->attributes);
|
||||
}
|
||||
|
||||
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
|
||||
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
|
||||
}
|
||||
// SHOULD NOT ALLOC ONLY ALLOC IF MORE SPACE IS NEEDED
|
||||
{
|
||||
AttributeSizes *sizes = &(scene->attrib_sizes);
|
||||
dscene->attributes_float.alloc(sizes->attr_float_size);
|
||||
dscene->attributes_float2.alloc(sizes->attr_float2_size);
|
||||
dscene->attributes_float3.alloc(sizes->attr_float3_size);
|
||||
dscene->attributes_float4.alloc(sizes->attr_float4_size);
|
||||
dscene->attributes_uchar4.alloc(sizes->attr_uchar4_size);
|
||||
}
|
||||
|
||||
/* convert object attributes to use the same data structures as geometry ones */
|
||||
vector<AttributeRequestSet> object_attributes(scene->objects.size());
|
||||
vector<AttributeSet> object_attribute_values;
|
||||
|
||||
object_attribute_values.reserve(scene->objects.size());
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
Geometry *geom = object->geometry;
|
||||
size_t geom_idx = geom->index;
|
||||
|
||||
assert(geom_idx < scene->geometry.size() && scene->geometry[geom_idx] == geom);
|
||||
|
||||
object_attribute_values.push_back(AttributeSet(geom, ATTR_PRIM_GEOMETRY));
|
||||
|
||||
AttributeRequestSet &geom_requests = geom_attributes[geom_idx];
|
||||
AttributeRequestSet &attributes = object_attributes[i];
|
||||
AttributeSet &values = object_attribute_values[i];
|
||||
|
||||
for (size_t j = 0; j < object->attributes.size(); j++) {
|
||||
ParamValue ¶m = object->attributes[j];
|
||||
|
||||
/* add attributes that are requested and not already handled by the mesh */
|
||||
if (geom_requests.find(param.name()) && !geom->attributes.find(param.name())) {
|
||||
attributes.add(param.name());
|
||||
|
||||
Attribute *attr = values.add(param.name(), param.type(), ATTR_ELEMENT_OBJECT);
|
||||
assert(param.datasize() == attr->buffer.size());
|
||||
memcpy(attr->buffer.data(), param.data(), param.datasize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* mesh attribute are stored in a single array per data type. here we fill
|
||||
* those arrays, and set the offset and element type to create attribute
|
||||
* maps next */
|
||||
|
||||
/* Pre-allocate attributes to avoid arrays re-allocation which would
|
||||
* take 2x of overall attribute memory usage.
|
||||
*/
|
||||
size_t attr_float_size = 0;
|
||||
size_t attr_float2_size = 0;
|
||||
size_t attr_float3_size = 0;
|
||||
size_t attr_float4_size = 0;
|
||||
size_t attr_uchar4_size = 0;
|
||||
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
AttributeRequestSet &attributes = geom_attributes[i];
|
||||
foreach (AttributeRequest &req, attributes.requests) {
|
||||
Attribute *attr = geom->attributes.find(req);
|
||||
|
||||
update_attribute_element_size(geom,
|
||||
attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
Attribute *subd_attr = mesh->subd_attributes.find(req);
|
||||
|
||||
update_attribute_element_size(mesh,
|
||||
subd_attr,
|
||||
ATTR_PRIM_SUBD,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
|
||||
foreach (Attribute &attr, object_attribute_values[i].attributes) {
|
||||
update_attribute_element_size(object->geometry,
|
||||
&attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&attr_float_size,
|
||||
&attr_float2_size,
|
||||
&attr_float3_size,
|
||||
&attr_float4_size,
|
||||
&attr_uchar4_size);
|
||||
}
|
||||
}
|
||||
|
||||
dscene->attributes_float.alloc(attr_float_size);
|
||||
dscene->attributes_float2.alloc(attr_float2_size);
|
||||
dscene->attributes_float3.alloc(attr_float3_size);
|
||||
dscene->attributes_float4.alloc(attr_float4_size);
|
||||
dscene->attributes_uchar4.alloc(attr_uchar4_size);
|
||||
|
||||
/* The order of those flags needs to match that of AttrKernelDataType. */
|
||||
const bool attributes_need_realloc[AttrKernelDataType::NUM] = {
|
||||
dscene->attributes_float.need_realloc(),
|
||||
|
@ -650,9 +566,6 @@ void GeometryManager::device_update_attributes(Device *device,
|
|||
req.subd_type,
|
||||
req.subd_desc);
|
||||
}
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -687,9 +600,6 @@ void GeometryManager::device_update_attributes(Device *device,
|
|||
/* object attributes don't care about subdivision */
|
||||
req.subd_type = req.type;
|
||||
req.subd_desc = req.desc;
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -699,24 +609,141 @@ void GeometryManager::device_update_attributes(Device *device,
|
|||
|
||||
update_svm_attributes(device, dscene, scene, geom_attributes, object_attributes);
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
update_obj_offsets = scene->object_manager->device_update_geom_offsets(device, dscene, scene);
|
||||
|
||||
/* copy to device */
|
||||
progress.set_status("Updating Mesh", "Copying Attributes to device");
|
||||
return update_obj_offsets;
|
||||
}
|
||||
|
||||
dscene->attributes_float.copy_to_device_if_modified();
|
||||
dscene->attributes_float2.copy_to_device_if_modified();
|
||||
dscene->attributes_float3.copy_to_device_if_modified();
|
||||
dscene->attributes_float4.copy_to_device_if_modified();
|
||||
dscene->attributes_uchar4.copy_to_device_if_modified();
|
||||
/*
|
||||
* Records all the attribute buffer sizes for all the attribute buffers for later use
|
||||
*/
|
||||
void GeometryManager::attrib_calc_sizes(Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values)
|
||||
{
|
||||
AttributeSizes *p_sizes = &(scene->attrib_sizes);
|
||||
p_sizes->attr_float_size = 0;
|
||||
p_sizes->attr_float2_size = 0;
|
||||
p_sizes->attr_float3_size = 0;
|
||||
p_sizes->attr_float4_size = 0;
|
||||
p_sizes->attr_uchar4_size = 0;
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
AttributeRequestSet &attributes = geom_attributes[i];
|
||||
foreach (AttributeRequest &req, attributes.requests) {
|
||||
Attribute *attr = geom->attributes.find(req);
|
||||
|
||||
/* After mesh attributes and patch tables have been copied to device memory,
|
||||
* we need to update offsets in the objects. */
|
||||
scene->object_manager->device_update_geom_offsets(device, dscene, scene);
|
||||
update_attribute_element_size(geom,
|
||||
attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&(p_sizes->attr_float_size),
|
||||
&(p_sizes->attr_float2_size),
|
||||
&(p_sizes->attr_float3_size),
|
||||
&(p_sizes->attr_float4_size),
|
||||
&(p_sizes->attr_uchar4_size));
|
||||
|
||||
if (geom->is_mesh()) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
Attribute *subd_attr = mesh->subd_attributes.find(req);
|
||||
|
||||
update_attribute_element_size(mesh,
|
||||
subd_attr,
|
||||
ATTR_PRIM_SUBD,
|
||||
&(p_sizes->attr_float_size),
|
||||
&(p_sizes->attr_float2_size),
|
||||
&(p_sizes->attr_float3_size),
|
||||
&(p_sizes->attr_float4_size),
|
||||
&(p_sizes->attr_uchar4_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
|
||||
foreach (Attribute &attr, object_attribute_values[i].attributes) {
|
||||
update_attribute_element_size(object->geometry,
|
||||
&attr,
|
||||
ATTR_PRIM_GEOMETRY,
|
||||
&(p_sizes->attr_float_size),
|
||||
&(p_sizes->attr_float2_size),
|
||||
&(p_sizes->attr_float3_size),
|
||||
&(p_sizes->attr_float4_size),
|
||||
&(p_sizes->attr_uchar4_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Records the set of attributes used by the objects
|
||||
*/
|
||||
void GeometryManager::gather_attributes(Scene *scene,
|
||||
vector<AttributeRequestSet> &geom_attributes,
|
||||
vector<AttributeRequestSet> &object_attributes,
|
||||
vector<AttributeSet> &object_attribute_values)
|
||||
{
|
||||
geom_attributes.clear();
|
||||
object_attributes.clear();
|
||||
object_attribute_values.clear();
|
||||
|
||||
/* gather per mesh requested attributes. as meshes may have multiple
|
||||
* shaders assigned, this merges the requested attributes that have
|
||||
* been set per shader by the shader manager */
|
||||
geom_attributes.resize(scene->geometry.size());
|
||||
|
||||
for (size_t i = 0; i < scene->geometry.size(); i++) {
|
||||
Geometry *geom = scene->geometry[i];
|
||||
|
||||
geom->index = i;
|
||||
scene->need_global_attributes(geom_attributes[i]);
|
||||
|
||||
foreach (Node *node, geom->get_used_shaders()) {
|
||||
Shader *shader = static_cast<Shader *>(node);
|
||||
geom_attributes[i].add(shader->attributes);
|
||||
}
|
||||
|
||||
if (geom->is_hair() && static_cast<Hair *>(geom)->need_shadow_transparency()) {
|
||||
geom_attributes[i].add(ATTR_STD_SHADOW_TRANSPARENCY);
|
||||
}
|
||||
}
|
||||
|
||||
/* convert object attributes to use the same data structures as geometry ones */
|
||||
object_attributes.resize(scene->objects.size());
|
||||
object_attribute_values.reserve(scene->objects.size());
|
||||
|
||||
for (size_t i = 0; i < scene->objects.size(); i++) {
|
||||
Object *object = scene->objects[i];
|
||||
Geometry *geom = object->geometry;
|
||||
size_t geom_idx = geom->index;
|
||||
|
||||
assert(geom_idx < scene->geometry.size() && scene->geometry[geom_idx] == geom);
|
||||
|
||||
object_attribute_values.push_back(AttributeSet(geom, ATTR_PRIM_GEOMETRY));
|
||||
|
||||
AttributeRequestSet &geom_requests = geom_attributes[geom_idx];
|
||||
AttributeRequestSet &attributes = object_attributes[i];
|
||||
AttributeSet &values = object_attribute_values[i];
|
||||
|
||||
for (size_t j = 0; j < object->attributes.size(); j++) {
|
||||
ParamValue ¶m = object->attributes[j];
|
||||
|
||||
/* add attributes that are requested and not already handled by the mesh */
|
||||
if (geom_requests.find(param.name()) && !geom->attributes.find(param.name())) {
|
||||
attributes.add(param.name());
|
||||
|
||||
Attribute *attr = values.add(param.name(), param.type(), ATTR_ELEMENT_OBJECT);
|
||||
assert(param.datasize() == attr->buffer.size());
|
||||
memcpy(attr->buffer.data(), param.data(), param.datasize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Geometry attributes are stored in a single array per data type. Here determine the
|
||||
* sizes of those buffers.
|
||||
*/
|
||||
attrib_calc_sizes(scene, geom_attributes, object_attributes, object_attribute_values);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -42,40 +42,90 @@ void Geometry::compute_bvh(Device *device,
|
|||
if (progress->get_cancel())
|
||||
return;
|
||||
|
||||
compute_bounds();
|
||||
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
|
||||
params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
if (need_build_bvh(bvh_layout)) {
|
||||
string msg = "Updating Geometry BVH ";
|
||||
if (name.empty())
|
||||
msg += string_printf("%u/%u", (uint)(n + 1), (uint)total);
|
||||
else
|
||||
msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total);
|
||||
BVH *sub_bvh = bvh->get_device_bvh(device);
|
||||
GeometryManager::device_update_sub_bvh(
|
||||
device, dscene, bvh, sub_bvh, !need_update_rebuild, n, total, progress);
|
||||
}
|
||||
}
|
||||
|
||||
Object object;
|
||||
void GeometryManager::device_init_update_bvh(Scene *scene)
|
||||
{
|
||||
if (scene->bvh->params.bvh_layout == BVH_LAYOUT_BVH2) {
|
||||
/* To ensure that only 1 BVH2 scene is built a count of workers is used */
|
||||
BVH2 *const bvh2 = static_cast<BVH2 *>(scene->bvh);
|
||||
bvh2->building++;
|
||||
}
|
||||
}
|
||||
|
||||
void GeometryManager::device_update_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
bool can_refit,
|
||||
size_t n,
|
||||
size_t total,
|
||||
Progress &progress)
|
||||
{
|
||||
BVH *bvh = scene->bvh;
|
||||
BVH *sub_bvh = scene->bvh->get_device_bvh(device);
|
||||
GeometryManager::device_update_sub_bvh(
|
||||
device, dscene, bvh, sub_bvh, can_refit, n, total, &progress);
|
||||
}
|
||||
|
||||
void GeometryManager::device_update_bvh_postprocess(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
BVH *bvh = scene->bvh;
|
||||
|
||||
const bool has_bvh2_layout = (bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
|
||||
|
||||
if (has_bvh2_layout) {
|
||||
BVH2 *bvh2 = static_cast<BVH2 *>(scene->bvh);
|
||||
PackedBVH pack = std::move(bvh2->pack);
|
||||
dscene->data.bvh.root = pack.root_index;
|
||||
}
|
||||
else {
|
||||
dscene->data.bvh.root = -1;
|
||||
}
|
||||
|
||||
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
|
||||
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
|
||||
dscene->data.device_bvh = 0;
|
||||
}
|
||||
|
||||
bool Geometry::create_new_bvh_if_needed(Object *object,
|
||||
Device *device,
|
||||
DeviceScene *dscene,
|
||||
SceneParams *params)
|
||||
{
|
||||
bool status = false;
|
||||
const BVHLayout bvh_layout = BVHParams::best_bvh_layout(
|
||||
params->bvh_layout, device->get_bvh_layout_mask(dscene->data.kernel_features));
|
||||
if (need_build_bvh(bvh_layout)) {
|
||||
/* Ensure all visibility bits are set at the geometry level BVH. In
|
||||
* the object level BVH is where actual visibility is tested. */
|
||||
object.set_is_shadow_catcher(true);
|
||||
object.set_visibility(~0);
|
||||
object->set_is_shadow_catcher(true);
|
||||
object->set_visibility(~0);
|
||||
|
||||
object.set_geometry(this);
|
||||
object->set_geometry(this);
|
||||
|
||||
vector<Geometry *> geometry;
|
||||
geometry.push_back(this);
|
||||
vector<Object *> objects;
|
||||
objects.push_back(&object);
|
||||
|
||||
objects.push_back(object);
|
||||
|
||||
if (bvh && !need_update_rebuild) {
|
||||
progress->set_status(msg, "Refitting BVH");
|
||||
|
||||
bvh->replace_geometry(geometry, objects);
|
||||
|
||||
device->build_bvh(bvh, *progress, true);
|
||||
}
|
||||
else {
|
||||
progress->set_status(msg, "Building BVH");
|
||||
if (bvh != NULL) {
|
||||
delete bvh;
|
||||
}
|
||||
|
||||
BVHParams bparams;
|
||||
bparams.use_spatial_split = params->use_bvh_spatial_split;
|
||||
|
@ -89,23 +139,64 @@ void Geometry::compute_bvh(Device *device,
|
|||
bparams.bvh_type = params->bvh_type;
|
||||
bparams.curve_subdivisions = params->curve_subdivisions();
|
||||
|
||||
delete bvh;
|
||||
bvh = BVH::create(bparams, geometry, objects, device);
|
||||
MEM_GUARDED_CALL(progress, device->build_bvh, bvh, *progress, false);
|
||||
need_update_rebuild = true;
|
||||
}
|
||||
status = true;
|
||||
}
|
||||
|
||||
need_update_rebuild = false;
|
||||
need_update_bvh_for_offset = false;
|
||||
return status;
|
||||
}
|
||||
|
||||
void GeometryManager::device_update_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
void GeometryManager::device_update_sub_bvh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
BVH *bvh,
|
||||
BVH *sub_bvh,
|
||||
bool can_refit,
|
||||
size_t n,
|
||||
size_t total,
|
||||
Progress *progress)
|
||||
{
|
||||
string msg = "Updating Geometry BVH";
|
||||
|
||||
// Is this a multi-bvh?
|
||||
if (sub_bvh && can_refit) {
|
||||
progress->set_status(msg, "Refitting BVH");
|
||||
// Don't redo the setup if this is not a sub-bvh
|
||||
if (sub_bvh != bvh) {
|
||||
sub_bvh->replace_geometry(bvh->geometry, bvh->objects);
|
||||
}
|
||||
}
|
||||
else {
|
||||
progress->set_status(msg, "Building BVH");
|
||||
// Don't redo the setup if this is not a sub-bvh
|
||||
if (sub_bvh != bvh) {
|
||||
// Yes, so setup the device specific sub_bvh in the multi-bvh.
|
||||
BVHParams bparams = bvh->params;
|
||||
// Set the layout to the correct one for the device
|
||||
bparams.bvh_layout = device->get_bvh_layout(device, bvh->params.bvh_layout);
|
||||
if (sub_bvh != NULL) {
|
||||
delete sub_bvh;
|
||||
}
|
||||
VLOG_INFO << "Sub-BVH using layout " << bvh_layout_name(bparams.bvh_layout) << " from layout " << bvh_layout_name(bvh->params.bvh_layout);
|
||||
/* BVH2 should not have a sub-bvh as only 1 is built on the CPU */
|
||||
assert(bparams.bvh_layout != BVH_LAYOUT_BVH2);
|
||||
if(bparams.bvh_layout != BVH_LAYOUT_BVH2) {
|
||||
sub_bvh = BVH::create(bparams, bvh->geometry, bvh->objects, device);
|
||||
bvh->set_device_bvh(device, sub_bvh);
|
||||
}
|
||||
}
|
||||
can_refit = false;
|
||||
}
|
||||
device->build_bvh(sub_bvh, dscene, *progress, can_refit);
|
||||
}
|
||||
|
||||
bool GeometryManager::device_update_bvh_preprocess(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
/* bvh build */
|
||||
progress.set_status("Updating Scene BVH", "Building");
|
||||
|
||||
BVHParams bparams;
|
||||
bparams.top_level = true;
|
||||
|
@ -124,73 +215,57 @@ void GeometryManager::device_update_bvh(Device *device,
|
|||
|
||||
const bool can_refit = scene->bvh != nullptr &&
|
||||
(bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX ||
|
||||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL);
|
||||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_METAL ||
|
||||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_MULTI_OPTIX ||
|
||||
bparams.bvh_layout == BVHLayout::BVH_LAYOUT_MULTI_METAL);
|
||||
|
||||
BVH *bvh = scene->bvh;
|
||||
if (!scene->bvh) {
|
||||
bvh = scene->bvh = BVH::create(bparams, scene->geometry, scene->objects, device);
|
||||
}
|
||||
|
||||
device->build_bvh(bvh, progress, can_refit);
|
||||
/* Mark BVH as having not been built yet */
|
||||
bvh->built = false;
|
||||
return can_refit;
|
||||
}
|
||||
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
/*
|
||||
* Creates a new BVH for the geometry if it is needed otherwise
|
||||
* it determines if the BVH can be refitted. It also counts
|
||||
* the number of BVH that need to be built.
|
||||
*/
|
||||
size_t GeometryManager::create_object_bvhs(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
const BVHLayout bvh_layout,
|
||||
bool &need_update_scene_bvh)
|
||||
{
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry(
|
||||
{"device_update (object BVHs preprocess)", time});
|
||||
}
|
||||
});
|
||||
size_t num_bvh = 0;
|
||||
|
||||
if (scene->geometry.size() > object_pool.size()) {
|
||||
object_pool.resize(scene->geometry.size());
|
||||
}
|
||||
|
||||
const bool has_bvh2_layout = (bparams.bvh_layout == BVH_LAYOUT_BVH2);
|
||||
|
||||
PackedBVH pack;
|
||||
if (has_bvh2_layout) {
|
||||
pack = std::move(static_cast<BVH2 *>(bvh)->pack);
|
||||
}
|
||||
else {
|
||||
pack.root_index = -1;
|
||||
// Create BVH structures where needed
|
||||
int id = 0;
|
||||
foreach (Geometry *geom, scene->geometry) {
|
||||
if (geom->is_modified() || geom->need_update_bvh_for_offset) {
|
||||
need_update_scene_bvh = true;
|
||||
Object *object = &object_pool[id];
|
||||
if(geom->create_new_bvh_if_needed(object, device, dscene, &scene->params)) {
|
||||
num_bvh++;
|
||||
}
|
||||
}
|
||||
id++;
|
||||
}
|
||||
|
||||
/* copy to device */
|
||||
progress.set_status("Updating Scene BVH", "Copying BVH to device");
|
||||
|
||||
/* When using BVH2, we always have to copy/update the data as its layout is dependent on the
|
||||
* BVH's leaf nodes which may be different when the objects or vertices move. */
|
||||
|
||||
if (pack.nodes.size()) {
|
||||
dscene->bvh_nodes.steal_data(pack.nodes);
|
||||
dscene->bvh_nodes.copy_to_device();
|
||||
}
|
||||
if (pack.leaf_nodes.size()) {
|
||||
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
|
||||
dscene->bvh_leaf_nodes.copy_to_device();
|
||||
}
|
||||
if (pack.object_node.size()) {
|
||||
dscene->object_node.steal_data(pack.object_node);
|
||||
dscene->object_node.copy_to_device();
|
||||
}
|
||||
if (pack.prim_type.size()) {
|
||||
dscene->prim_type.steal_data(pack.prim_type);
|
||||
dscene->prim_type.copy_to_device();
|
||||
}
|
||||
if (pack.prim_visibility.size()) {
|
||||
dscene->prim_visibility.steal_data(pack.prim_visibility);
|
||||
dscene->prim_visibility.copy_to_device();
|
||||
}
|
||||
if (pack.prim_index.size()) {
|
||||
dscene->prim_index.steal_data(pack.prim_index);
|
||||
dscene->prim_index.copy_to_device();
|
||||
}
|
||||
if (pack.prim_object.size()) {
|
||||
dscene->prim_object.steal_data(pack.prim_object);
|
||||
dscene->prim_object.copy_to_device();
|
||||
}
|
||||
if (pack.prim_time.size()) {
|
||||
dscene->prim_time.steal_data(pack.prim_time);
|
||||
dscene->prim_time.copy_to_device();
|
||||
}
|
||||
|
||||
dscene->data.bvh.root = pack.root_index;
|
||||
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
|
||||
dscene->data.bvh.curve_subdivisions = scene->params.curve_subdivisions();
|
||||
/* The scene handle is set in 'CPUDevice::const_copy_to' and 'OptiXDevice::const_copy_to' */
|
||||
dscene->data.device_bvh = 0;
|
||||
return num_bvh;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
|
|
@ -35,65 +35,25 @@
|
|||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void GeometryManager::device_update_mesh(Device *,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
/**
|
||||
* Packs the geometry data into the device scene. That is it fills out
|
||||
* the geometry buffers
|
||||
*/
|
||||
void GeometryManager::device_update_mesh_preprocess(
|
||||
Device *device, DeviceScene *dscene, Scene *scene, Progress &progress)
|
||||
{
|
||||
/* Count. */
|
||||
size_t vert_size = 0;
|
||||
size_t tri_size = 0;
|
||||
|
||||
size_t curve_key_size = 0;
|
||||
size_t curve_size = 0;
|
||||
size_t curve_segment_size = 0;
|
||||
|
||||
size_t point_size = 0;
|
||||
|
||||
size_t patch_size = 0;
|
||||
|
||||
foreach (Geometry *geom, scene->geometry) {
|
||||
if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
|
||||
Mesh *mesh = static_cast<Mesh *>(geom);
|
||||
|
||||
vert_size += mesh->verts.size();
|
||||
tri_size += mesh->num_triangles();
|
||||
|
||||
if (mesh->get_num_subd_faces()) {
|
||||
Mesh::SubdFace last = mesh->get_subd_face(mesh->get_num_subd_faces() - 1);
|
||||
patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
|
||||
|
||||
/* patch tables are stored in same array so include them in patch_size */
|
||||
if (mesh->patch_table) {
|
||||
mesh->patch_table_offset = patch_size;
|
||||
patch_size += mesh->patch_table->total_size();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (geom->is_hair()) {
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
|
||||
curve_key_size += hair->get_curve_keys().size();
|
||||
curve_size += hair->num_curves();
|
||||
curve_segment_size += hair->num_segments();
|
||||
}
|
||||
else if (geom->is_pointcloud()) {
|
||||
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
|
||||
point_size += pointcloud->num_points();
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill in all the arrays. */
|
||||
if (tri_size != 0) {
|
||||
GeometrySizes *p_sizes = &(scene->geom_sizes);
|
||||
if (p_sizes->tri_size != 0) {
|
||||
/* normals */
|
||||
progress.set_status("Updating Mesh", "Computing normals");
|
||||
progress.set_status("Updating Mesh", "Computing mesh");
|
||||
|
||||
packed_float3 *tri_verts = dscene->tri_verts.alloc(vert_size);
|
||||
uint *tri_shader = dscene->tri_shader.alloc(tri_size);
|
||||
packed_float3 *vnormal = dscene->tri_vnormal.alloc(vert_size);
|
||||
packed_uint3 *tri_vindex = dscene->tri_vindex.alloc(tri_size);
|
||||
uint *tri_patch = dscene->tri_patch.alloc(tri_size);
|
||||
float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
|
||||
packed_float3 *tri_verts = dscene->tri_verts.alloc(p_sizes->vert_size);
|
||||
uint *tri_shader = dscene->tri_shader.alloc(p_sizes->tri_size);
|
||||
packed_float3 *vnormal = dscene->tri_vnormal.alloc(p_sizes->vert_size);
|
||||
packed_uint3 *tri_vindex = dscene->tri_vindex.alloc(p_sizes->tri_size);
|
||||
uint *tri_patch = dscene->tri_patch.alloc(p_sizes->tri_size);
|
||||
float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(p_sizes->vert_size);
|
||||
|
||||
const bool copy_all_data = dscene->tri_shader.need_realloc() ||
|
||||
dscene->tri_vindex.need_realloc() ||
|
||||
|
@ -123,29 +83,18 @@ void GeometryManager::device_update_mesh(Device *,
|
|||
&tri_patch[mesh->prim_offset],
|
||||
&tri_patch_uv[mesh->vert_offset]);
|
||||
}
|
||||
|
||||
if (progress.get_cancel())
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* vertex coordinates */
|
||||
progress.set_status("Updating Mesh", "Copying Mesh to device");
|
||||
|
||||
dscene->tri_verts.copy_to_device_if_modified();
|
||||
dscene->tri_shader.copy_to_device_if_modified();
|
||||
dscene->tri_vnormal.copy_to_device_if_modified();
|
||||
dscene->tri_vindex.copy_to_device_if_modified();
|
||||
dscene->tri_patch.copy_to_device_if_modified();
|
||||
dscene->tri_patch_uv.copy_to_device_if_modified();
|
||||
}
|
||||
|
||||
if (curve_segment_size != 0) {
|
||||
progress.set_status("Updating Mesh", "Copying Curves to device");
|
||||
if (p_sizes->curve_segment_size != 0) {
|
||||
progress.set_status("Updating Mesh", "Computing curves");
|
||||
|
||||
float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size);
|
||||
KernelCurve *curves = dscene->curves.alloc(curve_size);
|
||||
KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(curve_segment_size);
|
||||
float4 *curve_keys = dscene->curve_keys.alloc(p_sizes->curve_key_size);
|
||||
KernelCurve *curves = dscene->curves.alloc(p_sizes->curve_size);
|
||||
KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(p_sizes->curve_segment_size);
|
||||
|
||||
const bool copy_all_data = dscene->curve_keys.need_realloc() ||
|
||||
dscene->curves.need_realloc() ||
|
||||
|
@ -172,17 +121,13 @@ void GeometryManager::device_update_mesh(Device *,
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
dscene->curve_keys.copy_to_device_if_modified();
|
||||
dscene->curves.copy_to_device_if_modified();
|
||||
dscene->curve_segments.copy_to_device_if_modified();
|
||||
}
|
||||
|
||||
if (point_size != 0) {
|
||||
progress.set_status("Updating Mesh", "Copying Point clouds to device");
|
||||
if (p_sizes->point_size != 0) {
|
||||
progress.set_status("Updating Mesh", "Computing point clouds");
|
||||
|
||||
float4 *points = dscene->points.alloc(point_size);
|
||||
uint *points_shader = dscene->points_shader.alloc(point_size);
|
||||
float4 *points = dscene->points.alloc(p_sizes->point_size);
|
||||
uint *points_shader = dscene->points_shader.alloc(p_sizes->point_size);
|
||||
|
||||
foreach (Geometry *geom, scene->geometry) {
|
||||
if (geom->is_pointcloud()) {
|
||||
|
@ -193,15 +138,12 @@ void GeometryManager::device_update_mesh(Device *,
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
dscene->points.copy_to_device();
|
||||
dscene->points_shader.copy_to_device();
|
||||
}
|
||||
|
||||
if (patch_size != 0 && dscene->patches.need_realloc()) {
|
||||
progress.set_status("Updating Mesh", "Copying Patches to device");
|
||||
if (p_sizes->patch_size != 0 && dscene->patches.need_realloc()) {
|
||||
progress.set_status("Updating Mesh", "Computing patches");
|
||||
|
||||
uint *patch_data = dscene->patches.alloc(patch_size);
|
||||
uint *patch_data = dscene->patches.alloc(p_sizes->patch_size);
|
||||
|
||||
foreach (Geometry *geom, scene->geometry) {
|
||||
if (geom->is_mesh()) {
|
||||
|
@ -217,8 +159,6 @@ void GeometryManager::device_update_mesh(Device *,
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
dscene->patches.copy_to_device();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -714,7 +714,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
|
||||
/* Create new texture. */
|
||||
if (type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||
if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::FLOAT, float>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
float *pixels = (float *)img->mem->alloc(1, 1);
|
||||
|
@ -726,7 +727,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_FLOAT) {
|
||||
if (!file_load_image<TypeDesc::FLOAT, float>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::FLOAT, float>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
float *pixels = (float *)img->mem->alloc(1, 1);
|
||||
|
@ -735,7 +737,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_BYTE4) {
|
||||
if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
uchar *pixels = (uchar *)img->mem->alloc(1, 1);
|
||||
|
@ -747,7 +750,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_BYTE) {
|
||||
if (!file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::UINT8, uchar>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
uchar *pixels = (uchar *)img->mem->alloc(1, 1);
|
||||
|
@ -756,7 +760,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_HALF4) {
|
||||
if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::HALF, half>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
half *pixels = (half *)img->mem->alloc(1, 1);
|
||||
|
@ -768,7 +773,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_USHORT) {
|
||||
if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
|
||||
|
@ -777,7 +783,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_USHORT4) {
|
||||
if (!file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::USHORT, uint16_t>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
uint16_t *pixels = (uint16_t *)img->mem->alloc(1, 1);
|
||||
|
@ -789,7 +796,8 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
}
|
||||
}
|
||||
else if (type == IMAGE_DATA_TYPE_HALF) {
|
||||
if (!file_load_image<TypeDesc::HALF, half>(img, texture_limit)) {
|
||||
bool status = file_load_image<TypeDesc::HALF, half>(img, texture_limit);
|
||||
if (!status) {
|
||||
/* on failure to load, we set a 1x1 pixels pink image */
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
half *pixels = (half *)img->mem->alloc(1, 1);
|
||||
|
@ -811,8 +819,14 @@ void ImageManager::device_load_image(Device *device, Scene *scene, size_t slot,
|
|||
#endif
|
||||
|
||||
{
|
||||
VLOG_INFO << "BEGIN Copy texture:" << img->mem->name;
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
VLOG_INFO << "LOCK Copy texture:" << img->mem->name;
|
||||
//vector<device_memory *> image{img->mem};
|
||||
//img->mem->modified = true;
|
||||
//img->mem->device->upload_changed(image);
|
||||
img->mem->copy_to_device();
|
||||
VLOG_INFO << "END Copy texture:" << img->mem->name;
|
||||
}
|
||||
|
||||
/* Cleanup memory in image loader. */
|
||||
|
@ -839,6 +853,7 @@ void ImageManager::device_free_image(Device *, size_t slot)
|
|||
if (img->mem) {
|
||||
thread_scoped_lock device_lock(device_mutex);
|
||||
delete img->mem;
|
||||
img->mem = NULL;
|
||||
}
|
||||
|
||||
delete img->loader;
|
||||
|
|
|
@ -725,11 +725,8 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
|
|||
}
|
||||
});
|
||||
|
||||
if (progress.get_cancel()) {
|
||||
return;
|
||||
}
|
||||
|
||||
dscene->objects.copy_to_device_if_modified();
|
||||
/* Moved copy to GeometryManager::device_update
|
||||
for better control over updating */
|
||||
if (state.need_motion == Scene::MOTION_PASS) {
|
||||
dscene->object_motion_pass.copy_to_device();
|
||||
}
|
||||
|
@ -742,7 +739,6 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene, Scene *scene,
|
|||
dscene->data.bvh.have_points = state.have_points;
|
||||
dscene->data.bvh.have_volumes = state.have_volumes;
|
||||
|
||||
dscene->objects.clear_modified();
|
||||
dscene->object_motion_pass.clear_modified();
|
||||
dscene->object_motion.clear_modified();
|
||||
}
|
||||
|
@ -926,16 +922,16 @@ void ObjectManager::device_update_flags(
|
|||
dscene->object_volume_step.clear_modified();
|
||||
}
|
||||
|
||||
void ObjectManager::device_update_geom_offsets(Device *, DeviceScene *dscene, Scene *scene)
|
||||
bool ObjectManager::device_update_geom_offsets(Device *, DeviceScene *dscene, Scene *scene)
|
||||
{
|
||||
bool update = false;
|
||||
|
||||
if (dscene->objects.size() == 0) {
|
||||
return;
|
||||
return update;
|
||||
}
|
||||
|
||||
KernelObject *kobjects = dscene->objects.data();
|
||||
|
||||
bool update = false;
|
||||
|
||||
foreach (Object *object, scene->objects) {
|
||||
Geometry *geom = object->geometry;
|
||||
|
||||
|
@ -967,8 +963,10 @@ void ObjectManager::device_update_geom_offsets(Device *, DeviceScene *dscene, Sc
|
|||
}
|
||||
|
||||
if (update) {
|
||||
dscene->objects.copy_to_device();
|
||||
/* Moved to copy to device_update_attributes */
|
||||
dscene->objects.tag_modified();
|
||||
}
|
||||
return update;
|
||||
}
|
||||
|
||||
void ObjectManager::device_free(Device *, DeviceScene *dscene, bool force_free)
|
||||
|
|
|
@ -168,7 +168,7 @@ class ObjectManager {
|
|||
Scene *scene,
|
||||
Progress &progress,
|
||||
bool bounds_valid = true);
|
||||
void device_update_geom_offsets(Device *device, DeviceScene *dscene, Scene *scene);
|
||||
bool device_update_geom_offsets(Device *device, DeviceScene *dscene, Scene *scene);
|
||||
|
||||
void device_free(Device *device, DeviceScene *dscene, bool force_free);
|
||||
|
||||
|
|
|
@ -34,6 +34,17 @@
|
|||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* checks the progress for if a cancel has been requested and also
|
||||
* the device to see if an error has occurred.
|
||||
*/
|
||||
bool Scene::check_cancel_update(Progress &progress, Device *device) {
|
||||
bool status = false;
|
||||
status = progress.get_cancel();
|
||||
|
||||
return status || ((device != NULL) && device->have_error());
|
||||
}
|
||||
|
||||
Scene::Scene(const SceneParams ¶ms_, Device *device)
|
||||
: name("Scene"),
|
||||
bvh(NULL),
|
||||
|
@ -50,7 +61,17 @@ Scene::Scene(const SceneParams ¶ms_, Device *device)
|
|||
/* TODO(sergey): Check if it's indeed optimal value for the split kernel. */
|
||||
max_closure_global(1)
|
||||
{
|
||||
/* Create a DeviceScene for each device */
|
||||
device->foreach_device([this](Device *sub_device) {
|
||||
auto sub_dscene = make_unique<DeviceScene>(sub_device);
|
||||
memset((void *)&sub_dscene->data, 0, sizeof(sub_dscene->data));
|
||||
this->dscenes.push_back(std::move(sub_dscene));
|
||||
});
|
||||
memset((void *)&dscene.data, 0, sizeof(dscene.data));
|
||||
|
||||
/* Stats time logging allocate memory to store times for each device */
|
||||
size_t device_count = this->dscenes.size();
|
||||
this->times.resize(device_count);
|
||||
|
||||
shader_manager = ShaderManager::create(
|
||||
device->info.has_osl ? params.shadingsystem : SHADINGSYSTEM_SVM, device);
|
||||
|
@ -212,8 +233,7 @@ void Scene::device_update(Device *device_, Progress &progress)
|
|||
progress.set_status("Updating Shaders");
|
||||
shader_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
procedural_manager->update(this, progress);
|
||||
|
||||
|
@ -223,14 +243,12 @@ void Scene::device_update(Device *device_, Progress &progress)
|
|||
progress.set_status("Updating Background");
|
||||
background->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Camera");
|
||||
camera->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
geometry_manager->device_update_preprocess(device, this, progress);
|
||||
|
||||
|
@ -240,80 +258,67 @@ void Scene::device_update(Device *device_, Progress &progress)
|
|||
progress.set_status("Updating Objects");
|
||||
object_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Particle Systems");
|
||||
particle_system_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Meshes");
|
||||
geometry_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Objects Flags");
|
||||
object_manager->device_update_flags(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Primitive Offsets");
|
||||
object_manager->device_update_prim_offsets(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Images");
|
||||
image_manager->device_update(device, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Camera Volume");
|
||||
camera->device_update_volume(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Lookup Tables");
|
||||
lookup_tables->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Lights");
|
||||
light_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Integrator");
|
||||
integrator->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Film");
|
||||
film->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Lookup Tables");
|
||||
lookup_tables->device_update(device, &dscene, this);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
progress.set_status("Updating Baking");
|
||||
bake_manager->device_update(device, &dscene, this, progress);
|
||||
|
||||
if (progress.get_cancel() || device->have_error())
|
||||
return;
|
||||
if(check_cancel_update(progress, device)) { return; }
|
||||
|
||||
if (device->have_error() == false) {
|
||||
dscene.data.volume_stack_size = get_volume_stack_size();
|
||||
|
|
|
@ -49,6 +49,40 @@ class BakeData;
|
|||
class RenderStats;
|
||||
class SceneUpdateStats;
|
||||
class Volume;
|
||||
class DeviceScene;
|
||||
|
||||
/* Geometry Sizes */
|
||||
struct GeometrySizes {
|
||||
size_t vert_size;
|
||||
size_t tri_size;
|
||||
|
||||
size_t curve_size;
|
||||
size_t curve_key_size;
|
||||
size_t curve_segment_size;
|
||||
|
||||
size_t point_size;
|
||||
|
||||
size_t patch_size;
|
||||
size_t face_size;
|
||||
size_t corner_size;
|
||||
};
|
||||
|
||||
/* Attribute Sizes */
|
||||
struct AttributeSizes {
|
||||
size_t attr_float_size;
|
||||
size_t attr_float2_size;
|
||||
size_t attr_float3_size;
|
||||
size_t attr_float4_size;
|
||||
size_t attr_uchar4_size;
|
||||
};
|
||||
|
||||
/* Stats time logging */
|
||||
struct SceneTimes {
|
||||
double mesh;
|
||||
double attrib;
|
||||
double object_bvh;
|
||||
double scene_bvh;
|
||||
};
|
||||
|
||||
/* Scene Parameters */
|
||||
|
||||
|
@ -156,7 +190,14 @@ class Scene : public NodeOwner {
|
|||
/* device */
|
||||
Device *device;
|
||||
DeviceScene dscene;
|
||||
GeometrySizes geom_sizes;
|
||||
AttributeSizes attrib_sizes;
|
||||
|
||||
/* Stores a DeviceScene for each sub-device */
|
||||
std::vector<unique_ptr<DeviceScene>> dscenes;
|
||||
|
||||
vector<SceneTimes> times;
|
||||
|
||||
/* parameters */
|
||||
SceneParams params;
|
||||
|
||||
|
@ -241,6 +282,12 @@ class Scene : public NodeOwner {
|
|||
template<typename T> void delete_nodes(const set<T *> &nodes, const NodeOwner *owner);
|
||||
|
||||
protected:
|
||||
/*
|
||||
* checks the progress for if a cancel has been requested and also
|
||||
* the device to see if an error has occurred.
|
||||
*/
|
||||
bool check_cancel_update(Progress &progress, Device *device);
|
||||
|
||||
/* Check if some heavy data worth logging was updated.
|
||||
* Mainly used to suppress extra annoying logging.
|
||||
*/
|
||||
|
|
|
@ -480,7 +480,7 @@ void ShaderManager::device_update(Device *device,
|
|||
device_update_specific(device, dscene, scene, progress);
|
||||
}
|
||||
|
||||
void ShaderManager::device_update_common(Device * /*device*/,
|
||||
void ShaderManager::device_update_common(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress & /*progress*/)
|
||||
|
|
Loading…
Reference in New Issue