Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552
|
@ -134,32 +134,27 @@ void CPUDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_to(device_memory &mem)
|
||||
void CPUDevice::mem_copy_to(device_memory &mem, size_t /* size */, size_t /* offset */)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
mem_alloc(mem);
|
||||
}
|
||||
|
||||
/* copy is no-op */
|
||||
}
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_to(device_memory &mem, size_t, size_t offset)
|
||||
{
|
||||
/* size (2n param) is not used as this does not actually copy anything
|
||||
/* size (2n param) or offset are not used as this does not actually copy anything
|
||||
* as the original host memory is used as is. The device
|
||||
* memory is the same memory.
|
||||
*/
|
||||
mem_copy_to(mem);
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
mem_alloc(mem);
|
||||
}
|
||||
|
||||
/* copy is no-op */
|
||||
}
|
||||
}
|
||||
|
||||
void CPUDevice::mem_copy_from(
|
||||
|
|
|
@ -63,8 +63,7 @@ class CPUDevice : public Device {
|
|||
bool load_texture_info();
|
||||
|
||||
virtual void mem_alloc(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem) override;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
|
||||
virtual void mem_copy_from(
|
||||
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
virtual void mem_zero(device_memory &mem) override;
|
||||
|
|
|
@ -584,29 +584,6 @@ void CUDADevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_copy_to(device_memory &mem)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else {
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
|
|
|
@ -76,9 +76,7 @@ class CUDADevice : public GPUDevice {
|
|||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
|
|
|
@ -794,33 +794,19 @@ void GPUDevice::generic_free(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void GPUDevice::generic_copy_to(device_memory &mem)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
|
||||
* copy data from mem.host_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDevice::generic_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
|
||||
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
|
||||
* mem.host_pointer. */
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size(), offset);
|
||||
size = ((size == -1) ? mem.memory_size() : size);
|
||||
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -300,8 +300,7 @@ class Device {
|
|||
virtual void *host_mem_alloc(size_t size, int alignment);
|
||||
virtual void host_mem_free(void *p_mem);
|
||||
virtual void mem_alloc(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) = 0;
|
||||
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset = 0) = 0;
|
||||
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
|
||||
virtual void mem_zero(device_memory &mem) = 0;
|
||||
virtual void mem_free(device_memory &mem) = 0;
|
||||
|
@ -383,8 +382,7 @@ class GPUDevice : public Device {
|
|||
* support of device/host allocations. */
|
||||
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
|
||||
virtual void generic_free(device_memory &mem);
|
||||
virtual void generic_copy_to(device_memory &mem);
|
||||
void generic_copy_to(device_memory &mem, size_t size, size_t offset);
|
||||
virtual void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
|
||||
|
||||
/* total - amount of device memory, free - amount of available device memory */
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
|
||||
|
|
|
@ -27,7 +27,7 @@ class DummyDevice : public Device {
|
|||
|
||||
virtual void mem_alloc(device_memory &) override {}
|
||||
|
||||
virtual void mem_copy_to(device_memory &) override {}
|
||||
// virtual void mem_copy_to(device_memory &) override {}
|
||||
|
||||
virtual void mem_copy_to(device_memory &, size_t, size_t) override {}
|
||||
|
||||
|
|
|
@ -531,29 +531,6 @@ void HIPDevice::mem_alloc(device_memory &mem)
|
|||
}
|
||||
}
|
||||
|
||||
void HIPDevice::mem_copy_to(device_memory &mem)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
else {
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
|
|
|
@ -67,9 +67,7 @@ class HIPDevice : public GPUDevice {
|
|||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
|
|
|
@ -82,17 +82,10 @@ void device_memory::device_free()
|
|||
}
|
||||
}
|
||||
|
||||
void device_memory::device_copy_to()
|
||||
{
|
||||
if (host_pointer) {
|
||||
device->mem_copy_to(*this);
|
||||
}
|
||||
}
|
||||
|
||||
void device_memory::device_copy_to(size_t size, size_t offset)
|
||||
{
|
||||
if (host_pointer) {
|
||||
device->mem_copy_to(*this, size, offset);
|
||||
device->mem_copy_to(*this, memory_elements_size(size), memory_elements_size(offset));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -286,8 +286,7 @@ class device_memory {
|
|||
/* Device memory allocation and copying. */
|
||||
void device_alloc();
|
||||
void device_free();
|
||||
void device_copy_to();
|
||||
void device_copy_to(size_t size, size_t offset);
|
||||
void device_copy_to(size_t size = -1, size_t offset = 0);
|
||||
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
|
||||
void device_zero();
|
||||
|
||||
|
@ -430,7 +429,6 @@ template<typename T> class device_vector : public device_memory {
|
|||
host_free();
|
||||
if (new_size > data_size) {
|
||||
device_free();
|
||||
// host_pointer = host_alloc(sizeof(T) * new_size);
|
||||
modified = true;
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
|
@ -584,21 +582,15 @@ template<typename T> class device_vector : public device_memory {
|
|||
return data()[i];
|
||||
}
|
||||
|
||||
void copy_to_device()
|
||||
void copy_to_device(size_t size = -1, size_t offset = 0)
|
||||
{
|
||||
if (data_size != 0) {
|
||||
device_copy_to();
|
||||
}
|
||||
}
|
||||
|
||||
void copy_to_device(size_t size, size_t offset)
|
||||
{
|
||||
if (data_size != 0) {
|
||||
assert(size <= data_size);
|
||||
size = ((size == -1) ? data_size : size);
|
||||
if (data_size != 0) {
|
||||
assert((size + offset) <= data_size);
|
||||
device_copy_to(size, offset);
|
||||
}
|
||||
}
|
||||
void copy_to_device_if_modified(size_t size, size_t offset)
|
||||
void copy_to_device_if_modified(size_t size = -1, size_t offset = 0)
|
||||
{
|
||||
if (!modified) {
|
||||
return;
|
||||
|
@ -607,15 +599,6 @@ template<typename T> class device_vector : public device_memory {
|
|||
copy_to_device(size, offset);
|
||||
}
|
||||
|
||||
void copy_to_device_if_modified()
|
||||
{
|
||||
if (!modified) {
|
||||
return;
|
||||
}
|
||||
|
||||
copy_to_device();
|
||||
}
|
||||
|
||||
void clear_modified()
|
||||
{
|
||||
modified = false;
|
||||
|
|
|
@ -147,17 +147,13 @@ class MetalDevice : public Device {
|
|||
|
||||
MetalMem *generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem, size_t size, size_t offset);
|
||||
void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
|
||||
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem)
|
||||
{
|
||||
|
|
|
@ -766,30 +766,15 @@ size_t offset)
|
|||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
|
||||
size = ((size == -1) ? mem.memory_size() : size);
|
||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
||||
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
|
||||
memcpy( reinterpret_cast<unsigned char *>(mmem.hostPtr) + offset, reinterpret_cast<unsigned char *>(mem.host_pointer) + offset, size);
|
||||
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
|
||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(offset, size)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::generic_copy_to(device_memory &mem)
|
||||
{
|
||||
if (!mem.host_pointer || !mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
|
||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
||||
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
|
||||
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
|
||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::generic_free(device_memory &mem)
|
||||
{
|
||||
if (mem.device_pointer) {
|
||||
|
@ -870,26 +855,6 @@ void MetalDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
|
|||
}
|
||||
}
|
||||
|
||||
void MetalDevice::mem_copy_to(device_memory &mem)
|
||||
{
|
||||
if (mem.type == MEM_GLOBAL) {
|
||||
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
|
||||
global_free(mem);
|
||||
global_alloc(mem);
|
||||
}
|
||||
}
|
||||
else if (mem.type == MEM_TEXTURE) {
|
||||
tex_free((device_texture &)mem);
|
||||
tex_alloc((device_texture &)mem);
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer) {
|
||||
generic_alloc(mem);
|
||||
}
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
|
||||
{
|
||||
if (mem.host_pointer) {
|
||||
|
|
|
@ -307,37 +307,6 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
stats.mem_alloc(mem.device_size);
|
||||
}
|
||||
|
||||
void mem_copy_to(device_memory &mem) override
|
||||
{
|
||||
device_ptr existing_key = mem.device_pointer;
|
||||
device_ptr key = (existing_key) ? existing_key : unique_key++;
|
||||
size_t existing_size = mem.device_size;
|
||||
|
||||
/* The tile buffers are allocated on each device (see below), so copy to all of them */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
owner_sub->device->mem_copy_to(mem);
|
||||
owner_sub->ptr_map[key] = mem.device_pointer;
|
||||
|
||||
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
|
||||
/* Need to create texture objects and update pointer in kernel globals on all devices */
|
||||
foreach (SubDevice *island_sub, island) {
|
||||
if (island_sub != owner_sub) {
|
||||
island_sub->device->mem_copy_to(mem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mem.device = this;
|
||||
mem.device_pointer = key;
|
||||
stats.mem_alloc(mem.device_size - existing_size);
|
||||
}
|
||||
|
||||
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override
|
||||
{
|
||||
device_ptr existing_key = mem.device_pointer;
|
||||
|
@ -351,7 +320,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
owner_sub->device->mem_copy_to(mem);
|
||||
owner_sub->device->mem_copy_to(mem, size, offset);
|
||||
owner_sub->ptr_map[key] = mem.device_pointer;
|
||||
|
||||
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
|
||||
|
@ -447,39 +416,6 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
}
|
||||
}
|
||||
|
||||
// void mem_free(device_memory &mem) override
|
||||
// {
|
||||
// device_ptr key = mem.device_pointer;
|
||||
// size_t existing_size = mem.device_size;
|
||||
|
||||
// /* Free memory that was allocated for all devices (see above) on each device */
|
||||
// foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
// SubDevice *owner_sub = find_matching_mem_device(key, island.front());
|
||||
// mem.device = owner_sub->device;
|
||||
// mem.device_pointer = owner_sub->ptr_map[key];
|
||||
// mem.device_size = existing_size;
|
||||
|
||||
// owner_sub->device->mem_free(mem);
|
||||
// owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
|
||||
|
||||
// if (mem.type == MEM_TEXTURE) {
|
||||
// /* Free texture objects on all devices */
|
||||
// foreach (SubDevice *island_sub, island) {
|
||||
// if (island_sub != owner_sub) {
|
||||
// island_sub->device->mem_free(mem);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (mem.device_pointer) {
|
||||
// mem.device = this;
|
||||
// mem.device_pointer = 0;
|
||||
// mem.device_size = 0;
|
||||
// stats.mem_free(existing_size);
|
||||
// }
|
||||
// }
|
||||
|
||||
void const_copy_to(const char *name, void *host, size_t size) override
|
||||
{
|
||||
foreach (SubDevice *sub, devices)
|
||||
|
|
Loading…
Reference in New Issue