Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552

Open
William Leeson wants to merge 137 commits from leesonw/blender-cluster:upload_changed into main

14 changed files with 39 additions and 238 deletions
Showing only changes of commit 5409cc2011

View File

@@ -134,32 +134,27 @@ void CPUDevice::mem_alloc(device_memory &mem)
}
}
void CPUDevice::mem_copy_to(device_memory &mem)
void CPUDevice::mem_copy_to(device_memory &mem, size_t /* size */, size_t /* offset */)
{
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
mem_alloc(mem);
}
/* copy is no-op */
}
}
void CPUDevice::mem_copy_to(device_memory &mem, size_t, size_t offset)
{
/* size (2n param) is not used as this does not actually copy anything
/* size (2nd param) and offset are not used as this does not actually copy anything
* as the original host memory is used as is. The device
* memory is the same memory.
*/
mem_copy_to(mem);
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
mem_alloc(mem);
}
/* copy is no-op */
}
}
void CPUDevice::mem_copy_from(
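Throughout this commit the separate no-argument mem_copy_to() and the ranged overload are folded into a single signature with defaulted parameters, where size = -1 (i.e. size_t(-1), since the parameter is unsigned) means "copy the whole buffer" and offset defaults to 0. A minimal, self-contained sketch of that convention (illustration only, not code from this patch):

#include <cstddef>
#include <cstring>

/* Hypothetical helper showing the sentinel convention: one entry point covers
 * both the old full copy and the new ranged copy. */
void copy_range(void *device_dst, const void *host_src, size_t buffer_bytes,
                size_t size = size_t(-1), size_t offset = 0)
{
  if (size == size_t(-1)) {
    size = buffer_bytes; /* no size given: copy everything, like the old overload */
  }
  /* The same offset is applied on both sides so the copied range stays aligned. */
  std::memcpy(static_cast<char *>(device_dst) + offset,
              static_cast<const char *>(host_src) + offset,
              size);
}

For the CPU device the range is irrelevant, as the comment in the hunk above explains: the device uses the host memory directly, so the copy stays a no-op.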

View File

@@ -63,8 +63,7 @@ class CPUDevice : public Device {
bool load_texture_info();
virtual void mem_alloc(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
virtual void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
virtual void mem_copy_from(
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
virtual void mem_zero(device_memory &mem) override;

View File

@@ -584,29 +584,6 @@ void CUDADevice::mem_alloc(device_memory &mem)
}
}
void CUDADevice::mem_copy_to(device_memory &mem)
{
if (mem.type == MEM_GLOBAL) {
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
global_free(mem);
global_alloc(mem);
}
else {
generic_copy_to(mem);
}
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
generic_alloc(mem);
}
generic_copy_to(mem);
}
}
void CUDADevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
{
if (mem.type == MEM_GLOBAL) {

View File

@@ -76,9 +76,7 @@ class CUDADevice : public GPUDevice {
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

View File

@@ -794,33 +794,19 @@ void GPUDevice::generic_free(device_memory &mem)
}
}
void GPUDevice::generic_copy_to(device_memory &mem)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* backend device allocation regardless of mem.host_pointer and mem.shared_pointer, and should
* copy data from mem.host_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size(), 0);
}
}
void GPUDevice::generic_copy_to(device_memory &mem, size_t size, size_t offset)
{
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, mem.memory_size(), offset);
size = ((size == -1) ? mem.memory_size() : size);
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, size, offset);
}
}
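generic_copy_to() now resolves the -1 sentinel to mem.memory_size() and passes size and offset through to copy_host_to_device(). For reference, this is roughly how a ranged host-to-device upload looks with the CUDA driver API; a sketch under the assumption that host and device buffers share the same layout (copy_range_to_device is a made-up name, not part of this patch):

#include <cuda.h>

/* Upload `size` bytes starting at byte `offset`. The same offset is applied to
 * both pointers so element i on the host lands on element i on the device. */
static CUresult copy_range_to_device(CUdeviceptr device_dst, const void *host_src,
                                     size_t size, size_t offset)
{
  const char *src = static_cast<const char *>(host_src) + offset;
  return cuMemcpyHtoD(device_dst + offset, src, size);
}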

View File

@@ -300,8 +300,7 @@ class Device {
virtual void *host_mem_alloc(size_t size, int alignment);
virtual void host_mem_free(void *p_mem);
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) = 0;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset = 0) = 0;
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
@@ -383,8 +382,7 @@ class GPUDevice : public Device {
* support of device/host allocations. */
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
virtual void generic_free(device_memory &mem);
virtual void generic_copy_to(device_memory &mem);
void generic_copy_to(device_memory &mem, size_t size, size_t offset);
virtual void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
/* total - amount of device memory, free - amount of available device memory */
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;
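One C++ detail worth noting about these declarations: default arguments are bound to the static type of the call expression, not the dynamic type, so the defaults repeated on each override only apply when calling through that concrete device class. The pure-virtual Device::mem_copy_to() above defaults only offset, so a call through a Device pointer still has to pass a size. A self-contained illustration with invented names:

#include <cstddef>

struct Base {
  virtual ~Base() = default;
  virtual void copy(size_t size, size_t offset = 0) = 0;
};

struct Derived : Base {
  void copy(size_t size = size_t(-1), size_t offset = 0) override { (void)size; (void)offset; }
};

void caller(Base &b, Derived &d)
{
  d.copy();    /* OK: Derived's defaults are visible, size = size_t(-1), offset = 0 */
  b.copy(128); /* OK: offset defaults to 0 through Base */
  /* b.copy();   would not compile: Base declares no default for size */
}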

View File

@@ -27,7 +27,7 @@ class DummyDevice : public Device {
virtual void mem_alloc(device_memory &) override {}
virtual void mem_copy_to(device_memory &) override {}
// virtual void mem_copy_to(device_memory &) override {}
virtual void mem_copy_to(device_memory &, size_t, size_t) override {}

View File

@@ -531,29 +531,6 @@ void HIPDevice::mem_alloc(device_memory &mem)
}
}
void HIPDevice::mem_copy_to(device_memory &mem)
{
if (mem.type == MEM_GLOBAL) {
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
global_free(mem);
global_alloc(mem);
}
else {
generic_copy_to(mem);
}
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
generic_alloc(mem);
}
generic_copy_to(mem);
}
}
void HIPDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
{
if (mem.type == MEM_GLOBAL) {

View File

@@ -67,9 +67,7 @@ class HIPDevice : public GPUDevice {
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

View File

@@ -82,17 +82,10 @@ void device_memory::device_free()
}
}
void device_memory::device_copy_to()
{
if (host_pointer) {
device->mem_copy_to(*this);
}
}
void device_memory::device_copy_to(size_t size, size_t offset)
{
if (host_pointer) {
device->mem_copy_to(*this, size, offset);
device->mem_copy_to(*this, memory_elements_size(size), memory_elements_size(offset));
}
}
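device_copy_to() now takes size and offset as element counts and scales both to bytes before handing them to the device, via the memory_elements_size() helper used above (defined elsewhere in the branch; its definition is not part of this commit's diff). A stand-in sketch of what that conversion amounts to:

#include <cstddef>

/* Element-count to byte conversion, e.g. a float4 array has 4 components of
 * 4 bytes each, so 10 elements -> 160 bytes. The real helper presumably derives
 * these factors from the type information stored on device_memory. */
size_t elements_to_bytes(size_t elements, size_t components_per_element, size_t bytes_per_component)
{
  return elements * components_per_element * bytes_per_component;
}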

View File

@@ -286,8 +286,7 @@ class device_memory {
/* Device memory allocation and copying. */
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_to(size_t size, size_t offset);
void device_copy_to(size_t size = -1, size_t offset = 0);
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
void device_zero();
@@ -430,7 +429,6 @@ template<typename T> class device_vector : public device_memory {
host_free();
if (new_size > data_size) {
device_free();
// host_pointer = host_alloc(sizeof(T) * new_size);
modified = true;
assert(device_pointer == 0);
}
@@ -584,21 +582,15 @@ template<typename T> class device_vector : public device_memory {
return data()[i];
}
void copy_to_device()
void copy_to_device(size_t size = -1, size_t offset = 0)
{
if (data_size != 0) {
device_copy_to();
}
}
void copy_to_device(size_t size, size_t offset)
{
if (data_size != 0) {
assert(size <= data_size);
size = ((size == -1) ? data_size : size);
if (data_size != 0) {
assert((size + offset) <= data_size);
device_copy_to(size, offset);
}
}
void copy_to_device_if_modified(size_t size, size_t offset)
void copy_to_device_if_modified(size_t size = -1, size_t offset = 0)
{
if (!modified) {
return;
@@ -607,15 +599,6 @@ template<typename T> class device_vector : public device_memory {
copy_to_device(size, offset);
}
void copy_to_device_if_modified()
{
if (!modified) {
return;
}
copy_to_device();
}
void clear_modified()
{
modified = false;
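With the defaulted parameters, existing copy_to_device() and copy_to_device_if_modified() call sites keep uploading the whole vector, while callers that know which range changed can restrict the upload. A hypothetical call-site sketch (size and offset are element counts here, as the assert against data_size above implies):

/* Upload only elements [first, first + count) of a device_vector after they
 * were modified on the host; device_copy_to() converts the counts to bytes. */
void upload_changed_range(device_vector<float4> &data, size_t first, size_t count)
{
  data.copy_to_device(count, first);
}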

View File

@@ -147,17 +147,13 @@ class MetalDevice : public Device {
MetalMem *generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);
void generic_copy_to(device_memory &mem, size_t size, size_t offset);
void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_from(device_memory &mem)
{

View File

@@ -766,30 +766,15 @@ size_t offset)
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
size = ((size == -1) ? mem.memory_size() : size);
MetalMem &mmem = *metal_mem_map.at(&mem);
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
memcpy(reinterpret_cast<unsigned char *>(mmem.hostPtr) + offset, reinterpret_cast<unsigned char *>(mem.host_pointer) + offset, size);
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
[mmem.mtlBuffer didModifyRange:NSMakeRange(offset, size)];
}
}
}
void MetalDevice::generic_copy_to(device_memory &mem)
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
MetalMem &mmem = *metal_mem_map.at(&mem);
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
[mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())];
}
}
}
void MetalDevice::generic_free(device_memory &mem)
{
if (mem.device_pointer) {
@@ -870,26 +855,6 @@ void MetalDevice::mem_copy_to(device_memory &mem, size_t size, size_t offset)
}
}
void MetalDevice::mem_copy_to(device_memory &mem)
{
if (mem.type == MEM_GLOBAL) {
if ((mem.device_size < mem.memory_size()) || (!mem.device_pointer)) {
global_free(mem);
global_alloc(mem);
}
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
generic_alloc(mem);
}
generic_copy_to(mem);
}
}
void MetalDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
{
if (mem.host_pointer) {

View File

@@ -307,37 +307,6 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
stats.mem_alloc(mem.device_size);
}
void mem_copy_to(device_memory &mem) override
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
/* The tile buffers are allocated on each device (see below), so copy to all of them */
foreach (const vector<SubDevice *> &island, peer_islands) {
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
mem.device = owner_sub->device;
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
mem.device_size = existing_size;
owner_sub->device->mem_copy_to(mem);
owner_sub->ptr_map[key] = mem.device_pointer;
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
/* Need to create texture objects and update pointer in kernel globals on all devices */
foreach (SubDevice *island_sub, island) {
if (island_sub != owner_sub) {
island_sub->device->mem_copy_to(mem);
}
}
}
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override
{
device_ptr existing_key = mem.device_pointer;
@@ -351,7 +320,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
mem.device_size = existing_size;
owner_sub->device->mem_copy_to(mem);
owner_sub->device->mem_copy_to(mem, size, offset);
owner_sub->ptr_map[key] = mem.device_pointer;
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
@@ -447,39 +416,6 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
}
}
// void mem_free(device_memory &mem) override
// {
// device_ptr key = mem.device_pointer;
// size_t existing_size = mem.device_size;
// /* Free memory that was allocated for all devices (see above) on each device */
// foreach (const vector<SubDevice *> &island, peer_islands) {
// SubDevice *owner_sub = find_matching_mem_device(key, island.front());
// mem.device = owner_sub->device;
// mem.device_pointer = owner_sub->ptr_map[key];
// mem.device_size = existing_size;
// owner_sub->device->mem_free(mem);
// owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
// if (mem.type == MEM_TEXTURE) {
// /* Free texture objects on all devices */
// foreach (SubDevice *island_sub, island) {
// if (island_sub != owner_sub) {
// island_sub->device->mem_free(mem);
// }
// }
// }
// }
// if (mem.device_pointer) {
// mem.device = this;
// mem.device_pointer = 0;
// mem.device_size = 0;
// stats.mem_free(existing_size);
// }
// }
void const_copy_to(const char *name, void *host, size_t size) override
{
foreach (SubDevice *sub, devices)