Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552
|
@ -26,74 +26,68 @@ class MultiDevice : public Device {
|
|||
public:
|
||||
struct SubDevice {
|
||||
Stats stats;
|
||||
Device *device;
|
||||
unique_ptr<Device> device;
|
||||
map<device_ptr, device_ptr> ptr_map;
|
||||
int peer_island_index = -1;
|
||||
};
|
||||
|
||||
// Use a vector instead of a list so that parallel_for indices map directly to sub-devices.
|
||||
// The index can also be used to look up the real device pointer more quickly. Because a
|
||||
// vector reallocates its storage on resize, the sub-devices are stored as pointers.
|
||||
vector<SubDevice *> devices;
|
||||
/* Use a vector instead of a list so that parallel_for indices map directly to sub-devices.
|
||||
The index can also be used to look up the real device pointer more quickly. Because a
|
||||
vector reallocates its storage on resize, the sub-devices are stored as pointers. */
|
||||
vector<unique_ptr<SubDevice>> devices;
|
||||
device_ptr unique_key;
|
||||
vector<vector<SubDevice *>> peer_islands;
|
||||
|
||||
MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler), unique_key(1)
|
||||
{
|
||||
int cpu_device_idx = -1;
|
||||
foreach (const DeviceInfo &subinfo, info.multi_devices) {
|
||||
/* Always add CPU devices at the back since GPU devices can change
|
||||
* host memory pointers, which CPU uses as device pointer. */
|
||||
SubDevice *sub = new SubDevice;
|
||||
if (subinfo.type == DEVICE_CPU) {
|
||||
assert(cpu_device_idx == -1);
|
||||
cpu_device_idx = devices.size();
|
||||
}
|
||||
devices.emplace_back(sub);
|
||||
sub->device = Device::create(subinfo, sub->stats, profiler);
|
||||
int cpu_device_idx = -1;
|
||||
foreach (const DeviceInfo &subinfo, info.multi_devices) {
|
||||
/* Always add CPU devices at the back since GPU devices can change
|
||||
* host memory pointers, which CPU uses as device pointer. */
|
||||
unique_ptr<SubDevice> sub = make_unique<SubDevice>();
|
||||
if (subinfo.type == DEVICE_CPU) {
|
||||
assert(cpu_device_idx == -1);
|
||||
cpu_device_idx = devices.size();
|
||||
}
|
||||
sub->device = std::unique_ptr<Device>(Device::create(subinfo, sub->stats, profiler));
|
||||
devices.emplace_back(std::move(sub));
|
||||
}
|
||||
|
||||
/* Swap the CPU device with the last device to ensure the CPU device is the last */
|
||||
{
|
||||
int last = devices.size() - 1;
|
||||
if ((cpu_device_idx != -1) && (cpu_device_idx != last)) {
|
||||
std::swap(devices[last], devices[cpu_device_idx]);
|
||||
}
|
||||
}
|
||||
/* Build a list of peer islands for the available render devices */
|
||||
foreach (auto &sub, devices) {
|
||||
/* First ensure that every device is in at least one peer island */
|
||||
if (sub->peer_island_index < 0) {
|
||||
peer_islands.emplace_back();
|
||||
sub->peer_island_index = (int)peer_islands.size() - 1;
|
||||
peer_islands[sub->peer_island_index].push_back(sub.get());
|
||||
}
|
||||
|
||||
/* Swap the CPU device with the last device to ensure the CPU device is the last */
|
||||
{
|
||||
int last = devices.size() - 1;
|
||||
if ((cpu_device_idx != -1) && (cpu_device_idx != last)) {
|
||||
std::swap(devices[last], devices[cpu_device_idx]);
|
||||
if (!info.has_peer_memory) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Second check peer access between devices and fill up the islands accordingly */
|
||||
foreach (auto &peer_sub, devices) {
|
||||
if (peer_sub->peer_island_index < 0 &&
|
||||
peer_sub->device->info.type == sub->device->info.type &&
|
||||
peer_sub->device->check_peer_access(sub->device.get())) {
|
||||
peer_sub->peer_island_index = sub->peer_island_index;
|
||||
peer_islands[sub->peer_island_index].push_back(peer_sub.get());
|
||||
}
|
||||
}
|
||||
/* Build a list of peer islands for the available render devices */
|
||||
foreach (SubDevice *sub, devices) {
|
||||
/* First ensure that every device is in at least one peer island */
|
||||
if (sub->peer_island_index < 0) {
|
||||
peer_islands.emplace_back();
|
||||
sub->peer_island_index = (int)peer_islands.size() - 1;
|
||||
peer_islands[sub->peer_island_index].push_back(sub);
|
||||
}
|
||||
|
||||
if (!info.has_peer_memory) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Second check peer access between devices and fill up the islands accordingly */
|
||||
foreach (SubDevice *peer_sub, devices) {
|
||||
if (peer_sub->peer_island_index < 0 &&
|
||||
peer_sub->device->info.type == sub->device->info.type &&
|
||||
peer_sub->device->check_peer_access(sub->device)) {
|
||||
peer_sub->peer_island_index = sub->peer_island_index;
|
||||
peer_islands[sub->peer_island_index].push_back(peer_sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~MultiDevice()
|
||||
{
|
||||
foreach (SubDevice *sub, devices) {
|
||||
delete sub->device;
|
||||
delete sub;
|
||||
}
|
||||
}
|
||||
|
||||
~MultiDevice() {}
|
||||
|
||||
int get_num_devices() const override
|
||||
{
|
||||
return devices.size();
|
||||
|
@ -103,7 +97,7 @@ class MultiDevice : public Device {
|
|||
{
|
||||
error_msg.clear();
|
||||
|
||||
foreach (SubDevice *sub, devices)
|
||||
foreach (auto &sub, devices)
|
||||
error_msg += sub->device->error_message();
|
||||
|
||||
return error_msg;
|
||||
|
@ -113,7 +107,7 @@ class MultiDevice : public Device {
|
|||
{
|
||||
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
|
||||
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
|
||||
foreach (const SubDevice *sub_device, devices) {
|
||||
foreach (const auto &sub_device, devices) {
|
||||
BVHLayoutMask device_bvh_layout_mask = sub_device->device->get_bvh_layout_mask();
|
||||
bvh_layout_mask &= device_bvh_layout_mask;
|
||||
bvh_layout_mask_all |= device_bvh_layout_mask;
|
||||
|
@ -144,7 +138,7 @@ class MultiDevice : public Device {
|
|||
|
||||
bool load_kernels(const uint kernel_features) override
|
||||
{
|
||||
foreach (SubDevice *sub, devices)
|
||||
foreach (auto &sub, devices)
|
||||
if (!sub->device->load_kernels(kernel_features))
|
||||
return false;
|
||||
|
||||
|
@ -153,14 +147,14 @@ class MultiDevice : public Device {
|
|||
|
||||
bool load_osl_kernels() override
|
||||
{
|
||||
foreach (SubDevice *sub, devices)
|
||||
foreach (auto &sub, devices)
|
||||
if (!sub->device->load_osl_kernels())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override
|
||||
void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) override
|
||||
{
|
||||
/* Try to build and share a single acceleration structure, if possible */
|
||||
if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
|
||||
|
@ -177,38 +171,39 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
bvh_multi->sub_bvhs.resize(devices.size());
|
||||
|
||||
/* Broadcast acceleration structure build to all render devices */
|
||||
parallel_for(size_t(0), devices.size(), [this, &bvh_multi, &dscene, refit, &progress](size_t id) {
|
||||
// WL: Pointer translation is removed as it is not thread safe. Instead a new method is added
|
||||
// to retrieve the real device pointer.
|
||||
SubDevice *sub = devices[id];
|
||||
parallel_for(
|
||||
size_t(0), devices.size(), [this, &bvh_multi, &dscene, refit, &progress](size_t id) {
|
||||
/* Pointer translation is removed as it is not thread safe. Instead a new method is added
|
||||
to retrieve the real device pointer. */
|
||||
auto &sub = devices[id];
|
||||
|
||||
if (!bvh_multi->sub_bvhs[id]) {
|
||||
BVHParams params = bvh_multi->params;
|
||||
if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
|
||||
params.bvh_layout = BVH_LAYOUT_METAL;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||
params.bvh_layout = sub->device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||
params.bvh_layout = sub->device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
if (!bvh_multi->sub_bvhs[id]) {
|
||||
BVHParams params = bvh_multi->params;
|
||||
if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
|
||||
params.bvh_layout = BVH_LAYOUT_OPTIX;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
|
||||
params.bvh_layout = BVH_LAYOUT_METAL;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
|
||||
params.bvh_layout = sub->device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
else if (bvh_multi->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
|
||||
params.bvh_layout = sub->device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
|
||||
BVH_LAYOUT_EMBREE;
|
||||
|
||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
|
||||
* (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||
if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
|
||||
!bvh_multi->geometry[0]->is_instanced()) {
|
||||
}
|
||||
else {
|
||||
bvh_multi->sub_bvhs[id] = BVH::create(
|
||||
params, bvh_multi->geometry, bvh_multi->objects, sub->device);
|
||||
}
|
||||
}
|
||||
if (bvh_multi->sub_bvhs[id]) {
|
||||
sub->device->build_bvh(bvh_multi->sub_bvhs[id], dscene, progress, refit);
|
||||
}
|
||||
});
|
||||
/* Skip building a bottom level acceleration structure for non-instanced geometry on
|
||||
* Embree (since they are put into the top level directly, see bvh_embree.cpp) */
|
||||
if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
|
||||
!bvh_multi->geometry[0]->is_instanced()) {
|
||||
}
|
||||
else {
|
||||
bvh_multi->sub_bvhs[id] = BVH::create(
|
||||
params, bvh_multi->geometry, bvh_multi->objects, sub->device.get());
|
||||
}
|
||||
}
|
||||
if (bvh_multi->sub_bvhs[id]) {
|
||||
sub->device->build_bvh(bvh_multi->sub_bvhs[id], dscene, progress, refit);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
virtual void *get_cpu_osl_memory() override
|
||||
|
@ -224,9 +219,9 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
|
||||
bool is_resident(device_ptr key, Device *sub_device) override
|
||||
{
|
||||
foreach (SubDevice *sub, devices) {
|
||||
if (sub->device == sub_device) {
|
||||
return find_matching_mem_device(key, sub)->device == sub_device;
|
||||
foreach (auto &sub, devices) {
|
||||
if (sub->device.get() == sub_device) {
|
||||
return find_matching_mem_device(key, sub.get())->device.get() == sub_device;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -268,9 +263,9 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
inline device_ptr find_matching_mem(device_ptr key, Device *dev) override
|
||||
{
|
||||
device_ptr ptr = 0;
|
||||
foreach (SubDevice *sub, devices) {
|
||||
if (sub->device == dev) {
|
||||
return find_matching_mem_device(key, sub)->ptr_map[key];
|
||||
foreach (auto &sub, devices) {
|
||||
if (sub->device.get() == dev) {
|
||||
return find_matching_mem_device(key, sub.get())->ptr_map[key];
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
|
@ -289,7 +284,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
/* The remaining memory types can be distributed across devices */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = 0;
|
||||
mem.device_size = 0;
|
||||
|
||||
|
@ -311,7 +306,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
/* The tile buffers are allocated on each device (see below), so copy to all of them */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
|
@ -338,12 +333,12 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
device_ptr key = mem.device_pointer;
|
||||
size_t i = 0, sub_h = h / devices.size();
|
||||
|
||||
foreach (SubDevice *sub, devices) {
|
||||
foreach (auto &sub, devices) {
|
||||
size_t sy = y + i * sub_h;
|
||||
size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;
|
||||
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub);
|
||||
mem.device = owner_sub->device;
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, sub.get());
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = owner_sub->ptr_map[key];
|
||||
|
||||
owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
|
||||
|
@ -362,7 +357,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
|
||||
mem.device_size = existing_size;
|
||||
|
||||
|
@ -385,7 +380,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
/* Free memory that was allocated for all devices (see above) on each device */
|
||||
foreach (const vector<SubDevice *> &island, peer_islands) {
|
||||
SubDevice *owner_sub = find_matching_mem_device(key, island.front());
|
||||
mem.device = owner_sub->device;
|
||||
mem.device = owner_sub->device.get();
|
||||
mem.device_pointer = owner_sub->ptr_map[key];
|
||||
mem.device_size = existing_size;
|
||||
owner_sub->device->mem_free(mem);
|
||||
|
@ -413,7 +408,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
|
||||
void const_copy_to(const char *name, void *host, size_t size) override
|
||||
{
|
||||
foreach (SubDevice *sub, devices)
|
||||
foreach (auto &sub, devices)
|
||||
sub->device->const_copy_to(name, host, size);
|
||||
}
|
||||
|
||||
|
@ -421,8 +416,8 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
{
|
||||
int i = 0;
|
||||
|
||||
for (const SubDevice *sub : devices) {
|
||||
if (sub->device == sub_device)
|
||||
for (const auto &sub : devices) {
|
||||
if (sub->device.get() == sub_device)
|
||||
return i;
|
||||
i++;
|
||||
}
|
||||
|
@ -432,7 +427,7 @@ void build_bvh(BVH *bvh, DeviceScene *dscene, Progress &progress, bool refit) ov
|
|||
|
||||
virtual void foreach_device(const function<void(Device *)> &callback) override
|
||||
{
|
||||
foreach (SubDevice *sub, devices) {
|
||||
foreach (auto &sub, devices) {
|
||||
sub->device->foreach_device(callback);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue