Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552

Open
William Leeson wants to merge 137 commits from leesonw/blender-cluster:upload_changed into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
14 changed files with 56 additions and 56 deletions
Showing only changes of commit 66a6a7a0af - Show all commits

View File

@ -63,7 +63,7 @@ class CPUDevice : public Device {
bool load_texture_info();
virtual void mem_alloc(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
virtual void mem_copy_from(
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
virtual void mem_zero(device_memory &mem) override;

View File

@ -27,7 +27,6 @@ bool device_cuda_init()
return result;
initialized = true;
int cuew_result = cuewInit(CUEW_INIT_CUDA);
if (cuew_result == CUEW_SUCCESS) {
VLOG_INFO << "CUEW initialization succeeded";

View File

@ -68,7 +68,6 @@ class CUDADevice : public GPUDevice {
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) override;
@ -79,7 +78,7 @@ class CUDADevice : public GPUDevice {
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

View File

@ -795,17 +795,17 @@ void GPUDevice::generic_free(device_memory &mem)
}
void GPUDevice::generic_copy_to(device_memory &mem, size_t size, size_t offset)
{
{
if (!mem.host_pointer || !mem.device_pointer) {
return;
}
/* If use_mapped_host of mem is false, the current device only uses device memory allocated by
* cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
* mem.host_pointer. */
thread_scoped_lock lock(device_mem_map_mutex);
if (!device_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
size = ((size == -1) ? mem.memory_size() : size);
size = ((size == -1) ? mem.memory_size() : size);
copy_host_to_device((void *)mem.device_pointer, mem.host_pointer, size, offset);
}
}

View File

@ -7,6 +7,7 @@
#include <stdlib.h>
#include "bvh/params.h"
#include "device/denoise.h"
#include "device/memory.h"
@ -300,7 +301,7 @@ class Device {
virtual void *host_mem_alloc(size_t size, int alignment);
virtual void host_mem_free(void *p_mem);
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset = 0) = 0;
virtual void mem_copy_to(device_memory &mem, size_t size, size_t offset) = 0;
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
@ -383,7 +384,7 @@ class GPUDevice : public Device {
virtual GPUDevice::Mem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
virtual void generic_free(device_memory &mem);
virtual void generic_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0);
/* total - amount of device memory, free - amount of available device memory */
virtual void get_device_memory_info(size_t &total, size_t &free) = 0;

View File

@ -27,8 +27,6 @@ class DummyDevice : public Device {
virtual void mem_alloc(device_memory &) override {}
// virtual void mem_copy_to(device_memory &) override {}
virtual void mem_copy_to(device_memory &, size_t, size_t) override {}
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override {}

View File

@ -61,13 +61,12 @@ class HIPDevice : public GPUDevice {
virtual void free_device(void *device_pointer) override;
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
virtual void free_host(void *shared_pointer) override;
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size, size_t offset) override;
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

View File

@ -428,6 +428,7 @@ template<typename T> class device_vector : public device_memory {
T *alloc(size_t width, size_t height = 0, size_t depth = 0)
{
size_t new_size = size(width, height, depth);
if (new_size != data_size) {
device_free();
host_free();
@ -572,6 +573,7 @@ template<typename T> class device_vector : public device_memory {
device_copy_to(size, offset);
}
}
void copy_to_device_if_modified(size_t size = -1, size_t offset = 0)
{
if (!modified) {

View File

@ -153,8 +153,8 @@ class MetalDevice : public Device {
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_from(device_memory &mem)
{
mem_copy_from(mem, -1, -1, -1, -1);

View File

@ -163,6 +163,7 @@ void OneapiDevice::generic_copy_to(device_memory &mem, size_t size, size_t offse
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
assert(mem.host_pointer);
assert(device_queue_);
size = ((size == -1) ? mem.memory_size() : size);
usm_memcpy(device_queue_, reinterpret_cast<unsigned char *>(mem.device_pointer) + offset, reinterpret_cast<unsigned char *>(mem.host_pointer) + offset, size);
}

View File

@ -60,7 +60,7 @@ class OneapiDevice : public Device {
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem, size_t size = -1, size_t offset = 0) override;
void mem_copy_to(device_memory &mem, size_t size, size_t offset) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;

View File

@ -698,7 +698,7 @@ void GeometryManager::device_update_volume_images(Device *device, Scene *scene,
* geometry that must be rebuilt. It also determines if any displacement
* or shadow transparency occurs in the scene.
*/
void GeometryManager::preTessDispNormalAndVerticesSetup(Device *device,
void GeometryManager::pretess_disp_normal_and_vertices_setup(Device *device,
Scene *scene,
bool &true_displacement_used,
bool &curve_shadow_transparency_used,
@ -760,16 +760,16 @@ void GeometryManager::preTessDispNormalAndVerticesSetup(Device *device,
* Uploads the mesh data to the device and then builds or refits the BVH
* using the uploaded data.
*/
void GeometryManager::deviceDataXferAndBVHUpdate(int idx,
Scene *scene,
DeviceScene *dscene,
GeometrySizes &sizes,
AttributeSizes &attrib_sizes,
const BVHLayout bvh_layout,
size_t num_bvh,
bool can_refit,
bool need_update_scene_bvh,
Progress &progress)
void GeometryManager::device_data_xfer_and_bvh_update(int idx,
Scene *scene,
DeviceScene *dscene,
GeometrySizes &sizes,
AttributeSizes &attrib_sizes,
const BVHLayout bvh_layout,
size_t num_bvh,
bool can_refit,
bool need_update_scene_bvh,
Progress &progress)
{
auto sub_dscene = scene->dscenes[idx];
sub_dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE;
@ -834,7 +834,7 @@ void GeometryManager::deviceDataXferAndBVHUpdate(int idx,
* Calculates the bounds for any modified geometry and
* then updates the objects bounds from the geometry.
*/
void GeometryManager::updateObjectBounds(Scene *scene)
void GeometryManager::update_object_bounds(Scene *scene)
{
Scene::MotionType need_motion = scene->need_motion();
bool motion_blur = need_motion == Scene::MOTION_BLUR;
@ -918,7 +918,7 @@ void GeometryManager::device_update(Device *device,
bool curve_shadow_transparency_used = false;
size_t total_tess_needed = 0;
preTessDispNormalAndVerticesSetup(
pretess_disp_normal_and_vertices_setup(
device, scene, true_displacement_used, curve_shadow_transparency_used, total_tess_needed);
tesselate(scene, total_tess_needed, progress);
@ -963,7 +963,7 @@ void GeometryManager::device_update(Device *device,
}
{
updateObjectBounds(scene);
update_object_bounds(scene);
}
/* Update the BVH even when there is no geometry so the kernel's BVH data is still valid,
* especially when removing all of the objects during interactive renders.
@ -976,7 +976,7 @@ void GeometryManager::device_update(Device *device,
device->get_bvh_layout_mask());
dscene->data.bvh.bvh_layout = bvh_layout;
size_t num_bvh = createObjectBVHs(device, dscene, scene, bvh_layout, need_update_scene_bvh);
size_t num_bvh = create_object_bvhs(device, dscene, scene, bvh_layout, need_update_scene_bvh);
bool can_refit_scene_bvh = true;
if(need_update_scene_bvh) {
can_refit_scene_bvh = device_update_bvh_preprocess(device, dscene, scene, progress);
@ -984,20 +984,20 @@ void GeometryManager::device_update(Device *device,
{
size_t num_scenes = scene->dscenes.size();
VLOG_INFO << "Rendering using " << num_scenes << " devices";
// Parallel upload the geometry data to the devices and
// calculate or refit the BVHs
/* Parallel upload the geometry data to the devices and
calculate or refit the BVHs */
parallel_for(
size_t(0), num_scenes, [=, this, &sizes, &attrib_sizes, &progress](const size_t idx) {
deviceDataXferAndBVHUpdate(idx,
scene,
dscene,
sizes,
attrib_sizes,
bvh_layout,
num_bvh,
can_refit_scene_bvh,
need_update_scene_bvh,
progress);
device_data_xfer_and_bvh_update(idx,
scene,
dscene,
sizes,
attrib_sizes,
bvh_layout,
num_bvh,
can_refit_scene_bvh,
need_update_scene_bvh,
progress);
});
if (need_update_scene_bvh) {
device_update_bvh_postprocess(device, dscene, scene, progress);
@ -1024,8 +1024,8 @@ void GeometryManager::device_update(Device *device,
}
}
clearGeometryUpdateAndModifiedTags(scene);
clearShaderUpdateTags(scene);
clear_geometry_update_and_modified_tags(scene);
clear_shader_update_tags(scene);
update_flags = UPDATE_NONE;
device_scene_clear_modified(dscene);
}
@ -1097,7 +1097,7 @@ void GeometryManager::collect_statistics(const Scene *scene, RenderStats *stats)
/*
* Clears all tags used to indicate that the shader needs to be updated.
*/
void GeometryManager::clearShaderUpdateTags(Scene *scene)
void GeometryManager::clear_shader_update_tags(Scene *scene)
{
/* unset flags */
foreach (Shader *shader, scene->shaders) {
@ -1111,7 +1111,7 @@ void GeometryManager::clearShaderUpdateTags(Scene *scene)
* Clears all tags used to indicate that the geometry needs to be updated
* or has been modified.
*/
void GeometryManager::clearGeometryUpdateAndModifiedTags(Scene *scene)
void GeometryManager::clear_geometry_update_and_modified_tags(Scene *scene)
{
// Clear update tags
foreach (Geometry *geom, scene->geometry) {

View File

@ -217,6 +217,7 @@ struct GeometrySizes {
size_t *motion_vert_offsets;
};
/* Attribute Sizes */
struct AttributeSizes {
size_t attr_float_size;
size_t attr_float2_size;
@ -280,15 +281,15 @@ class GeometryManager {
/* Statistics */
void collect_statistics(const Scene *scene, RenderStats *stats);
size_t createObjectBVHs(Device *device,
size_t create_object_bvhs(Device *device,
DeviceScene *dscene,
Scene *scene,
const BVHLayout bvh_layout,
bool &need_update_scene_bvh);
void updateSceneBVHs(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void clearShaderUpdateTags(Scene *scene);
void clearGeometryUpdateAndModifiedTags(Scene *scene);
void deviceDataXferAndBVHUpdate(int idx,
void update_scene_bvhs(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
void clear_shader_update_tags(Scene *scene);
void clear_geometry_update_and_modified_tags(Scene *scene);
void device_data_xfer_and_bvh_update(int idx,
Scene *scene,
DeviceScene *dscene,
GeometrySizes &sizes,
@ -298,9 +299,9 @@ class GeometryManager {
bool can_refit,
bool need_update_scene_bvh,
Progress &progress);
void updateObjectBounds(Scene *scene);
void update_object_bounds(Scene *scene);
void tesselate(Scene *scene, size_t total_tess_needed, Progress &progress);
void preTessDispNormalAndVerticesSetup(Device *device,
void pretess_disp_normal_and_vertices_setup(Device *device,
Scene *scene,
bool &true_displacement_used,
bool &curve_shadow_transparency_used,

View File

@ -265,7 +265,7 @@ bool GeometryManager::device_update_bvh_preprocess(Device *device,
* it determines if the BVH can be refitted. It also counts
* the number of BVH that need to be built.
*/
size_t GeometryManager::createObjectBVHs(Device *device,
size_t GeometryManager::create_object_bvhs(Device *device,
DeviceScene *dscene,
Scene *scene,
const BVHLayout bvh_layout,
@ -303,7 +303,7 @@ size_t GeometryManager::createObjectBVHs(Device *device,
* Prepares scene BVH for building or refitting. Then builds or refits the scene
* BVH for all the devices.
*/
void GeometryManager::updateSceneBVHs(Device *device,
void GeometryManager::update_scene_bvhs(Device *device,
DeviceScene *dscene,
Scene *scene,
Progress &progress)