Alternative Upload geometry data in parallel to multiple GPUs using the "Multi-Device" #107552
|
@ -886,9 +886,11 @@ typedef enum {
|
|||
} nvrtcResult;
|
||||
|
||||
typedef struct _nvrtcProgram* nvrtcProgram;
|
||||
|
||||
|
||||
// FRL_CGR
|
||||
/* Function types. */
|
||||
typedef void CUDAAPI tnvtxRangePushA(const char *msg);
|
||||
typedef void CUDAAPI tnvtxRangePop();
|
||||
// FRL_CGR
|
||||
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
|
||||
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
|
||||
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
|
||||
|
@ -1130,6 +1132,8 @@ typedef nvrtcResult CUDAAPI tnvrtcGetLoweredName(nvrtcProgram prog, const char*
|
|||
|
||||
|
||||
/* Function declarations. */
|
||||
extern tnvtxRangePushA *nvtxRangePushA;
|
||||
extern tnvtxRangePop *nvtxRangePop;
|
||||
extern tcuGetErrorString *cuGetErrorString;
|
||||
extern tcuGetErrorName *cuGetErrorName;
|
||||
extern tcuInit *cuInit;
|
||||
|
@ -1378,7 +1382,10 @@ enum {
|
|||
|
||||
/* Bit flags accepted by cuewInit(). Each flag selects one library to load;
 * combine them with bitwise OR to request several at once. */
enum {
  CUEW_INIT_CUDA = 1,
  CUEW_INIT_NVRTC = 2,
  // FRL_CGR
  CUEW_INIT_NVTX = 4,
  // FRL_CGR
};
|
||||
|
||||
int cuewInit(cuuint32_t flags);
|
||||
|
|
|
@ -66,10 +66,22 @@ typedef void* DynamicLibrary;
|
|||
_LIBRARY_FIND_CHECKED(nvrtc_lib, name)
|
||||
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
|
||||
|
||||
// FRL_CGR
|
||||
/* Symbol lookup helpers for the NVTX library. Both variants must resolve
 * against nvtx_lib; the checked variant previously looked symbols up in
 * nvrtc_lib by mistake. */
#define NVTX_LIBRARY_FIND_CHECKED(name) \
        _LIBRARY_FIND_CHECKED(nvtx_lib, name)
#define NVTX_LIBRARY_FIND(name) _LIBRARY_FIND(nvtx_lib, name)
|
||||
// FRL_CGR
|
||||
|
||||
static DynamicLibrary cuda_lib;
|
||||
static DynamicLibrary nvrtc_lib;
|
||||
/* Handle of the dynamically loaded NVTX library. Assigned by cuewNvtxInit()
 * and closed again by cuewExitNvtx(). */
static DynamicLibrary nvtx_lib;
|
||||
|
||||
/* Function definitions. */
|
||||
// FRL_CGR
|
||||
/* NVTX "push range" entry point; looked up in nvtx_lib by cuewNvtxInit(). */
tnvtxRangePushA *nvtxRangePushA;
|
||||
/* NVTX "pop range" entry point; looked up in nvtx_lib by cuewNvtxInit(). */
tnvtxRangePop *nvtxRangePop;
|
||||
// FRL_CGR
|
||||
|
||||
tcuGetErrorString *cuGetErrorString;
|
||||
tcuGetErrorName *cuGetErrorName;
|
||||
tcuInit *cuInit;
|
||||
|
@ -611,6 +623,16 @@ static int cuewCudaInit(void) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// FRL_CGR
|
||||
static void cuewExitNvtx(void) {
|
||||
if (nvrtc_lib != NULL) {
|
||||
/* Ignore errors. */
|
||||
dynamic_library_close(nvtx_lib);
|
||||
nvtx_lib = NULL;
|
||||
}
|
||||
}
|
||||
// FRL_CGR
|
||||
|
||||
static void cuewExitNvrtc(void) {
|
||||
if (nvrtc_lib != NULL) {
|
||||
/* Ignore errors. */
|
||||
|
@ -681,6 +703,56 @@ static int cuewNvrtcInit(void) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// FRL_CGR
|
||||
static int cuewNvtxInit(void) {
|
||||
/* Library paths. */
|
||||
#ifdef _WIN32
|
||||
/* Expected in c:/windows/system or similar, no path needed. */
|
||||
const char *nvtc_paths[] = {"nvToolsExt64_101_0.dll",
|
||||
NULL};
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *nvtx_paths[] = {"/usr/local/cuda/lib/libnvToolsExt.dylib", NULL};
|
||||
#else
|
||||
const char *nvtx_paths[] = {"libnvToolsExt.so",
|
||||
# if defined(__x86_64__) || defined(_M_X64)
|
||||
"/usr/local/cuda/lib64/libnvToolsExt.so",
|
||||
#else
|
||||
"/usr/local/cuda/lib/libnvToolsExt.so",
|
||||
#endif
|
||||
NULL};
|
||||
#endif
|
||||
static int nvtx_initialized = 0;
|
||||
static int result = 0;
|
||||
int error;
|
||||
|
||||
if (nvtx_initialized) {
|
||||
return result;
|
||||
}
|
||||
|
||||
nvtx_initialized = 1;
|
||||
|
||||
error = atexit(cuewExitNvtx);
|
||||
if (error) {
|
||||
result = CUEW_ERROR_ATEXIT_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Load library. */
|
||||
nvtx_lib = dynamic_library_open_find(nvtx_paths);
|
||||
|
||||
if (nvtx_lib == NULL) {
|
||||
result = CUEW_ERROR_OPEN_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
NVTX_LIBRARY_FIND(nvtxRangePushA);
|
||||
NVTX_LIBRARY_FIND(nvtxRangePop);
|
||||
|
||||
result = CUEW_SUCCESS;
|
||||
return result;
|
||||
}
|
||||
// FRL_CGR
|
||||
|
||||
int cuewInit(cuuint32_t flags) {
|
||||
int result = CUEW_SUCCESS;
|
||||
|
@ -698,6 +770,14 @@ int cuewInit(cuuint32_t flags) {
|
|||
return result;
|
||||
}
|
||||
}
|
||||
// FRL_CGR
|
||||
if(flags & CUEW_INIT_NVTX) {
|
||||
result = cuewNvtxInit();
|
||||
if(result != CUEW_SUCCESS) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
// FRL_CGR
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -27,7 +27,13 @@ bool device_cuda_init()
|
|||
return result;
|
||||
|
||||
initialized = true;
|
||||
int cuew_result = cuewInit(CUEW_INIT_CUDA);
|
||||
// FRL_CGR
|
||||
int flags = CUEW_INIT_CUDA;
|
||||
#ifdef USE_SCOPED_MARKER
|
||||
flags |= CUEW_INIT_NVTX;
|
||||
#endif
|
||||
int cuew_result = cuewInit(flags);
|
||||
// FRL_CGR
|
||||
if (cuew_result == CUEW_SUCCESS) {
|
||||
VLOG_INFO << "CUEW initialization succeeded";
|
||||
if (CUDADevice::have_precompiled_kernels()) {
|
||||
|
|
|
@ -987,7 +987,15 @@ int CUDADevice::get_device_default_attribute(CUdevice_attribute attribute, int d
|
|||
}
|
||||
return value;
|
||||
}
|
||||
// FRL_CGR
|
||||
/* Open a named NVTX range so the work that follows shows up under `name` in
 * NVIDIA profiling tools. Does nothing when the NVTX entry point has not been
 * resolved, instead of calling through a null function pointer. */
void CUDADevice::push_marker(const string name)
{
  if (nvtxRangePushA != nullptr) {
    nvtxRangePushA(name.c_str());
  }
}
|
||||
|
||||
/* Close the most recently opened NVTX range. Does nothing when the NVTX entry
 * point has not been resolved, instead of calling through a null function
 * pointer. */
void CUDADevice::pop_marker()
{
  if (nvtxRangePop != nullptr) {
    nvtxRangePop();
  }
}
|
||||
// FRL_CGR
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
|
|
@ -100,6 +100,8 @@ class CUDADevice : public GPUDevice {
|
|||
int get_num_multiprocessors();
|
||||
int get_max_num_threads_per_multiprocessor();
|
||||
|
||||
virtual void push_marker(const string name) override;
|
||||
virtual void pop_marker() override;
|
||||
protected:
|
||||
bool get_device_attribute(CUdevice_attribute attribute, int *value);
|
||||
int get_device_default_attribute(CUdevice_attribute attribute, int default_value);
|
||||
|
|
|
@ -216,6 +216,16 @@ class Device {
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
// FRL_CGR END
|
||||
|
||||
// FRL_CGR BEGIN
|
||||
/* Profiling hook: begin a named range on this device. The default
 * implementation is a no-op; device classes may override it. */
virtual void push_marker(const string /*name*/)
{
}
|
||||
|
||||
/* Profiling hook: end the range started by the matching push_marker() call.
 * The default implementation is a no-op; device classes may override it. */
virtual void pop_marker()
{
}
|
||||
|
||||
// FRL_CGR END
|
||||
|
||||
/* Called after kernel texture setup, and prior to integrator state setup. */
|
||||
virtual void optimize_for_scene(Scene * /*scene*/)
|
||||
|
@ -309,6 +319,31 @@ class Device {
|
|||
static uint devices_initialized_mask;
|
||||
};
|
||||
|
||||
// FRL_CGR
|
||||
class ScopedMarker {
|
||||
private:
|
||||
Device *_device;
|
||||
public:
|
||||
ScopedMarker(Device *p_device, const string name) {
|
||||
_device = p_device;
|
||||
_device->push_marker(name.c_str() + std::to_string(p_device->info.num));
|
||||
}
|
||||
|
||||
~ScopedMarker() {
|
||||
_device->pop_marker();
|
||||
}
|
||||
};
|
||||
|
||||
/* Profiling markers are compiled in while USE_SCOPED_MARKER is defined.
 *
 * SCOPED_MARKER(device, msg) declares a ScopedMarker that lives until the end
 * of the enclosing scope. The variable name embeds the source line number so
 * that nested scopes do not shadow an outer marker and several markers can be
 * placed in the same scope. Without USE_SCOPED_MARKER the macro expands to
 * nothing. */
#define USE_SCOPED_MARKER
#ifndef SCOPED_MARKER
#  ifdef USE_SCOPED_MARKER
#    define SCOPED_MARKER_CONCAT_IMPL(a, b) a##b
#    define SCOPED_MARKER_CONCAT(a, b) SCOPED_MARKER_CONCAT_IMPL(a, b)
#    define SCOPED_MARKER(device, msg) \
      ScopedMarker SCOPED_MARKER_CONCAT(scoped_marker_, __LINE__)(device, msg)
#  else
#    define SCOPED_MARKER(device, msg)
#  endif
#endif
|
||||
// FRL_CGR
|
||||
|
||||
/* Device, which is GPU, with some common functionality for GPU backends */
|
||||
class GPUDevice : public Device {
|
||||
protected:
|
||||
|
|
|
@ -702,6 +702,7 @@ void GeometryManager::device_update_attributes(Device *device,
|
|||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
SCOPED_MARKER(device, "GeometryManager::device_update_attributes");
|
||||
progress.set_status("Updating Mesh", "Computing attributes");
|
||||
|
||||
/* gather per mesh requested attributes. as meshes may have multiple
|
||||
|
@ -1042,11 +1043,12 @@ void GeometryManager::geom_calc_offset(Scene *scene, BVHLayout bvh_layout)
|
|||
}
|
||||
}
|
||||
|
||||
void GeometryManager::device_update_mesh(Device *,
|
||||
void GeometryManager::device_update_mesh(Device *device,
|
||||
DeviceScene *dscene,
|
||||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
SCOPED_MARKER(device, "GeometryManager::device_update_mesh");
|
||||
/* Count. */
|
||||
size_t vert_size = 0;
|
||||
size_t tri_size = 0;
|
||||
|
@ -1769,6 +1771,7 @@ void GeometryManager::device_update(Device *device,
|
|||
Scene *scene,
|
||||
Progress &progress)
|
||||
{
|
||||
SCOPED_MARKER(device, "GeometryManager::device_update");
|
||||
if (!need_update())
|
||||
return;
|
||||
|
||||
|
@ -1784,7 +1787,8 @@ void GeometryManager::device_update(Device *device,
|
|||
scene->update_stats->geometry.times.add_entry({"device_update (normals)", time});
|
||||
}
|
||||
});
|
||||
|
||||
{
|
||||
SCOPED_MARKER(device, "Update face and vertex normals");
|
||||
foreach (Geometry *geom, scene->geometry) {
|
||||
if (geom->is_modified()) {
|
||||
if ((geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME)) {
|
||||
|
@ -1820,6 +1824,7 @@ void GeometryManager::device_update(Device *device,
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress.get_cancel()) {
|
||||
|
@ -1828,6 +1833,7 @@ void GeometryManager::device_update(Device *device,
|
|||
|
||||
/* Tessellate meshes that are using subdivision */
|
||||
if (total_tess_needed) {
|
||||
SCOPED_MARKER(device, "Tesselate");
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry(
|
||||
|
@ -1877,6 +1883,7 @@ void GeometryManager::device_update(Device *device,
|
|||
/* Update images needed for true displacement. */
|
||||
bool old_need_object_flags_update = false;
|
||||
if (true_displacement_used || curve_shadow_transparency_used) {
|
||||
SCOPED_MARKER(device, "Update displacement images");
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry(
|
||||
|
@ -1925,6 +1932,7 @@ void GeometryManager::device_update(Device *device,
|
|||
size_t num_bvh = 0;
|
||||
|
||||
{
|
||||
SCOPED_MARKER(device, "displace and shadow transp");
|
||||
/* Copy constant data needed by shader evaluation. */
|
||||
device->const_copy_to("data", &dscene->data, sizeof(dscene->data));
|
||||
|
||||
|
@ -1990,6 +1998,7 @@ void GeometryManager::device_update(Device *device,
|
|||
bool need_update_scene_bvh = (scene->bvh == nullptr ||
|
||||
(update_flags & (TRANSFORM_MODIFIED | VISIBILITY_MODIFIED)) != 0);
|
||||
{
|
||||
SCOPED_MARKER(device, "Build Object BVH");
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry({"device_update (build object BVHs)", time});
|
||||
|
@ -2025,6 +2034,7 @@ void GeometryManager::device_update(Device *device,
|
|||
|
||||
/* Update objects. */
|
||||
{
|
||||
SCOPED_MARKER(device, "compute bounds");
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry({"device_update (compute bounds)", time});
|
||||
|
@ -2040,6 +2050,7 @@ void GeometryManager::device_update(Device *device,
|
|||
}
|
||||
|
||||
if (need_update_scene_bvh) {
|
||||
SCOPED_MARKER(device, "update scene BVH");
|
||||
scoped_callback_timer timer([scene](double time) {
|
||||
if (scene->update_stats) {
|
||||
scene->update_stats->geometry.times.add_entry({"device_update (build scene BVH)", time});
|
||||
|
@ -2123,6 +2134,7 @@ void GeometryManager::device_update(Device *device,
|
|||
|
||||
void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool force_free)
|
||||
{
|
||||
SCOPED_MARKER(device, "GeometryManager::device_free");
|
||||
dscene->bvh_nodes.free_if_need_realloc(force_free);
|
||||
dscene->bvh_leaf_nodes.free_if_need_realloc(force_free);
|
||||
dscene->object_node.free_if_need_realloc(force_free);
|
||||
|
|
Loading…
Reference in New Issue