Uses a light tree to more effectively sample scenes with many lights. This can significantly reduce noise, at the cost of a somewhat longer render time per sample. Light tree sampling is enabled by default. It can be disabled in the Sampling > Lights panel. Scenes using light clamping or ray visibility tricks may render differently, as these are biased techniques that depend on the sampling strategy. The implementation is currently disabled on AMD HIP. This is planned to be fixed before the release. Implementation by Jeffrey Liu, Weizhen Huang, Alaska and Brecht Van Lommel. Ref T77889
458 lines
11 KiB
C++
458 lines
11 KiB
C++
/* SPDX-License-Identifier: Apache-2.0
|
|
* Copyright 2011-2022 Blender Foundation */
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "bvh/bvh2.h"
|
|
|
|
#include "device/device.h"
|
|
#include "device/queue.h"
|
|
|
|
#include "device/cpu/device.h"
|
|
#include "device/cpu/kernel.h"
|
|
#include "device/cuda/device.h"
|
|
#include "device/dummy/device.h"
|
|
#include "device/hip/device.h"
|
|
#include "device/metal/device.h"
|
|
#include "device/multi/device.h"
|
|
#include "device/oneapi/device.h"
|
|
#include "device/optix/device.h"
|
|
|
|
#include "util/foreach.h"
|
|
#include "util/half.h"
|
|
#include "util/log.h"
|
|
#include "util/math.h"
|
|
#include "util/string.h"
|
|
#include "util/system.h"
|
|
#include "util/task.h"
|
|
#include "util/time.h"
|
|
#include "util/types.h"
|
|
#include "util/vector.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
bool Device::need_types_update = true;
|
|
bool Device::need_devices_update = true;
|
|
thread_mutex Device::device_mutex;
|
|
vector<DeviceInfo> Device::cuda_devices;
|
|
vector<DeviceInfo> Device::optix_devices;
|
|
vector<DeviceInfo> Device::cpu_devices;
|
|
vector<DeviceInfo> Device::hip_devices;
|
|
vector<DeviceInfo> Device::metal_devices;
|
|
vector<DeviceInfo> Device::oneapi_devices;
|
|
uint Device::devices_initialized_mask = 0;
|
|
|
|
/* Device */
|
|
|
|
/* The destructor has no work of its own to do. The explicit noexcept(false) opts it
 * out of the implicit noexcept that destructors otherwise get. */
Device::~Device() noexcept(false) {}
|
|
|
|
/* Base implementation of the BVH build. Only the BVH2 layout is handled here (checked
 * by the assert): the tree is either refitted in place or built from scratch. */
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
  assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);

  BVH2 *const tree = static_cast<BVH2 *>(bvh);

  if (!refit) {
    /* Full build, statistics go to this device's stats. */
    tree->build(progress, &stats);
    return;
  }

  tree->refit(progress);
}
|
|
|
|
/* Creates the device described by `info`.
 *
 * A multi device is returned whenever `info` lists sub-devices. Otherwise the backend
 * matching `info.type` is instantiated, provided it was compiled in and (where the
 * backend has an init function) its initialization succeeds. When no backend device
 * could be created, a dummy device is returned instead, so the result of the fallback
 * path is whatever device_dummy_create() yields. */
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
  /* Always create a multi device when info contains multiple devices.
   * This is done so that the type can still be e.g. DEVICE_CPU to indicate
   * that it is a homogeneous collection of devices, which simplifies checks. */
  if (!info.multi_devices.empty()) {
    return device_multi_create(info, stats, profiler);
  }

  Device *created = nullptr;

  switch (info.type) {
    case DEVICE_CPU:
      created = device_cpu_create(info, stats, profiler);
      break;

#ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init()) {
        created = device_cuda_create(info, stats, profiler);
      }
      break;
#endif

#ifdef WITH_OPTIX
    case DEVICE_OPTIX:
      if (device_optix_init()) {
        created = device_optix_create(info, stats, profiler);
      }
      break;
#endif

#ifdef WITH_HIP
    case DEVICE_HIP:
      if (device_hip_init()) {
        created = device_hip_create(info, stats, profiler);
      }
      break;
#endif

#ifdef WITH_METAL
    case DEVICE_METAL:
      if (device_metal_init()) {
        created = device_metal_create(info, stats, profiler);
      }
      break;
#endif

#ifdef WITH_ONEAPI
    case DEVICE_ONEAPI:
      /* No init check is done for oneAPI here, unlike the other GPU backends. */
      created = device_oneapi_create(info, stats, profiler);
      break;
#endif

    default:
      break;
  }

  /* Unknown type, backend not compiled in, or backend init failed. */
  if (created != nullptr) {
    return created;
  }
  return device_dummy_create(info, stats, profiler);
}
|
|
|
|
/* Maps an upper-case backend name such as "CPU" or "OPTIX" to its DeviceType.
 * The comparison is exact (strcmp); any other string gives DEVICE_NONE.
 * `name` must be a valid null-terminated string. */
DeviceType Device::type_from_string(const char *name)
{
  struct NamedType {
    const char *label;
    DeviceType type;
  };

  static const NamedType known_types[] = {
      {"CPU", DEVICE_CPU},
      {"CUDA", DEVICE_CUDA},
      {"OPTIX", DEVICE_OPTIX},
      {"MULTI", DEVICE_MULTI},
      {"HIP", DEVICE_HIP},
      {"METAL", DEVICE_METAL},
      {"ONEAPI", DEVICE_ONEAPI},
  };

  for (const NamedType &entry : known_types) {
    if (strcmp(name, entry.label) == 0) {
      return entry.type;
    }
  }

  return DEVICE_NONE;
}
|
|
|
|
/* Inverse of type_from_string(): gives the upper-case name of a device type.
 * Types without a name in the list below yield an empty string. */
string Device::string_from_type(DeviceType type)
{
  switch (type) {
    case DEVICE_CPU:
      return "CPU";
    case DEVICE_CUDA:
      return "CUDA";
    case DEVICE_OPTIX:
      return "OPTIX";
    case DEVICE_MULTI:
      return "MULTI";
    case DEVICE_HIP:
      return "HIP";
    case DEVICE_METAL:
      return "METAL";
    case DEVICE_ONEAPI:
      return "ONEAPI";
    default:
      break;
  }

  return "";
}
|
|
|
|
/* Lists the device types this build supports. CPU is always present, every other
 * entry depends on the corresponding WITH_* build option. No driver is touched. */
vector<DeviceType> Device::available_types()
{
  vector<DeviceType> supported;

  supported.push_back(DEVICE_CPU);

#ifdef WITH_CUDA
  supported.push_back(DEVICE_CUDA);
#endif

#ifdef WITH_OPTIX
  supported.push_back(DEVICE_OPTIX);
#endif

#ifdef WITH_HIP
  supported.push_back(DEVICE_HIP);
#endif

#ifdef WITH_METAL
  supported.push_back(DEVICE_METAL);
#endif

#ifdef WITH_ONEAPI
  supported.push_back(DEVICE_ONEAPI);
#endif

  return supported;
}
|
|
|
|
vector<DeviceInfo> Device::available_devices(uint mask)
|
|
{
|
|
/* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
|
|
* be broken and cause crashes when only trying to get device info, so
|
|
* we don't want to do any initialization until the user chooses to. */
|
|
thread_scoped_lock lock(device_mutex);
|
|
vector<DeviceInfo> devices;
|
|
|
|
#if defined(WITH_CUDA) || defined(WITH_OPTIX)
|
|
if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
|
|
if (device_cuda_init()) {
|
|
device_cuda_info(cuda_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_CUDA;
|
|
}
|
|
if (mask & DEVICE_MASK_CUDA) {
|
|
foreach (DeviceInfo &info, cuda_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef WITH_OPTIX
|
|
if (mask & DEVICE_MASK_OPTIX) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
|
|
if (device_optix_init()) {
|
|
device_optix_info(cuda_devices, optix_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_OPTIX;
|
|
}
|
|
foreach (DeviceInfo &info, optix_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef WITH_HIP
|
|
if (mask & DEVICE_MASK_HIP) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
|
|
if (device_hip_init()) {
|
|
device_hip_info(hip_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_HIP;
|
|
}
|
|
foreach (DeviceInfo &info, hip_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef WITH_ONEAPI
|
|
if (mask & DEVICE_MASK_ONEAPI) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_ONEAPI)) {
|
|
if (device_oneapi_init()) {
|
|
device_oneapi_info(oneapi_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_ONEAPI;
|
|
}
|
|
foreach (DeviceInfo &info, oneapi_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
if (mask & DEVICE_MASK_CPU) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
|
|
device_cpu_info(cpu_devices);
|
|
devices_initialized_mask |= DEVICE_MASK_CPU;
|
|
}
|
|
foreach (DeviceInfo &info, cpu_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_METAL
|
|
if (mask & DEVICE_MASK_METAL) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
|
|
if (device_metal_init()) {
|
|
device_metal_info(metal_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_METAL;
|
|
}
|
|
foreach (DeviceInfo &info, metal_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return devices;
|
|
}
|
|
|
|
/* Builds a DeviceInfo of type DEVICE_DUMMY that carries `error_msg`. All other
 * fields keep the values given by the DeviceInfo default constructor. */
DeviceInfo Device::dummy_device(const string &error_msg)
{
  DeviceInfo placeholder;

  placeholder.error_msg = error_msg;
  placeholder.type = DEVICE_DUMMY;

  return placeholder;
}
|
|
|
|
/* Assembles a human readable capabilities report for the backends selected in `mask`
 * (DEVICE_MASK_* bits). The CPU section comes first; a GPU backend only contributes
 * a section when it is compiled in and its init function succeeds. The whole call
 * runs under device_mutex. */
string Device::device_capabilities(uint mask)
{
  thread_scoped_lock lock(device_mutex);
  string report;

  if (mask & DEVICE_MASK_CPU) {
    /* The CPU details follow the heading on the same line. */
    report += "\nCPU device capabilities: ";
    report += device_cpu_capabilities();
    report += "\n";
  }

#ifdef WITH_CUDA
  if ((mask & DEVICE_MASK_CUDA) && device_cuda_init()) {
    report += "\nCUDA device capabilities:\n";
    report += device_cuda_capabilities();
  }
#endif

#ifdef WITH_HIP
  if ((mask & DEVICE_MASK_HIP) && device_hip_init()) {
    report += "\nHIP device capabilities:\n";
    report += device_hip_capabilities();
  }
#endif

#ifdef WITH_ONEAPI
  if ((mask & DEVICE_MASK_ONEAPI) && device_oneapi_init()) {
    report += "\noneAPI device capabilities:\n";
    report += device_oneapi_capabilities();
  }
#endif

#ifdef WITH_METAL
  if ((mask & DEVICE_MASK_METAL) && device_metal_init()) {
    report += "\nMetal device capabilities:\n";
    report += device_metal_capabilities();
  }
#endif

  return report;
}
|
|
|
|
/* Combines several device descriptions into a single one.
 *
 * subdevices: devices to combine, must not be empty. A single entry is returned
 *             unchanged.
 * threads:    requested CPU thread count, 0 means TaskScheduler::max_concurrency().
 * background: when false, a CPU sub-device is left out of the combination entirely.
 *             When true, the CPU keeps its threads minus one per other sub-device and
 *             is left out only if no thread remains.
 *
 * The returned info has the common type of the included sub-devices, or DEVICE_MULTI
 * when they differ. Its id is "MULTI" followed by the ids of the included sub-devices.
 * Feature flags are combined over the included sub-devices: nanovdb, light tree, OSL,
 * guiding, profiling and denoisers require support from all of them, while peer memory
 * and MetalRT are set when any of them has it. */
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
                                    int threads,
                                    bool background)
{
  assert(subdevices.size() > 0);

  if (subdevices.size() == 1) {
    /* No multi device needed. */
    return subdevices.front();
  }

  DeviceInfo info;
  info.type = DEVICE_NONE;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  /* Neutral start values for the accumulation below: true for the AND-combined
   * flags, false for the OR-combined ones. */
  info.has_nanovdb = true;
  info.has_light_tree = true;
  info.has_osl = true;
  info.has_guiding = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
  info.use_metalrt = false;
  info.denoisers = DENOISER_ALL;

  foreach (const DeviceInfo &device, subdevices) {
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
        const int orig_cpu_threads = (threads) ? threads : TaskScheduler::max_concurrency();

        /* One thread is given up for every other sub-device. The subtraction is done
         * in int on purpose: mixing int with size_t would evaluate it as unsigned, so
         * a result below zero would wrap around to a huge value instead of being
         * clamped to 0. */
        const int reserved_threads = static_cast<int>(subdevices.size() - 1);
        const int cpu_threads = (orig_cpu_threads > reserved_threads) ?
                                    orig_cpu_threads - reserved_threads :
                                    0;

        VLOG_INFO << "CPU render threads reduced from " << orig_cpu_threads << " to "
                  << cpu_threads << ", to dedicate to GPU.";

        if (cpu_threads >= 1) {
          DeviceInfo cpu_device = device;
          cpu_device.cpu_threads = cpu_threads;
          info.multi_devices.push_back(cpu_device);
        }
        else {
          /* No thread left for the CPU: it does not take part at all, and is also
           * skipped for the id, type and feature accumulation below. */
          continue;
        }
      }
      else {
        VLOG_INFO << "CPU render threads disabled for interactive render.";
        continue;
      }
    }
    else {
      info.multi_devices.push_back(device);
    }

    /* Create unique ID for this combination of devices. */
    info.id += device.id;

    /* Set device type to MULTI if subdevices are not of a common type. */
    if (info.type == DEVICE_NONE) {
      info.type = device.type;
    }
    else if (device.type != info.type) {
      info.type = DEVICE_MULTI;
    }

    /* Accumulate device info. */
    info.has_nanovdb &= device.has_nanovdb;
    info.has_light_tree &= device.has_light_tree;
    info.has_osl &= device.has_osl;
    info.has_guiding &= device.has_guiding;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
    info.use_metalrt |= device.use_metalrt;
    info.denoisers &= device.denoisers;
  }

  return info;
}
|
|
|
|
void Device::tag_update()
|
|
{
|
|
free_memory();
|
|
}
|
|
|
|
void Device::free_memory()
|
|
{
|
|
devices_initialized_mask = 0;
|
|
cuda_devices.free_memory();
|
|
optix_devices.free_memory();
|
|
hip_devices.free_memory();
|
|
oneapi_devices.free_memory();
|
|
cpu_devices.free_memory();
|
|
metal_devices.free_memory();
|
|
}
|
|
|
|
/* Base implementation for devices without queue support: logs a fatal error. The
 * empty pointer is returned only in case the log call itself returns. */
unique_ptr<DeviceQueue> Device::gpu_queue_create()
{
  LOG(FATAL) << "Device does not support queues.";

  return unique_ptr<DeviceQueue>();
}
|
|
|
|
/* Gives access to the CPU kernels. They live in a function-local static, so they are
 * constructed on the first call and the same object is handed out afterwards. */
const CPUKernels &Device::get_cpu_kernels()
{
  static CPUKernels shared_kernels;

  return shared_kernels;
}
|
|
|
|
/* Base implementation for devices without CPU kernel support: the output vector is
 * left untouched and a fatal error is logged. */
void Device::get_cpu_kernel_thread_globals(
    vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
{
  LOG(FATAL) << "Device does not support CPU kernels.";
}
|
|
|
|
/* Base implementation: there is no OSL memory at this level, so a null pointer is
 * returned. */
void *Device::get_cpu_osl_memory()
{
  return nullptr;
}
|
|
|
|
/* DeviceInfo */
|
|
|
|
CCL_NAMESPACE_END
|