This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/intern/cycles/device/device.cpp
Brecht Van Lommel 10d2cbfa36 Fix T84872: OptiX GPU + CPU rendering uses branched path samples
Branched path tracing is not supported for OptiX, and it would still use the
number of AA samples from there when branched path was enabled by the user
earlier but auto disabled and hidden in the UI when using OptiX.

Ref D10159
2021-01-20 14:59:23 +01:00

747 lines
21 KiB
C++

/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <string.h>
#include "bvh/bvh2.h"
#include "device/device.h"
#include "device/device_intern.h"
#include "util/util_foreach.h"
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_string.h"
#include "util/util_system.h"
#include "util/util_time.h"
#include "util/util_types.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::network_devices;
uint Device::devices_initialized_mask = 0;
/* Device Requested Features */
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< std::endl;
os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< std::endl;
os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
<< std::endl;
os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
<< std::endl;
os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
<< std::endl;
os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
<< std::endl;
os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< std::endl;
os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< std::endl;
return os;
}
/* Device */
Device::~Device() noexcept(false)
{
if (!background) {
if (vertex_buffer != 0) {
glDeleteBuffers(1, &vertex_buffer);
}
if (fallback_shader_program != 0) {
glDeleteProgram(fallback_shader_program);
}
}
}
/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
static void shader_print_errors(const char *task, const char *log, const char *code)
{
LOG(ERROR) << "Shader: " << task << " error:";
LOG(ERROR) << "===== shader string ====";
stringstream stream(code);
string partial;
int line = 1;
while (getline(stream, partial, '\n')) {
if (line < 10) {
LOG(ERROR) << " " << line << " " << partial;
}
else {
LOG(ERROR) << line << " " << partial;
}
line++;
}
LOG(ERROR) << log;
}
static int bind_fallback_shader(void)
{
GLint status;
GLchar log[5000];
GLsizei length = 0;
GLuint program = 0;
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
program = glCreateProgram();
for (int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source;
const char *c_str = source_str.c_str();
glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if (!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
}
glAttachShader(program, shader);
}
/* Link output. */
glBindFragDataLocation(program, 0, "fragColor");
/* Link and error check. */
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &status);
if (!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0;
}
return program;
}
bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
return false;
}
if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
fallback_shader_program = bind_fallback_shader();
fallback_status = FALLBACK_SHADER_STATUS_ERROR;
if (fallback_shader_program == 0) {
return false;
}
glUseProgram(fallback_shader_program);
image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
if (image_texture_location < 0) {
LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
return false;
}
fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
if (fullscreen_location < 0) {
LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
return false;
}
fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
}
/* Run this every time. */
glUseProgram(fallback_shader_program);
glUniform1i(image_texture_location, 0);
glUniform2f(fullscreen_location, width, height);
return true;
}
void Device::draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
assert(rgba.type == MEM_PIXELS);
mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
GLuint texid;
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
if (rgba.data_type == TYPE_HALF) {
GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
}
else {
uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
}
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
if (transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
GLint shader_program;
if (use_fallback_shader) {
if (!bind_fallback_display_space_shader(dw, dh)) {
return;
}
shader_program = fallback_shader_program;
}
else {
draw_params.bind_display_space_shader_cb();
glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
}
if (!vertex_buffer) {
glGenBuffers(1, &vertex_buffer);
}
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
/* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
*/
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
if (vpointer) {
/* texture coordinate - vertex pair */
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = dx;
vpointer[3] = dy;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = (float)width + dx;
vpointer[7] = dy;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = (float)width + dx;
vpointer[11] = (float)height + dy;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = dx;
vpointer[15] = (float)height + dy;
if (vertex_buffer) {
glUnmapBuffer(GL_ARRAY_BUFFER);
}
}
GLuint vertex_array_object;
GLuint position_attribute, texcoord_attribute;
glGenVertexArrays(1, &vertex_array_object);
glBindVertexArray(vertex_array_object);
texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
position_attribute = glGetAttribLocation(shader_program, "pos");
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
if (vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
if (use_fallback_shader) {
glUseProgram(0);
}
else {
draw_params.unbind_display_space_shader_cb();
}
glDeleteVertexArrays(1, &vertex_array_object);
glBindTexture(GL_TEXTURE_2D, 0);
glDeleteTextures(1, &texid);
if (transparent) {
glDisable(GL_BLEND);
}
}
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
if (refit) {
bvh2->refit(progress);
}
else {
bvh2->build(progress, &stats);
}
}
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
#ifdef WITH_MULTI
if (!info.multi_devices.empty()) {
/* Always create a multi device when info contains multiple devices.
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
* that it is a homogeneous collection of devices, which simplifies checks. */
return device_multi_create(info, stats, profiler, background);
}
#endif
Device *device = NULL;
switch (info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, stats, profiler, background);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if (device_cuda_init())
device = device_cuda_create(info, stats, profiler, background);
break;
#endif
#ifdef WITH_OPTIX
case DEVICE_OPTIX:
if (device_optix_init())
device = device_optix_create(info, stats, profiler, background);
break;
#endif
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, stats, profiler, "127.0.0.1");
break;
#endif
#ifdef WITH_OPENCL
case DEVICE_OPENCL:
if (device_opencl_init())
device = device_opencl_create(info, stats, profiler, background);
break;
#endif
default:
break;
}
if (device == NULL) {
device = device_dummy_create(info, stats, profiler, background);
}
return device;
}
DeviceType Device::type_from_string(const char *name)
{
if (strcmp(name, "CPU") == 0)
return DEVICE_CPU;
else if (strcmp(name, "CUDA") == 0)
return DEVICE_CUDA;
else if (strcmp(name, "OPTIX") == 0)
return DEVICE_OPTIX;
else if (strcmp(name, "OPENCL") == 0)
return DEVICE_OPENCL;
else if (strcmp(name, "NETWORK") == 0)
return DEVICE_NETWORK;
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
return DEVICE_NONE;
}
string Device::string_from_type(DeviceType type)
{
if (type == DEVICE_CPU)
return "CPU";
else if (type == DEVICE_CUDA)
return "CUDA";
else if (type == DEVICE_OPTIX)
return "OPTIX";
else if (type == DEVICE_OPENCL)
return "OPENCL";
else if (type == DEVICE_NETWORK)
return "NETWORK";
else if (type == DEVICE_MULTI)
return "MULTI";
return "";
}
vector<DeviceType> Device::available_types()
{
vector<DeviceType> types;
types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPTIX
types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_OPENCL
types.push_back(DEVICE_OPENCL);
#endif
#ifdef WITH_NETWORK
types.push_back(DEVICE_NETWORK);
#endif
return types;
}
vector<DeviceInfo> Device::available_devices(uint mask)
{
/* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
* be broken and cause crashes when only trying to get device info, so
* we don't want to do any initialization until the user chooses to. */
thread_scoped_lock lock(device_mutex);
vector<DeviceInfo> devices;
#ifdef WITH_OPENCL
if (mask & DEVICE_MASK_OPENCL) {
if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
if (device_opencl_init()) {
device_opencl_info(opencl_devices);
}
devices_initialized_mask |= DEVICE_MASK_OPENCL;
}
foreach (DeviceInfo &info, opencl_devices) {
devices.push_back(info);
}
}
#endif
#if defined(WITH_CUDA) || defined(WITH_OPTIX)
if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
if (device_cuda_init()) {
device_cuda_info(cuda_devices);
}
devices_initialized_mask |= DEVICE_MASK_CUDA;
}
if (mask & DEVICE_MASK_CUDA) {
foreach (DeviceInfo &info, cuda_devices) {
devices.push_back(info);
}
}
}
#endif
#ifdef WITH_OPTIX
if (mask & DEVICE_MASK_OPTIX) {
if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
if (device_optix_init()) {
device_optix_info(cuda_devices, optix_devices);
}
devices_initialized_mask |= DEVICE_MASK_OPTIX;
}
foreach (DeviceInfo &info, optix_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
devices_initialized_mask |= DEVICE_MASK_CPU;
}
foreach (DeviceInfo &info, cpu_devices) {
devices.push_back(info);
}
}
#ifdef WITH_NETWORK
if (mask & DEVICE_MASK_NETWORK) {
if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
device_network_info(network_devices);
devices_initialized_mask |= DEVICE_MASK_NETWORK;
}
foreach (DeviceInfo &info, network_devices) {
devices.push_back(info);
}
}
#endif
return devices;
}
DeviceInfo Device::dummy_device(const string &error_msg)
{
DeviceInfo info;
info.type = DEVICE_DUMMY;
info.error_msg = error_msg;
return info;
}
string Device::device_capabilities(uint mask)
{
thread_scoped_lock lock(device_mutex);
string capabilities = "";
if (mask & DEVICE_MASK_CPU) {
capabilities += "\nCPU device capabilities: ";
capabilities += device_cpu_capabilities() + "\n";
}
#ifdef WITH_OPENCL
if (mask & DEVICE_MASK_OPENCL) {
if (device_opencl_init()) {
capabilities += "\nOpenCL device capabilities:\n";
capabilities += device_opencl_capabilities();
}
}
#endif
#ifdef WITH_CUDA
if (mask & DEVICE_MASK_CUDA) {
if (device_cuda_init()) {
capabilities += "\nCUDA device capabilities:\n";
capabilities += device_cuda_capabilities();
}
}
#endif
return capabilities;
}
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background)
{
assert(subdevices.size() > 0);
if (subdevices.size() == 1) {
/* No multi device needed. */
return subdevices.front();
}
DeviceInfo info;
info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
info.has_half_images = true;
info.has_volume_decoupled = true;
info.has_branched_path = true;
info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_profiling = true;
info.has_peer_memory = false;
info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
if (device.type == DEVICE_CPU && subdevices.size() > 1) {
if (background) {
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
<< ", to dedicate to GPU.";
if (cpu_threads >= 1) {
DeviceInfo cpu_device = device;
cpu_device.cpu_threads = cpu_threads;
info.multi_devices.push_back(cpu_device);
}
else {
continue;
}
}
else {
VLOG(1) << "CPU render threads disabled for interactive render.";
continue;
}
}
else {
info.multi_devices.push_back(device);
}
/* Create unique ID for this combination of devices. */
info.id += device.id;
/* Set device type to MULTI if subdevices are not of a common type. */
if (device.type != info.type) {
info.type = DEVICE_MULTI;
}
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_volume_decoupled &= device.has_volume_decoupled;
info.has_branched_path &= device.has_branched_path;
info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
info.denoisers &= device.denoisers;
}
return info;
}
void Device::tag_update()
{
free_memory();
}
void Device::free_memory()
{
devices_initialized_mask = 0;
cuda_devices.free_memory();
optix_devices.free_memory();
opencl_devices.free_memory();
cpu_devices.free_memory();
network_devices.free_memory();
}
/* DeviceInfo */
void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
{
assert(denoising_devices.empty());
if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
if (!optix_devices.empty()) {
/* Convert to a special multi device with separate denoising devices. */
if (multi_devices.empty()) {
multi_devices.push_back(*this);
}
/* Try to use the same physical devices for denoising. */
for (const DeviceInfo &cuda_device : multi_devices) {
if (cuda_device.type == DEVICE_CUDA) {
for (const DeviceInfo &optix_device : optix_devices) {
if (cuda_device.num == optix_device.num) {
id += optix_device.id;
denoising_devices.push_back(optix_device);
break;
}
}
}
}
if (denoising_devices.empty()) {
/* Simply use the first available OptiX device. */
const DeviceInfo optix_device = optix_devices.front();
id += optix_device.id; /* Uniquely identify this special multi device. */
denoising_devices.push_back(optix_device);
}
denoisers = denoiser_type;
}
}
else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
/* Convert to a special multi device with separate denoising devices. */
if (multi_devices.empty()) {
multi_devices.push_back(*this);
}
/* Add CPU denoising devices. */
DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
denoising_devices.push_back(cpu_device);
denoisers = denoiser_type;
}
}
CCL_NAMESPACE_END