blender-archive/intern/cycles/device/device.cpp

/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdlib.h>
#include <string.h>

#include "bvh/bvh2.h"

#include "device/device.h"
#include "device/device_intern.h"

#include "util/util_foreach.h"
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_string.h"
#include "util/util_system.h"
#include "util/util_time.h"
#include "util/util_types.h"
#include "util/util_vector.h"

CCL_NAMESPACE_BEGIN

bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::network_devices;
uint Device::devices_initialized_mask = 0;

/* Device Requested Features */

std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
  os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
  os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
  /* TODO(sergey): Decode bitflag into list of names. */
  os << "Nodes features: " << requested_features.nodes_features << std::endl;
  os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
  os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
     << std::endl;
  os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
     << std::endl;
  os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
  os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
  os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
  os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
     << std::endl;
  os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
     << std::endl;
  os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
     << std::endl;
  os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
     << std::endl;
  os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
  os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
     << std::endl;
  os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
     << std::endl;
  return os;
}

/* Device */

Device::~Device() noexcept(false)
{
  if (!background) {
    if (vertex_buffer != 0) {
      glDeleteBuffers(1, &vertex_buffer);
    }
    if (fallback_shader_program != 0) {
      glDeleteProgram(fallback_shader_program);
    }
  }
}

/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
    "#version 330\n"
    "uniform vec2 fullscreen;\n"
    "in vec2 texCoord;\n"
    "in vec2 pos;\n"
    "out vec2 texCoord_interp;\n"
    "\n"
    "vec2 normalize_coordinates()\n"
    "{\n"
    "   return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
    "}\n"
    "\n"
    "void main()\n"
    "{\n"
    "   gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
    "   texCoord_interp = texCoord;\n"
    "}\n\0";

const char *FALLBACK_FRAGMENT_SHADER =
    "#version 330\n"
    "uniform sampler2D image_texture;\n"
    "in vec2 texCoord_interp;\n"
    "out vec4 fragColor;\n"
    "\n"
    "void main()\n"
    "{\n"
    "   fragColor = texture(image_texture, texCoord_interp);\n"
    "}\n\0";

static void shader_print_errors(const char *task, const char *log, const char *code)
{
  LOG(ERROR) << "Shader: " << task << " error:";
  LOG(ERROR) << "===== shader string ====";

  stringstream stream(code);
  string partial;

  int line = 1;
  while (getline(stream, partial, '\n')) {
    if (line < 10) {
      LOG(ERROR) << " " << line << " " << partial;
    }
    else {
      LOG(ERROR) << line << " " << partial;
    }
    line++;
  }
  LOG(ERROR) << log;
}

static int bind_fallback_shader(void)
{
  GLint status;
  GLchar log[5000];
  GLsizei length = 0;
  GLuint program = 0;

  struct Shader {
    const char *source;
    GLenum type;
  } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
                  {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};

  program = glCreateProgram();

  for (int i = 0; i < 2; i++) {
    GLuint shader = glCreateShader(shaders[i].type);

    string source_str = shaders[i].source;
    const char *c_str = source_str.c_str();

    glShaderSource(shader, 1, &c_str, NULL);
    glCompileShader(shader);

    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);

    if (!status) {
      glGetShaderInfoLog(shader, sizeof(log), &length, log);
      shader_print_errors("compile", log, c_str);
      return 0;
    }

    glAttachShader(program, shader);
  }

  /* Link output. */
  glBindFragDataLocation(program, 0, "fragColor");

  /* Link and error check. */
  glLinkProgram(program);

  glGetProgramiv(program, GL_LINK_STATUS, &status);
  if (!status) {
    glGetShaderInfoLog(program, sizeof(log), &length, log);
    shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
    shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
    return 0;
  }

  return program;
}

bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
  if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
    return false;
  }

  if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
    fallback_shader_program = bind_fallback_shader();
    fallback_status = FALLBACK_SHADER_STATUS_ERROR;

    if (fallback_shader_program == 0) {
      return false;
    }

    glUseProgram(fallback_shader_program);
    image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
    if (image_texture_location < 0) {
      LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
      return false;
    }

    fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
    if (fullscreen_location < 0) {
      LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
      return false;
    }

    fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
  }

  /* Run this every time. */
  glUseProgram(fallback_shader_program);
  glUniform1i(image_texture_location, 0);
  glUniform2f(fullscreen_location, width, height);
  return true;
}

void Device::draw_pixels(device_memory &rgba,
                         int y,
                         int w,
                         int h,
                         int width,
                         int height,
                         int dx,
                         int dy,
                         int dw,
                         int dh,
                         bool transparent,
                         const DeviceDrawParams &draw_params)
{
  const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);

  assert(rgba.type == MEM_PIXELS);
  mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));

  GLuint texid;
  glActiveTexture(GL_TEXTURE0);
  glGenTextures(1, &texid);
  glBindTexture(GL_TEXTURE_2D, texid);

  if (rgba.data_type == TYPE_HALF) {
    GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
    data_pointer += 4 * y * w;
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
  }
  else {
    uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
    data_pointer += 4 * y * w;
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
  }

  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

  if (transparent) {
    glEnable(GL_BLEND);
    glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
  }

  GLint shader_program;
  if (use_fallback_shader) {
    if (!bind_fallback_display_space_shader(dw, dh)) {
      return;
    }
    shader_program = fallback_shader_program;
  }
  else {
    draw_params.bind_display_space_shader_cb();
    glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
  }

  if (!vertex_buffer) {
    glGenBuffers(1, &vertex_buffer);
  }

  glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
  /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
   */
  glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);

  float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);

  if (vpointer) {
    /* texture coordinate - vertex pair */
    vpointer[0] = 0.0f;
    vpointer[1] = 0.0f;
    vpointer[2] = dx;
    vpointer[3] = dy;

    vpointer[4] = 1.0f;
    vpointer[5] = 0.0f;
    vpointer[6] = (float)width + dx;
    vpointer[7] = dy;

    vpointer[8] = 1.0f;
    vpointer[9] = 1.0f;
    vpointer[10] = (float)width + dx;
    vpointer[11] = (float)height + dy;

    vpointer[12] = 0.0f;
    vpointer[13] = 1.0f;
    vpointer[14] = dx;
    vpointer[15] = (float)height + dy;

    if (vertex_buffer) {
      glUnmapBuffer(GL_ARRAY_BUFFER);
    }
  }

  GLuint vertex_array_object;
  GLuint position_attribute, texcoord_attribute;

  glGenVertexArrays(1, &vertex_array_object);
  glBindVertexArray(vertex_array_object);

  texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
  position_attribute = glGetAttribLocation(shader_program, "pos");

  glEnableVertexAttribArray(texcoord_attribute);
  glEnableVertexAttribArray(position_attribute);

  glVertexAttribPointer(
      texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
  glVertexAttribPointer(position_attribute,
                        2,
                        GL_FLOAT,
                        GL_FALSE,
                        4 * sizeof(float),
                        (const GLvoid *)(sizeof(float) * 2));

  glDrawArrays(GL_TRIANGLE_FAN, 0, 4);

  if (vertex_buffer) {
    glBindBuffer(GL_ARRAY_BUFFER, 0);
  }

  if (use_fallback_shader) {
    glUseProgram(0);
  }
  else {
    draw_params.unbind_display_space_shader_cb();
  }

  glDeleteVertexArrays(1, &vertex_array_object);
  glBindTexture(GL_TEXTURE_2D, 0);
  glDeleteTextures(1, &texid);

  if (transparent) {
    glDisable(GL_BLEND);
  }
}

void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
  assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);

  BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
  if (refit) {
    bvh2->refit(progress);
  }
  else {
    bvh2->build(progress, &stats);
  }
}

Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
#ifdef WITH_MULTI
  if (!info.multi_devices.empty()) {
    /* Always create a multi device when info contains multiple devices.
     * This is done so that the type can still be e.g. DEVICE_CPU to indicate
     * that it is a homogeneous collection of devices, which simplifies checks. */
    return device_multi_create(info, stats, profiler, background);
  }
#endif

  Device *device = NULL;

  switch (info.type) {
    case DEVICE_CPU:
      device = device_cpu_create(info, stats, profiler, background);
      break;
#ifdef WITH_CUDA
    case DEVICE_CUDA:
      if (device_cuda_init())
        device = device_cuda_create(info, stats, profiler, background);
      break;
#endif
#ifdef WITH_OPTIX
    case DEVICE_OPTIX:
      if (device_optix_init())
        device = device_optix_create(info, stats, profiler, background);
      break;
#endif
#ifdef WITH_NETWORK
    case DEVICE_NETWORK:
      device = device_network_create(info, stats, profiler, "127.0.0.1");
      break;
#endif
#ifdef WITH_OPENCL
    case DEVICE_OPENCL:
      if (device_opencl_init())
        device = device_opencl_create(info, stats, profiler, background);
      break;
#endif
    default:
      break;
  }

  if (device == NULL) {
    device = device_dummy_create(info, stats, profiler, background);
  }

  return device;
}

DeviceType Device::type_from_string(const char *name)
{
  if (strcmp(name, "CPU") == 0)
    return DEVICE_CPU;
  else if (strcmp(name, "CUDA") == 0)
    return DEVICE_CUDA;
  else if (strcmp(name, "OPTIX") == 0)
    return DEVICE_OPTIX;
  else if (strcmp(name, "OPENCL") == 0)
    return DEVICE_OPENCL;
  else if (strcmp(name, "NETWORK") == 0)
    return DEVICE_NETWORK;
  else if (strcmp(name, "MULTI") == 0)
    return DEVICE_MULTI;

  return DEVICE_NONE;
}

string Device::string_from_type(DeviceType type)
{
  if (type == DEVICE_CPU)
    return "CPU";
  else if (type == DEVICE_CUDA)
    return "CUDA";
  else if (type == DEVICE_OPTIX)
    return "OPTIX";
  else if (type == DEVICE_OPENCL)
    return "OPENCL";
  else if (type == DEVICE_NETWORK)
    return "NETWORK";
  else if (type == DEVICE_MULTI)
    return "MULTI";

  return "";
}

vector<DeviceType> Device::available_types()
{
  vector<DeviceType> types;
  types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
  types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPTIX
  types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_OPENCL
  types.push_back(DEVICE_OPENCL);
#endif
#ifdef WITH_NETWORK
  types.push_back(DEVICE_NETWORK);
#endif
  return types;
}

vector<DeviceInfo> Device::available_devices(uint mask)
{
  /* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
   * be broken and cause crashes when only trying to get device info, so
   * we don't want to do any initialization until the user chooses to. */
  thread_scoped_lock lock(device_mutex);
  vector<DeviceInfo> devices;

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
      if (device_opencl_init()) {
        device_opencl_info(opencl_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_OPENCL;
    }
    foreach (DeviceInfo &info, opencl_devices) {
      devices.push_back(info);
    }
  }
#endif

#if defined(WITH_CUDA) || defined(WITH_OPTIX)
  if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
    if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
      if (device_cuda_init()) {
        device_cuda_info(cuda_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_CUDA;
    }
    if (mask & DEVICE_MASK_CUDA) {
      foreach (DeviceInfo &info, cuda_devices) {
        devices.push_back(info);
      }
    }
  }
#endif

#ifdef WITH_OPTIX
  if (mask & DEVICE_MASK_OPTIX) {
    if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
      if (device_optix_init()) {
        device_optix_info(cuda_devices, optix_devices);
      }
      devices_initialized_mask |= DEVICE_MASK_OPTIX;
    }
    foreach (DeviceInfo &info, optix_devices) {
      devices.push_back(info);
    }
  }
#endif

  if (mask & DEVICE_MASK_CPU) {
    if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
      device_cpu_info(cpu_devices);
      devices_initialized_mask |= DEVICE_MASK_CPU;
    }
    foreach (DeviceInfo &info, cpu_devices) {
      devices.push_back(info);
    }
  }

#ifdef WITH_NETWORK
  if (mask & DEVICE_MASK_NETWORK) {
    if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
      device_network_info(network_devices);
      devices_initialized_mask |= DEVICE_MASK_NETWORK;
    }
    foreach (DeviceInfo &info, network_devices) {
      devices.push_back(info);
    }
  }
#endif

  return devices;
}

DeviceInfo Device::dummy_device(const string &error_msg)
{
  DeviceInfo info;
  info.type = DEVICE_DUMMY;
  info.error_msg = error_msg;
  return info;
}

string Device::device_capabilities(uint mask)
{
  thread_scoped_lock lock(device_mutex);
  string capabilities = "";

  if (mask & DEVICE_MASK_CPU) {
    capabilities += "\nCPU device capabilities: ";
    capabilities += device_cpu_capabilities() + "\n";
  }

#ifdef WITH_OPENCL
  if (mask & DEVICE_MASK_OPENCL) {
    if (device_opencl_init()) {
      capabilities += "\nOpenCL device capabilities:\n";
      capabilities += device_opencl_capabilities();
    }
  }
#endif

#ifdef WITH_CUDA
  if (mask & DEVICE_MASK_CUDA) {
    if (device_cuda_init()) {
      capabilities += "\nCUDA device capabilities:\n";
      capabilities += device_cuda_capabilities();
    }
  }
#endif

  return capabilities;
}

DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
                                    int threads,
                                    bool background)
{
  assert(subdevices.size() > 0);

  if (subdevices.size() == 1) {
    /* No multi device needed. */
    return subdevices.front();
  }

  DeviceInfo info;
  info.type = subdevices.front().type;
  info.id = "MULTI";
  info.description = "Multi Device";
  info.num = 0;

  info.has_half_images = true;
  info.has_volume_decoupled = true;
  info.has_branched_path = true;
  info.has_adaptive_stop_per_sample = true;
  info.has_osl = true;
  info.has_profiling = true;
  info.has_peer_memory = false;
  info.denoisers = DENOISER_ALL;

  foreach (const DeviceInfo &device, subdevices) {
    /* Ensure CPU device does not slow down GPU. */
    if (device.type == DEVICE_CPU && subdevices.size() > 1) {
      if (background) {
        int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
        int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);

        VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
                << ", to dedicate to GPU.";

        if (cpu_threads >= 1) {
          DeviceInfo cpu_device = device;
          cpu_device.cpu_threads = cpu_threads;
          info.multi_devices.push_back(cpu_device);
        }
        else {
          continue;
        }
      }
      else {
        VLOG(1) << "CPU render threads disabled for interactive render.";
        continue;
      }
    }
    else {
      info.multi_devices.push_back(device);
    }

    /* Create unique ID for this combination of devices. */
    info.id += device.id;

    /* Set device type to MULTI if subdevices are not of a common type. */
    if (device.type != info.type) {
      info.type = DEVICE_MULTI;
    }

    /* Accumulate device info. */
    info.has_half_images &= device.has_half_images;
    info.has_volume_decoupled &= device.has_volume_decoupled;
    info.has_branched_path &= device.has_branched_path;
    info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
    info.has_osl &= device.has_osl;
    info.has_profiling &= device.has_profiling;
    info.has_peer_memory |= device.has_peer_memory;
    info.denoisers &= device.denoisers;
  }

  return info;
}

void Device::tag_update()
{
  free_memory();
}

void Device::free_memory()
{
  devices_initialized_mask = 0;
  cuda_devices.free_memory();
  optix_devices.free_memory();
  opencl_devices.free_memory();
  cpu_devices.free_memory();
  network_devices.free_memory();
}

/* DeviceInfo */

void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
{
  assert(denoising_devices.empty());

  if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
    vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
    if (!optix_devices.empty()) {
      /* Convert to a special multi device with separate denoising devices. */
      if (multi_devices.empty()) {
        multi_devices.push_back(*this);
      }

      /* Try to use the same physical devices for denoising. */
      for (const DeviceInfo &cuda_device : multi_devices) {
        if (cuda_device.type == DEVICE_CUDA) {
          for (const DeviceInfo &optix_device : optix_devices) {
            if (cuda_device.num == optix_device.num) {
              id += optix_device.id;
              denoising_devices.push_back(optix_device);
              break;
            }
          }
        }
      }

      if (denoising_devices.empty()) {
        /* Simply use the first available OptiX device. */
        const DeviceInfo optix_device = optix_devices.front();
        id += optix_device.id; /* Uniquely identify this special multi device. */
        denoising_devices.push_back(optix_device);
      }

      denoisers = denoiser_type;
    }
  }
  else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
    /* Convert to a special multi device with separate denoising devices. */
    if (multi_devices.empty()) {
      multi_devices.push_back(*this);
    }

    /* Add CPU denoising devices. */
    DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
    denoising_devices.push_back(cpu_device);

    denoisers = denoiser_type;
  }
}

CCL_NAMESPACE_END