Cycles: Add Intel GPU support for OpenImageDenoise #108314

Merged
Stefan Werner merged 28 commits from Stefan_Werner/blender:oidn2 into main 2023-11-20 11:12:51 +01:00
9 changed files with 451 additions and 5 deletions

View File

@ -222,7 +222,8 @@ enum_guiding_directional_sampling_types = (
def enum_openimagedenoise_denoiser(self, context):
import _cycles
if _cycles.with_openimagedenoise:
return [('OPENIMAGEDENOISE', "OpenImageDenoise", "Use Intel OpenImageDenoise AI denoiser running on the CPU", 4)]
return [('OPENIMAGEDENOISE', "OpenImageDenoise",
"Use Intel OpenImageDenoise AI denoiser", 4)]
return []

View File

@ -4,6 +4,7 @@
#include "device/cpu/device.h"
#include "device/cpu/device_impl.h"
#include "integrator/denoiser_oidn.h"
/* Used for `info.denoisers`. */
/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their

View File

@ -9,6 +9,7 @@
#ifdef WITH_ONEAPI
# include "device/device.h"
# include "device/oneapi/device_impl.h"
# include "integrator/denoiser_oidn_gpu.h"
# include "util/path.h"
# include "util/string.h"
@ -107,7 +108,11 @@ static void device_iterator_cb(
info.id = id;
info.has_nanovdb = true;
info.denoisers = 0;
# if defined(WITH_OPENIMAGEDENOISE)
if (OIDNDenoiserGPU::is_device_supported(info)) {
info.denoisers |= DENOISER_OPENIMAGEDENOISE;
}
# endif
info.has_gpu_queue = true;

View File

@ -14,6 +14,7 @@ set(SRC
denoiser.cpp
denoiser_gpu.cpp
denoiser_oidn.cpp
denoiser_oidn_gpu.cpp
denoiser_optix.cpp
path_trace.cpp
tile.cpp
@ -36,6 +37,7 @@ set(SRC_HEADERS
denoiser.h
denoiser_gpu.h
denoiser_oidn.h
denoiser_oidn_gpu.h
denoiser_optix.h
guiding.h
path_trace.h

View File

@ -6,6 +6,9 @@
#include "device/device.h"
#include "integrator/denoiser_oidn.h"
#ifdef WITH_OPENIMAGEDENOISE
# include "integrator/denoiser_oidn_gpu.h"
#endif
#include "integrator/denoiser_optix.h"
#include "session/buffers.h"
#include "util/log.h"
@ -23,6 +26,12 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
}
#endif
#ifdef WITH_OPENIMAGEDENOISE
if (params.type == DENOISER_OPENIMAGEDENOISE && path_trace_device->info.type == DEVICE_ONEAPI) {
return make_unique<OIDNDenoiserGPU>(path_trace_device, params);
}
#endif
/* Always fallback to OIDN. */
DenoiseParams oidn_params = params;
oidn_params.type = DENOISER_OPENIMAGEDENOISE;
@ -131,8 +140,8 @@ static DeviceInfo find_best_denoiser_device_info(const vector<DeviceInfo> &devic
continue;
}
/* TODO(sergey): Use one of the already configured devices, so that OptiX denoising can happen
* on a physical CUDA device which is already used for rendering. */
/* TODO(sergey): Use one of the already configured devices, so that GPU denoising can happen
* on a physical device which is already used for rendering. */
/* TODO(sergey): Choose fastest device for denoising. */

View File

@ -10,7 +10,7 @@
CCL_NAMESPACE_BEGIN
/* Implementation of denoising API which uses OpenImageDenoise library. */
/* Implementation of a CPU based denoiser which uses OpenImageDenoise library. */
class OIDNDenoiser : public Denoiser {
public:
/* Forwardly declared state which might be using compile-flag specific fields, such as

View File

@ -0,0 +1,350 @@
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#if defined(WITH_OPENIMAGEDENOISE)
# include "integrator/denoiser_oidn_gpu.h"
# include <array>
# include "device/device.h"
# include "device/queue.h"
# include "integrator/pass_accessor_cpu.h"
# include "session/buffers.h"
# include "util/array.h"
# include "util/log.h"
# include "util/openimagedenoise.h"
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/kernel.h"
/* Compatibility shim for OpenImageDenoise releases older than 2.x: the code below is written
 * against the newer function names, so map them onto the names the older API provides.
 * Note that with this shim `oidnExecuteFilterAsync` resolves to plain `oidnExecuteFilter`. */
# if OIDN_VERSION_MAJOR < 2
# define oidnSetFilterBool oidnSetFilter1b
# define oidnSetFilterInt oidnSetFilter1i
# define oidnExecuteFilterAsync oidnExecuteFilter
# endif
CCL_NAMESPACE_BEGIN
/* Value passed as the "maxMemoryMB" filter parameter in the albedo/normal prefilter code paths.
 * Ideally, this would be dynamic and adaptively change when the runtime runs out of memory. */
constexpr int prefilter_max_mem = 1024;
/* Out-of-class definition of the static mutex declared in OIDNDenoiserGPU. */
thread_mutex OIDNDenoiserGPU::mutex_;
/* Tell whether GPU denoising through OpenImageDenoise is available for the given device type.
 *
 * Only oneAPI devices qualify, and only when the OIDN headers advertise SYCL support
 * (OIDN_DEVICE_SYCL). Every other device type is reported as unsupported. */
bool OIDNDenoiserGPU::is_device_type_supported(const DeviceType &type)
{
# ifdef OIDN_DEVICE_SYCL
  /* Assume all devices with Cycles support are also supported by OIDN2. */
  if (type == DEVICE_ONEAPI) {
    return true;
  }
# else
  /* No GPU backend compiled in: the argument is intentionally unused. */
  (void)type;
# endif
  return false;
}
/* Tell whether the given device can be used for OIDN GPU denoising.
 * The decision is based solely on `device.type`; no other field of the device info is
 * inspected. */
bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
{
/* Currently falls back to checking just the device type, can be improved. */
return is_device_type_supported(device.type);
}
/* Construct the denoiser by forwarding both arguments to the DenoiserGPU base class.
 * No OIDN handles are created here; they are created in denoise_create_if_needed(). */
OIDNDenoiserGPU::OIDNDenoiserGPU(Device *path_trace_device, const DenoiseParams &params)
: DenoiserGPU(path_trace_device, params)
{
/* This class only implements the OpenImageDenoise denoiser type. */
DCHECK_EQ(params.type, DENOISER_OPENIMAGEDENOISE);
}
/* Release every OIDN handle that is still held: the two prefilters, then the main filter,
 * and finally the device they were created from. */
OIDNDenoiserGPU::~OIDNDenoiserGPU()
{
  const auto release_filter = [](OIDNFilter filter) {
    if (filter != nullptr) {
      oidnReleaseFilter(filter);
    }
  };

  release_filter(albedo_filter_);
  release_filter(normal_filter_);
  release_filter(oidn_filter_);

  if (oidn_device_ != nullptr) {
    oidnReleaseDevice(oidn_device_);
  }
}
/* Denoise the given render buffers.
 * This override adds no logic of its own: all four arguments are forwarded unchanged to
 * DenoiserGPU::denoise_buffer() and its result is returned as-is. */
bool OIDNDenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification)
{
return DenoiserGPU::denoise_buffer(
buffer_params, render_buffers, num_samples, allow_inplace_modification);
}
/* Bit-mask of the device types this denoiser can run on.
 * Contains the oneAPI bit when the OIDN headers advertise SYCL support (OIDN_DEVICE_SYCL),
 * and is empty otherwise. */
uint OIDNDenoiserGPU::get_device_type_mask() const
{
# ifdef OIDN_DEVICE_SYCL
  return static_cast<uint>(DEVICE_MASK_ONEAPI);
# else
  return 0;
# endif
}
/* Create a new "RT" filter on the current OIDN device.
 *
 * Returns the new filter handle, or nullptr on failure. On failure an error is always logged
 * and set on the denoiser device, so callers can simply propagate the failure. */
OIDNFilter OIDNDenoiserGPU::create_filter()
{
  OIDNFilter filter = oidnNewFilter(oidn_device_, "RT");
  if (filter != nullptr) {
    return filter;
  }

  /* Fetch the reason from the device. The message is not guaranteed to be available, so fall
   * back to a generic description rather than streaming a null pointer or silently returning
   * without any error recorded. */
  const char *error_message = nullptr;
  oidnGetDeviceError(oidn_device_, &error_message);
  if (error_message == nullptr) {
    error_message = "Failed to create OIDN filter";
  }

  LOG(ERROR) << "OIDN error: " << error_message;
  denoiser_device_->set_error(error_message);
  return nullptr;
}
/* Create the OIDN device and filters if they do not exist yet, or re-create them when the set
 * of guiding passes (albedo/normal) differs from what the current filters were created for.
 *
 * Returns true when usable handles exist on exit. Returns false on failure; in that case no
 * member is left pointing at an already released handle, so both a later retry and the
 * destructor are safe. After a successful (re-)creation the denoiser is marked as not
 * configured. */
bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
{
  const bool recreate_denoiser = (oidn_device_ == nullptr) || (oidn_filter_ == nullptr) ||
                                 (use_pass_albedo_ != context.use_pass_albedo) ||
                                 (use_pass_normal_ != context.use_pass_normal);
  if (!recreate_denoiser) {
    return true;
  }

  /* Destroy all existing handles before creating new ones. This includes the prefilters, which
   * would otherwise be overwritten (and leaked) below. Every handle is reset to null right after
   * its release so that an early return can not leave a dangling handle behind. */
  const auto release_filter = [](OIDNFilter &filter) {
    if (filter != nullptr) {
      oidnReleaseFilter(filter);
      filter = nullptr;
    }
  };
  release_filter(albedo_filter_);
  release_filter(normal_filter_);
  release_filter(oidn_filter_);
  if (oidn_device_ != nullptr) {
    oidnReleaseDevice(oidn_device_);
    oidn_device_ = nullptr;
  }

  switch (denoiser_device_->info.type) {
# if defined(OIDN_DEVICE_SYCL)
    case DEVICE_ONEAPI:
      oidn_device_ = oidnNewDevice(OIDN_DEVICE_TYPE_SYCL);
      denoiser_queue_->init_execution();
      break;
# endif
    default:
      /* Unsupported device type: `oidn_device_` stays null and is reported below. */
      break;
  }
  if (!oidn_device_) {
    denoiser_device_->set_error("Failed to create OIDN device");
    return false;
  }
  oidnCommitDevice(oidn_device_);

  /* Main filter. create_filter() is responsible for reporting its own failure. */
  oidn_filter_ = create_filter();
  if (oidn_filter_ == nullptr) {
    return false;
  }
  oidnSetFilterBool(oidn_filter_, "hdr", true);
  oidnSetFilterBool(oidn_filter_, "srgb", false);
  oidnSetFilterInt(oidn_filter_, "maxMemoryMB", max_mem_);
  if (params_.prefilter == DENOISER_PREFILTER_NONE ||
      params_.prefilter == DENOISER_PREFILTER_ACCURATE)
  {
    /* "cleanAux" is a boolean parameter, set it through the same setter as the other flags. */
    oidnSetFilterBool(oidn_filter_, "cleanAux", true);
  }

  /* Optional prefilters for the guiding passes. */
  if (context.use_pass_albedo) {
    albedo_filter_ = create_filter();
    if (albedo_filter_ == nullptr) {
      /* NOTE(review): kept from the original code. This adjusts the limit of the main filter in
       * the failure path; it looks like it was meant to configure the prefilter instead --
       * confirm the intent. */
      oidnSetFilterInt(oidn_filter_, "maxMemoryMB", prefilter_max_mem);
      return false;
    }
  }
  if (context.use_pass_normal) {
    normal_filter_ = create_filter();
    if (normal_filter_ == nullptr) {
      /* NOTE(review): same remark as for the albedo prefilter above. */
      oidnSetFilterInt(oidn_filter_, "maxMemoryMB", prefilter_max_mem);
      return false;
    }
  }

  /* OIDN denoiser handle was created with the requested number of input passes. */
  use_pass_albedo_ = context.use_pass_albedo;
  use_pass_normal_ = context.use_pass_normal;

  /* OIDN denoiser has been created, but it needs configuration. */
  is_configured_ = false;
  return true;
}
/* Remember the buffer size the denoiser is configured for.
 *
 * No OIDN call is made here: the function only records the width/height taken from the
 * context and flags the denoiser as configured. It always returns true. */
bool OIDNDenoiserGPU::denoise_configure_if_needed(DenoiseContext &context)
{
  const int2 requested_size = make_int2(context.buffer_params.width,
                                        context.buffer_params.height);
  const bool same_size = (configured_size_.x == requested_size.x) &&
                         (configured_size_.y == requested_size.y);

  if (!is_configured_ || !same_size) {
    /* First use, or the buffer dimensions changed since the last configuration. */
    configured_size_ = requested_size;
    is_configured_ = true;
  }

  return true;
}
bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
{
/* Color pass. */
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
oidnSetSharedFilterImage(oidn_filter_,
"color",
(void *)context.render_buffers->buffer.device_pointer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
pass.denoised_offset * sizeof(float),
pass_stride_in_bytes,
pass_stride_in_bytes * context.buffer_params.stride);
oidnSetSharedFilterImage(oidn_filter_,
"output",
(void *)context.render_buffers->buffer.device_pointer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
pass.denoised_offset * sizeof(float),
pass_stride_in_bytes,
pass_stride_in_bytes * context.buffer_params.stride);
/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
if (context.use_pass_albedo) {
if (params_.prefilter == DENOISER_PREFILTER_NONE) {
oidnSetSharedFilterImage(oidn_filter_,
"albedo",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_albedo * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
}
else {
oidnSetSharedFilterImage(albedo_filter_,
"color",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_albedo * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
oidnSetSharedFilterImage(albedo_filter_,
"output",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_albedo * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
oidnCommitFilter(albedo_filter_);
oidnExecuteFilterAsync(albedo_filter_);
oidnSetSharedFilterImage(oidn_filter_,
"albedo",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_albedo * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
}
}
if (context.use_pass_normal) {
if (params_.prefilter == DENOISER_PREFILTER_NONE) {
oidnSetSharedFilterImage(oidn_filter_,
"normal",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_normal * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
}
else {
oidnSetSharedFilterImage(normal_filter_,
"color",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_normal * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
oidnSetSharedFilterImage(normal_filter_,
"output",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_normal * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
oidnCommitFilter(normal_filter_);
oidnExecuteFilterAsync(normal_filter_);
oidnSetSharedFilterImage(oidn_filter_,
"normal",
(void *)d_guiding_buffer,
OIDN_FORMAT_FLOAT3,
context.buffer_params.width,
context.buffer_params.height,
context.guiding_params.pass_normal * sizeof(float),
pixel_stride_in_bytes,
row_stride_in_bytes);
}
}
}
oidnCommitFilter(oidn_filter_);
oidnExecuteFilter(oidn_filter_);
const char *out_message = nullptr;
OIDNError err = oidnGetDeviceError(oidn_device_, (const char **)&out_message);
if (OIDN_ERROR_NONE != err) {
/* If OIDN runs out of memory, reduce mem limit and retry */
while (err == OIDN_ERROR_OUT_OF_MEMORY && max_mem_ > 200) {
max_mem_ = max_mem_ / 2;
oidnSetFilterInt(oidn_filter_, "maxMemoryMB", max_mem_);
oidnCommitFilter(oidn_filter_);
oidnExecuteFilter(oidn_filter_);
err = oidnGetDeviceError(oidn_device_, &out_message);
}
if (out_message) {
LOG(ERROR) << "OIDN error: " << out_message;
denoiser_device_->set_error(out_message);
}
else {
LOG(ERROR) << "OIDN error: unspecified";
denoiser_device_->set_error("Unspecified OIDN error");
}
return false;
}
return true;
}
CCL_NAMESPACE_END
#endif

View File

@ -0,0 +1,75 @@
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
#if defined(WITH_OPENIMAGEDENOISE)
# include "integrator/denoiser_gpu.h"
# include "util/thread.h"
# include "util/unique_ptr.h"
typedef struct OIDNDeviceImpl *OIDNDevice;
typedef struct OIDNFilterImpl *OIDNFilter;
typedef struct OIDNBufferImpl *OIDNBuffer;
CCL_NAMESPACE_BEGIN
/* Implementation of a GPU denoiser which uses OpenImageDenoise library.
 *
 * The object owns the OIDN device and filter handles declared below: they are created by
 * denoise_create_if_needed() and released in the destructor. */
class OIDNDenoiserGPU : public DenoiserGPU {
friend class OIDNDenoiseContext;
public:
/* Forwardly declared state which might be using compile-flag specific fields, such as
* OpenImageDenoise device and filter handles. */
class State;
/* Both arguments are forwarded to the DenoiserGPU base class. */
OIDNDenoiserGPU(Device *path_trace_device, const DenoiseParams &params);
/* Releases the OIDN filter and device handles which are still held. */
~OIDNDenoiserGPU();
/* Forwards to DenoiserGPU::denoise_buffer(). */
virtual bool denoise_buffer(const BufferParams &buffer_params,
RenderBuffers *render_buffers,
const int num_samples,
bool allow_inplace_modification) override;
/* Check whether the given device can be used by this denoiser. Currently only the device type
* is taken into account. */
static bool is_device_supported(const DeviceInfo &device);
/* Check whether devices of the given type can be used by this denoiser. */
static bool is_device_type_supported(const DeviceType &type);
protected:
/* Mask of the device types this denoiser can run on. */
virtual uint get_device_type_mask() const override;
/* We only perform one denoising at a time, since OpenImageDenoise itself is multithreaded.
* Use this mutex whenever images are passed to OIDN and need to be denoised. */
static thread_mutex mutex_;
/* Create OIDN denoiser descriptor if needed.
* Will do nothing if the current OIDN descriptor is usable for the given parameters.
* If the OIDN denoiser descriptor did re-allocate here it is left unconfigured. */
virtual bool denoise_create_if_needed(DenoiseContext &context) override;
/* Configure existing OIDN denoiser descriptor for the use for the given task. */
virtual bool denoise_configure_if_needed(DenoiseContext &context) override;
/* Run configured denoiser. */
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) override;
/* Create a new filter on `oidn_device_`. Returns null on failure. */
OIDNFilter create_filter();
/* OIDN device all filters below are created from. */
OIDNDevice oidn_device_ = nullptr;
/* Main filter, which denoises the color pass. */
OIDNFilter oidn_filter_ = nullptr;
/* Filters used to prefilter the albedo and normal guiding passes. */
OIDNFilter albedo_filter_ = nullptr;
OIDNFilter normal_filter_ = nullptr;
/* Whether denoise_configure_if_needed() has run since the filters were (re-)created, and the
* buffer size it recorded. */
bool is_configured_ = false;
int2 configured_size_ = make_int2(0, 0);
/* Guiding passes the current filters were created for. */
bool use_pass_albedo_ = false;
bool use_pass_normal_ = false;
/* Value passed as the "maxMemoryMB" parameter of the main filter. It is halved by
* denoise_run() when OIDN reports that it ran out of memory. */
int max_mem_ = 3000;
};
CCL_NAMESPACE_END
#endif

View File

@ -137,6 +137,9 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
const uint kernel_features)
{
/* Skip all non-Cycles kernels */
if (kernel_name.find("oneapi_kernel_") == std::string::npos)
return false;
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
std::string::npos)