Cycles already treats denoising fairly separate in its code, with a dedicated `Denoiser` base class used to describe denoising behavior. That class has been fully implemented for OIDN (`denoiser_oidn.cpp`), but for OptiX was mostly empty (`denoiser_optix.cpp`) and denoising was instead implemented in the OptiX device. That meant denoising code was split over various files and directories, making it a bit awkward to work with. This patch moves the OptiX denoising implementation into the existing `OptiXDenoiser` class, so that everything is in one place. There are no functional changes, code has been mostly moved as-is. To retain support for potential other denoiser implementations based on a GPU device in the future, the `DeviceDenoiser` base class was kept and slightly extended (and its file renamed to `denoiser_gpu.cpp` to follow similar naming rules as `path_trace_work_*.cpp`). Differential Revision: https://developer.blender.org/D16502
787 lines
30 KiB
C++
787 lines
30 KiB
C++
/* SPDX-License-Identifier: Apache-2.0
|
|
* Copyright 2011-2022 Blender Foundation */
|
|
|
|
#ifdef WITH_OPTIX
|
|
|
|
# include "integrator/denoiser_optix.h"
|
|
# include "integrator/pass_accessor_gpu.h"
|
|
|
|
# include "device/optix/device_impl.h"
|
|
# include "device/optix/queue.h"
|
|
|
|
# include <optix_denoiser_tiling.h>
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
# if OPTIX_ABI_VERSION >= 60
|
|
using ::optixUtilDenoiserInvokeTiled;
|
|
# else
|
|
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
|
|
// issues without bumping SDK or driver requirement.
|
|
//
|
|
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
|
|
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
|
|
const OptixImage2D &output,
|
|
unsigned int overlapWindowSizeInPixels,
|
|
unsigned int tileWidth,
|
|
unsigned int tileHeight,
|
|
std::vector<OptixUtilDenoiserImageTile> &tiles)
|
|
{
|
|
if (tileWidth == 0 || tileHeight == 0)
|
|
return OPTIX_ERROR_INVALID_VALUE;
|
|
|
|
unsigned int inPixelStride = optixUtilGetPixelStride(input);
|
|
unsigned int outPixelStride = optixUtilGetPixelStride(output);
|
|
|
|
int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
|
|
int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
|
|
int inp_y = 0, copied_y = 0;
|
|
|
|
do {
|
|
int inputOffsetY = inp_y == 0 ? 0 :
|
|
std::max((int)overlapWindowSizeInPixels,
|
|
inp_h - ((int)input.height - inp_y));
|
|
int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
|
|
std::min(tileHeight, input.height - copied_y);
|
|
|
|
int inp_x = 0, copied_x = 0;
|
|
do {
|
|
int inputOffsetX = inp_x == 0 ? 0 :
|
|
std::max((int)overlapWindowSizeInPixels,
|
|
inp_w - ((int)input.width - inp_x));
|
|
int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
|
|
std::min(tileWidth, input.width - copied_x);
|
|
|
|
OptixUtilDenoiserImageTile tile;
|
|
tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
|
|
+(size_t)(inp_x - inputOffsetX) * inPixelStride;
|
|
tile.input.width = inp_w;
|
|
tile.input.height = inp_h;
|
|
tile.input.rowStrideInBytes = input.rowStrideInBytes;
|
|
tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
|
|
tile.input.format = input.format;
|
|
|
|
tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
|
|
(size_t)inp_x * outPixelStride;
|
|
tile.output.width = copy_x;
|
|
tile.output.height = copy_y;
|
|
tile.output.rowStrideInBytes = output.rowStrideInBytes;
|
|
tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
|
|
tile.output.format = output.format;
|
|
|
|
tile.inputOffsetX = inputOffsetX;
|
|
tile.inputOffsetY = inputOffsetY;
|
|
tiles.push_back(tile);
|
|
|
|
inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
|
|
copied_x += copy_x;
|
|
} while (inp_x < static_cast<int>(input.width));
|
|
|
|
inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
|
|
copied_y += copy_y;
|
|
} while (inp_y < static_cast<int>(input.height));
|
|
|
|
return OPTIX_SUCCESS;
|
|
}
|
|
|
|
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
|
|
CUstream stream,
|
|
const OptixDenoiserParams *params,
|
|
CUdeviceptr denoiserState,
|
|
size_t denoiserStateSizeInBytes,
|
|
const OptixDenoiserGuideLayer *guideLayer,
|
|
const OptixDenoiserLayer *layers,
|
|
unsigned int numLayers,
|
|
CUdeviceptr scratch,
|
|
size_t scratchSizeInBytes,
|
|
unsigned int overlapWindowSizeInPixels,
|
|
unsigned int tileWidth,
|
|
unsigned int tileHeight)
|
|
{
|
|
if (!guideLayer || !layers)
|
|
return OPTIX_ERROR_INVALID_VALUE;
|
|
|
|
std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
|
|
std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
|
|
for (unsigned int l = 0; l < numLayers; l++) {
|
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
|
|
layers[l].output,
|
|
overlapWindowSizeInPixels,
|
|
tileWidth,
|
|
tileHeight,
|
|
tiles[l]))
|
|
return res;
|
|
|
|
if (layers[l].previousOutput.data) {
|
|
OptixImage2D dummyOutput = layers[l].previousOutput;
|
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
|
|
dummyOutput,
|
|
overlapWindowSizeInPixels,
|
|
tileWidth,
|
|
tileHeight,
|
|
prevTiles[l]))
|
|
return res;
|
|
}
|
|
}
|
|
|
|
std::vector<OptixUtilDenoiserImageTile> albedoTiles;
|
|
if (guideLayer->albedo.data) {
|
|
OptixImage2D dummyOutput = guideLayer->albedo;
|
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
|
|
dummyOutput,
|
|
overlapWindowSizeInPixels,
|
|
tileWidth,
|
|
tileHeight,
|
|
albedoTiles))
|
|
return res;
|
|
}
|
|
|
|
std::vector<OptixUtilDenoiserImageTile> normalTiles;
|
|
if (guideLayer->normal.data) {
|
|
OptixImage2D dummyOutput = guideLayer->normal;
|
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
|
|
dummyOutput,
|
|
overlapWindowSizeInPixels,
|
|
tileWidth,
|
|
tileHeight,
|
|
normalTiles))
|
|
return res;
|
|
}
|
|
std::vector<OptixUtilDenoiserImageTile> flowTiles;
|
|
if (guideLayer->flow.data) {
|
|
OptixImage2D dummyOutput = guideLayer->flow;
|
|
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
|
|
dummyOutput,
|
|
overlapWindowSizeInPixels,
|
|
tileWidth,
|
|
tileHeight,
|
|
flowTiles))
|
|
return res;
|
|
}
|
|
|
|
for (size_t t = 0; t < tiles[0].size(); t++) {
|
|
std::vector<OptixDenoiserLayer> tlayers;
|
|
for (unsigned int l = 0; l < numLayers; l++) {
|
|
OptixDenoiserLayer layer = {};
|
|
layer.input = (tiles[l])[t].input;
|
|
layer.output = (tiles[l])[t].output;
|
|
if (layers[l].previousOutput.data)
|
|
layer.previousOutput = (prevTiles[l])[t].input;
|
|
tlayers.push_back(layer);
|
|
}
|
|
|
|
OptixDenoiserGuideLayer gl = {};
|
|
if (guideLayer->albedo.data)
|
|
gl.albedo = albedoTiles[t].input;
|
|
|
|
if (guideLayer->normal.data)
|
|
gl.normal = normalTiles[t].input;
|
|
|
|
if (guideLayer->flow.data)
|
|
gl.flow = flowTiles[t].input;
|
|
|
|
if (const OptixResult res = optixDenoiserInvoke(denoiser,
|
|
stream,
|
|
params,
|
|
denoiserState,
|
|
denoiserStateSizeInBytes,
|
|
&gl,
|
|
&tlayers[0],
|
|
numLayers,
|
|
(tiles[0])[t].inputOffsetX,
|
|
(tiles[0])[t].inputOffsetY,
|
|
scratch,
|
|
scratchSizeInBytes))
|
|
return res;
|
|
}
|
|
return OPTIX_SUCCESS;
|
|
}
|
|
# endif
|
|
|
|
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams ¶ms)
|
|
: DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
|
|
{
|
|
}
|
|
|
|
OptiXDenoiser::~OptiXDenoiser()
|
|
{
|
|
/* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
|
|
* handle, which is guaranteed since the local denoising device owning the OptiX device context
|
|
* is deleted as part of the Denoiser class destructor call after this. */
|
|
if (optix_denoiser_ != nullptr) {
|
|
optixDenoiserDestroy(optix_denoiser_);
|
|
}
|
|
}
|
|
|
|
uint OptiXDenoiser::get_device_type_mask() const
|
|
{
|
|
return DEVICE_MASK_OPTIX;
|
|
}
|
|
|
|
class OptiXDenoiser::DenoiseContext {
|
|
public:
|
|
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
|
|
: denoise_params(task.params),
|
|
render_buffers(task.render_buffers),
|
|
buffer_params(task.buffer_params),
|
|
guiding_buffer(device, "denoiser guiding passes buffer", true),
|
|
num_samples(task.num_samples)
|
|
{
|
|
num_input_passes = 1;
|
|
if (denoise_params.use_pass_albedo) {
|
|
num_input_passes += 1;
|
|
use_pass_albedo = true;
|
|
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
|
|
if (denoise_params.use_pass_normal) {
|
|
num_input_passes += 1;
|
|
use_pass_normal = true;
|
|
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
|
|
}
|
|
}
|
|
|
|
if (denoise_params.temporally_stable) {
|
|
prev_output.device_pointer = render_buffers->buffer.device_pointer;
|
|
|
|
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
|
|
|
|
prev_output.stride = buffer_params.stride;
|
|
prev_output.pass_stride = buffer_params.pass_stride;
|
|
|
|
num_input_passes += 1;
|
|
use_pass_motion = true;
|
|
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
|
|
}
|
|
|
|
use_guiding_passes = (num_input_passes - 1) > 0;
|
|
|
|
if (use_guiding_passes) {
|
|
if (task.allow_inplace_modification) {
|
|
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
|
|
|
|
guiding_params.pass_albedo = pass_denoising_albedo;
|
|
guiding_params.pass_normal = pass_denoising_normal;
|
|
guiding_params.pass_flow = pass_motion;
|
|
|
|
guiding_params.stride = buffer_params.stride;
|
|
guiding_params.pass_stride = buffer_params.pass_stride;
|
|
}
|
|
else {
|
|
guiding_params.pass_stride = 0;
|
|
if (use_pass_albedo) {
|
|
guiding_params.pass_albedo = guiding_params.pass_stride;
|
|
guiding_params.pass_stride += 3;
|
|
}
|
|
if (use_pass_normal) {
|
|
guiding_params.pass_normal = guiding_params.pass_stride;
|
|
guiding_params.pass_stride += 3;
|
|
}
|
|
if (use_pass_motion) {
|
|
guiding_params.pass_flow = guiding_params.pass_stride;
|
|
guiding_params.pass_stride += 2;
|
|
}
|
|
|
|
guiding_params.stride = buffer_params.width;
|
|
|
|
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
|
|
guiding_params.pass_stride);
|
|
guiding_params.device_pointer = guiding_buffer.device_pointer;
|
|
}
|
|
}
|
|
|
|
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
|
|
}
|
|
|
|
const DenoiseParams &denoise_params;
|
|
|
|
RenderBuffers *render_buffers = nullptr;
|
|
const BufferParams &buffer_params;
|
|
|
|
/* Previous output. */
|
|
struct {
|
|
device_ptr device_pointer = 0;
|
|
|
|
int offset = PASS_UNUSED;
|
|
|
|
int stride = -1;
|
|
int pass_stride = -1;
|
|
} prev_output;
|
|
|
|
/* Device-side storage of the guiding passes. */
|
|
device_only_memory<float> guiding_buffer;
|
|
|
|
struct {
|
|
device_ptr device_pointer = 0;
|
|
|
|
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
|
int pass_albedo = PASS_UNUSED;
|
|
int pass_normal = PASS_UNUSED;
|
|
int pass_flow = PASS_UNUSED;
|
|
|
|
int stride = -1;
|
|
int pass_stride = -1;
|
|
} guiding_params;
|
|
|
|
/* Number of input passes. Including the color and extra auxiliary passes. */
|
|
int num_input_passes = 0;
|
|
bool use_guiding_passes = false;
|
|
bool use_pass_albedo = false;
|
|
bool use_pass_normal = false;
|
|
bool use_pass_motion = false;
|
|
|
|
int num_samples = 0;
|
|
|
|
int pass_sample_count = PASS_UNUSED;
|
|
|
|
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
|
|
int pass_denoising_albedo = PASS_UNUSED;
|
|
int pass_denoising_normal = PASS_UNUSED;
|
|
int pass_motion = PASS_UNUSED;
|
|
|
|
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
|
|
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
|
|
* the fake values and denoising of passes which do need albedo can no longer happen. */
|
|
bool albedo_replaced_with_fake = false;
|
|
};
|
|
|
|
class OptiXDenoiser::DenoisePass {
|
|
public:
|
|
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
|
|
{
|
|
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
|
|
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
|
|
|
|
const PassInfo pass_info = Pass::get_info(type);
|
|
num_components = pass_info.num_components;
|
|
use_compositing = pass_info.use_compositing;
|
|
use_denoising_albedo = pass_info.use_denoising_albedo;
|
|
}
|
|
|
|
PassType type;
|
|
|
|
int noisy_offset;
|
|
int denoised_offset;
|
|
|
|
int num_components;
|
|
bool use_compositing;
|
|
bool use_denoising_albedo;
|
|
};
|
|
|
|
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
|
|
{
|
|
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
|
|
|
|
const CUDAContextScope scope(optix_device);
|
|
|
|
DenoiseContext context(optix_device, task);
|
|
|
|
if (!denoise_ensure(context)) {
|
|
return false;
|
|
}
|
|
|
|
if (!denoise_filter_guiding_preprocess(context)) {
|
|
LOG(ERROR) << "Error preprocessing guiding passes.";
|
|
return false;
|
|
}
|
|
|
|
/* Passes which will use real albedo when it is available. */
|
|
denoise_pass(context, PASS_COMBINED);
|
|
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
|
|
|
|
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
|
|
denoise_pass(context, PASS_SHADOW_CATCHER);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
|
|
const int work_size = buffer_params.width * buffer_params.height;
|
|
|
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
|
&context.guiding_params.pass_stride,
|
|
&context.guiding_params.pass_albedo,
|
|
&context.guiding_params.pass_normal,
|
|
&context.guiding_params.pass_flow,
|
|
&context.render_buffers->buffer.device_pointer,
|
|
&buffer_params.offset,
|
|
&buffer_params.stride,
|
|
&buffer_params.pass_stride,
|
|
&context.pass_sample_count,
|
|
&context.pass_denoising_albedo,
|
|
&context.pass_denoising_normal,
|
|
&context.pass_motion,
|
|
&buffer_params.full_x,
|
|
&buffer_params.full_y,
|
|
&buffer_params.width,
|
|
&buffer_params.height,
|
|
&context.num_samples);
|
|
|
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
|
|
const int work_size = buffer_params.width * buffer_params.height;
|
|
|
|
DeviceKernelArguments args(&context.guiding_params.device_pointer,
|
|
&context.guiding_params.pass_stride,
|
|
&context.guiding_params.pass_albedo,
|
|
&buffer_params.width,
|
|
&buffer_params.height);
|
|
|
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
|
|
}
|
|
|
|
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
|
|
const DenoisePass pass(pass_type, buffer_params);
|
|
|
|
if (pass.noisy_offset == PASS_UNUSED) {
|
|
return;
|
|
}
|
|
if (pass.denoised_offset == PASS_UNUSED) {
|
|
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
|
|
return;
|
|
}
|
|
|
|
if (pass.use_denoising_albedo) {
|
|
if (context.albedo_replaced_with_fake) {
|
|
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
|
|
return;
|
|
}
|
|
}
|
|
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
|
|
context.albedo_replaced_with_fake = true;
|
|
if (!denoise_filter_guiding_set_fake_albedo(context)) {
|
|
LOG(ERROR) << "Error replacing real albedo with the fake one.";
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Read and preprocess noisy color input pass. */
|
|
denoise_color_read(context, pass);
|
|
if (!denoise_filter_color_preprocess(context, pass)) {
|
|
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
|
|
return;
|
|
}
|
|
|
|
if (!denoise_run(context, pass)) {
|
|
LOG(ERROR) << "Error running OptiX denoiser.";
|
|
return;
|
|
}
|
|
|
|
/* Store result in the combined pass of the render buffer.
|
|
*
|
|
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
|
|
if (!denoise_filter_color_postprocess(context, pass)) {
|
|
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
|
|
return;
|
|
}
|
|
|
|
denoiser_queue_->synchronize();
|
|
}
|
|
|
|
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
|
|
{
|
|
PassAccessor::PassAccessInfo pass_access_info;
|
|
pass_access_info.type = pass.type;
|
|
pass_access_info.mode = PassMode::NOISY;
|
|
pass_access_info.offset = pass.noisy_offset;
|
|
|
|
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
|
|
* on the approximation. The latter is not even possible because OptiX does not support
|
|
* denoising of semi-transparent pixels. */
|
|
pass_access_info.use_approximate_shadow_catcher = false;
|
|
pass_access_info.use_approximate_shadow_catcher_background = false;
|
|
pass_access_info.show_active_pixels = false;
|
|
|
|
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
|
|
*/
|
|
const PassAccessorGPU pass_accessor(
|
|
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
|
|
|
|
PassAccessor::Destination destination(pass_access_info.type);
|
|
destination.d_pixels = context.render_buffers->buffer.device_pointer +
|
|
pass.denoised_offset * sizeof(float);
|
|
destination.num_components = 3;
|
|
destination.pixel_stride = context.buffer_params.pass_stride;
|
|
|
|
BufferParams buffer_params = context.buffer_params;
|
|
buffer_params.window_x = 0;
|
|
buffer_params.window_y = 0;
|
|
buffer_params.window_width = buffer_params.width;
|
|
buffer_params.window_height = buffer_params.height;
|
|
|
|
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
|
|
const DenoisePass &pass)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
|
|
const int work_size = buffer_params.width * buffer_params.height;
|
|
|
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
|
&buffer_params.full_x,
|
|
&buffer_params.full_y,
|
|
&buffer_params.width,
|
|
&buffer_params.height,
|
|
&buffer_params.offset,
|
|
&buffer_params.stride,
|
|
&buffer_params.pass_stride,
|
|
&pass.denoised_offset);
|
|
|
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
|
|
const DenoisePass &pass)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
|
|
const int work_size = buffer_params.width * buffer_params.height;
|
|
|
|
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
|
|
&buffer_params.full_x,
|
|
&buffer_params.full_y,
|
|
&buffer_params.width,
|
|
&buffer_params.height,
|
|
&buffer_params.offset,
|
|
&buffer_params.stride,
|
|
&buffer_params.pass_stride,
|
|
&context.num_samples,
|
|
&pass.noisy_offset,
|
|
&pass.denoised_offset,
|
|
&context.pass_sample_count,
|
|
&pass.num_components,
|
|
&pass.use_compositing);
|
|
|
|
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
|
|
{
|
|
if (!denoise_create_if_needed(context)) {
|
|
LOG(ERROR) << "OptiX denoiser creation has failed.";
|
|
return false;
|
|
}
|
|
|
|
if (!denoise_configure_if_needed(context)) {
|
|
LOG(ERROR) << "OptiX denoiser configuration has failed.";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
|
|
{
|
|
const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
|
|
(use_pass_albedo_ != context.use_pass_albedo) ||
|
|
(use_pass_normal_ != context.use_pass_normal) ||
|
|
(use_pass_motion_ != context.use_pass_motion);
|
|
if (!recreate_denoiser) {
|
|
return true;
|
|
}
|
|
|
|
/* Destroy existing handle before creating new one. */
|
|
if (optix_denoiser_) {
|
|
optixDenoiserDestroy(optix_denoiser_);
|
|
}
|
|
|
|
/* Create OptiX denoiser handle on demand when it is first used. */
|
|
OptixDenoiserOptions denoiser_options = {};
|
|
denoiser_options.guideAlbedo = context.use_pass_albedo;
|
|
denoiser_options.guideNormal = context.use_pass_normal;
|
|
|
|
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
|
|
if (context.use_pass_motion) {
|
|
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
|
|
}
|
|
|
|
const OptixResult result = optixDenoiserCreate(
|
|
static_cast<OptiXDevice *>(denoiser_device_)->context,
|
|
model,
|
|
&denoiser_options,
|
|
&optix_denoiser_);
|
|
|
|
if (result != OPTIX_SUCCESS) {
|
|
denoiser_device_->set_error("Failed to create OptiX denoiser");
|
|
return false;
|
|
}
|
|
|
|
/* OptiX denoiser handle was created with the requested number of input passes. */
|
|
use_pass_albedo_ = context.use_pass_albedo;
|
|
use_pass_normal_ = context.use_pass_normal;
|
|
use_pass_motion_ = context.use_pass_motion;
|
|
|
|
/* OptiX denoiser has been created, but it needs configuration. */
|
|
is_configured_ = false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
|
|
{
|
|
/* Limit maximum tile size denoiser can be invoked with. */
|
|
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
|
|
min(context.buffer_params.height, 4096));
|
|
|
|
if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
|
|
return true;
|
|
}
|
|
|
|
optix_device_assert(
|
|
denoiser_device_,
|
|
optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
|
|
|
|
/* Allocate denoiser state if tile size has changed since last setup. */
|
|
state_.device = denoiser_device_;
|
|
state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
|
|
|
|
/* Initialize denoiser state for the current tile size. */
|
|
const OptixResult result = optixDenoiserSetup(
|
|
optix_denoiser_,
|
|
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
|
|
* on a stream that is not the default stream. */
|
|
tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
|
|
tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
|
|
state_.device_pointer,
|
|
sizes_.stateSizeInBytes,
|
|
state_.device_pointer + sizes_.stateSizeInBytes,
|
|
sizes_.withOverlapScratchSizeInBytes);
|
|
if (result != OPTIX_SUCCESS) {
|
|
denoiser_device_->set_error("Failed to set up OptiX denoiser");
|
|
return false;
|
|
}
|
|
|
|
cuda_device_assert(denoiser_device_, cuCtxSynchronize());
|
|
|
|
is_configured_ = true;
|
|
configured_size_ = tile_size;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
|
|
{
|
|
const BufferParams &buffer_params = context.buffer_params;
|
|
const int width = buffer_params.width;
|
|
const int height = buffer_params.height;
|
|
|
|
/* Set up input and output layer information. */
|
|
OptixImage2D color_layer = {0};
|
|
OptixImage2D albedo_layer = {0};
|
|
OptixImage2D normal_layer = {0};
|
|
OptixImage2D flow_layer = {0};
|
|
|
|
OptixImage2D output_layer = {0};
|
|
OptixImage2D prev_output_layer = {0};
|
|
|
|
/* Color pass. */
|
|
{
|
|
const int pass_denoised = pass.denoised_offset;
|
|
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
|
|
|
|
color_layer.data = context.render_buffers->buffer.device_pointer +
|
|
pass_denoised * sizeof(float);
|
|
color_layer.width = width;
|
|
color_layer.height = height;
|
|
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
|
|
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
|
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
|
}
|
|
|
|
/* Previous output. */
|
|
if (context.prev_output.offset != PASS_UNUSED) {
|
|
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
|
|
|
|
prev_output_layer.data = context.prev_output.device_pointer +
|
|
context.prev_output.offset * sizeof(float);
|
|
prev_output_layer.width = width;
|
|
prev_output_layer.height = height;
|
|
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
|
|
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
|
|
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
|
}
|
|
|
|
/* Optional albedo and color passes. */
|
|
if (context.num_input_passes > 1) {
|
|
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
|
|
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
|
|
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
|
|
|
|
if (context.use_pass_albedo) {
|
|
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
|
|
albedo_layer.width = width;
|
|
albedo_layer.height = height;
|
|
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
|
|
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
|
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
|
}
|
|
|
|
if (context.use_pass_normal) {
|
|
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
|
|
normal_layer.width = width;
|
|
normal_layer.height = height;
|
|
normal_layer.rowStrideInBytes = row_stride_in_bytes;
|
|
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
|
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
|
|
}
|
|
|
|
if (context.use_pass_motion) {
|
|
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
|
|
flow_layer.width = width;
|
|
flow_layer.height = height;
|
|
flow_layer.rowStrideInBytes = row_stride_in_bytes;
|
|
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
|
|
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
|
|
}
|
|
}
|
|
|
|
/* Denoise in-place of the noisy input in the render buffers. */
|
|
output_layer = color_layer;
|
|
|
|
OptixDenoiserGuideLayer guide_layers = {};
|
|
guide_layers.albedo = albedo_layer;
|
|
guide_layers.normal = normal_layer;
|
|
guide_layers.flow = flow_layer;
|
|
|
|
OptixDenoiserLayer image_layers = {};
|
|
image_layers.input = color_layer;
|
|
image_layers.previousOutput = prev_output_layer;
|
|
image_layers.output = output_layer;
|
|
|
|
/* Finally run denoising. */
|
|
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
|
|
|
|
optix_device_assert(denoiser_device_,
|
|
ccl::optixUtilDenoiserInvokeTiled(
|
|
optix_denoiser_,
|
|
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
|
|
¶ms,
|
|
state_.device_pointer,
|
|
sizes_.stateSizeInBytes,
|
|
&guide_layers,
|
|
&image_layers,
|
|
1,
|
|
state_.device_pointer + sizes_.stateSizeInBytes,
|
|
sizes_.withOverlapScratchSizeInBytes,
|
|
sizes_.overlapWindowSizeInPixels,
|
|
configured_size_.x,
|
|
configured_size_.y));
|
|
|
|
return true;
|
|
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif
|