This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/intern/cycles/integrator/denoiser_optix.cpp
Patrick Mours a859837cde Cleanup: Move OptiX denoiser code from device into denoiser class
Cycles already treats denoising fairly separate in its code, with a
dedicated `Denoiser` base class used to describe denoising
behavior. That class has been fully implemented for OIDN
(`denoiser_oidn.cpp`), but for OptiX was mostly empty
(`denoiser_optix.cpp`) and denoising was instead implemented in
the OptiX device. That meant denoising code was split over various
files and directories, making it a bit awkward to work with. This
patch moves the OptiX denoising implementation into the existing
`OptiXDenoiser` class, so that everything is in one place. There are
no functional changes, code has been mostly moved as-is. To
retain support for potential other denoiser implementations based
on a GPU device in the future, the `DeviceDenoiser` base class was
kept and slightly extended (and its file renamed to
`denoiser_gpu.cpp` to follow similar naming rules as
`path_trace_work_*.cpp`).

Differential Revision: https://developer.blender.org/D16502
2022-11-15 15:50:01 +01:00

787 lines
30 KiB
C++

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_OPTIX
# include "integrator/denoiser_optix.h"
# include "integrator/pass_accessor_gpu.h"
# include "device/optix/device_impl.h"
# include "device/optix/queue.h"
# include <optix_denoiser_tiling.h>
CCL_NAMESPACE_BEGIN
# if OPTIX_ABI_VERSION >= 60
using ::optixUtilDenoiserInvokeTiled;
# else
// A minimal copy of functionality `optix_denoiser_tiling.h` which allows to fix integer overflow
// issues without bumping SDK or driver requirement.
//
// The original code is Copyright NVIDIA Corporation, BSD-3-Clause.
static OptixResult optixUtilDenoiserSplitImage(const OptixImage2D &input,
const OptixImage2D &output,
unsigned int overlapWindowSizeInPixels,
unsigned int tileWidth,
unsigned int tileHeight,
std::vector<OptixUtilDenoiserImageTile> &tiles)
{
if (tileWidth == 0 || tileHeight == 0)
return OPTIX_ERROR_INVALID_VALUE;
unsigned int inPixelStride = optixUtilGetPixelStride(input);
unsigned int outPixelStride = optixUtilGetPixelStride(output);
int inp_w = std::min(tileWidth + 2 * overlapWindowSizeInPixels, input.width);
int inp_h = std::min(tileHeight + 2 * overlapWindowSizeInPixels, input.height);
int inp_y = 0, copied_y = 0;
do {
int inputOffsetY = inp_y == 0 ? 0 :
std::max((int)overlapWindowSizeInPixels,
inp_h - ((int)input.height - inp_y));
int copy_y = inp_y == 0 ? std::min(input.height, tileHeight + overlapWindowSizeInPixels) :
std::min(tileHeight, input.height - copied_y);
int inp_x = 0, copied_x = 0;
do {
int inputOffsetX = inp_x == 0 ? 0 :
std::max((int)overlapWindowSizeInPixels,
inp_w - ((int)input.width - inp_x));
int copy_x = inp_x == 0 ? std::min(input.width, tileWidth + overlapWindowSizeInPixels) :
std::min(tileWidth, input.width - copied_x);
OptixUtilDenoiserImageTile tile;
tile.input.data = input.data + (size_t)(inp_y - inputOffsetY) * input.rowStrideInBytes +
+(size_t)(inp_x - inputOffsetX) * inPixelStride;
tile.input.width = inp_w;
tile.input.height = inp_h;
tile.input.rowStrideInBytes = input.rowStrideInBytes;
tile.input.pixelStrideInBytes = input.pixelStrideInBytes;
tile.input.format = input.format;
tile.output.data = output.data + (size_t)inp_y * output.rowStrideInBytes +
(size_t)inp_x * outPixelStride;
tile.output.width = copy_x;
tile.output.height = copy_y;
tile.output.rowStrideInBytes = output.rowStrideInBytes;
tile.output.pixelStrideInBytes = output.pixelStrideInBytes;
tile.output.format = output.format;
tile.inputOffsetX = inputOffsetX;
tile.inputOffsetY = inputOffsetY;
tiles.push_back(tile);
inp_x += inp_x == 0 ? tileWidth + overlapWindowSizeInPixels : tileWidth;
copied_x += copy_x;
} while (inp_x < static_cast<int>(input.width));
inp_y += inp_y == 0 ? tileHeight + overlapWindowSizeInPixels : tileHeight;
copied_y += copy_y;
} while (inp_y < static_cast<int>(input.height));
return OPTIX_SUCCESS;
}
static OptixResult optixUtilDenoiserInvokeTiled(OptixDenoiser denoiser,
CUstream stream,
const OptixDenoiserParams *params,
CUdeviceptr denoiserState,
size_t denoiserStateSizeInBytes,
const OptixDenoiserGuideLayer *guideLayer,
const OptixDenoiserLayer *layers,
unsigned int numLayers,
CUdeviceptr scratch,
size_t scratchSizeInBytes,
unsigned int overlapWindowSizeInPixels,
unsigned int tileWidth,
unsigned int tileHeight)
{
if (!guideLayer || !layers)
return OPTIX_ERROR_INVALID_VALUE;
std::vector<std::vector<OptixUtilDenoiserImageTile>> tiles(numLayers);
std::vector<std::vector<OptixUtilDenoiserImageTile>> prevTiles(numLayers);
for (unsigned int l = 0; l < numLayers; l++) {
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].input,
layers[l].output,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
tiles[l]))
return res;
if (layers[l].previousOutput.data) {
OptixImage2D dummyOutput = layers[l].previousOutput;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(layers[l].previousOutput,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
prevTiles[l]))
return res;
}
}
std::vector<OptixUtilDenoiserImageTile> albedoTiles;
if (guideLayer->albedo.data) {
OptixImage2D dummyOutput = guideLayer->albedo;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->albedo,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
albedoTiles))
return res;
}
std::vector<OptixUtilDenoiserImageTile> normalTiles;
if (guideLayer->normal.data) {
OptixImage2D dummyOutput = guideLayer->normal;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->normal,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
normalTiles))
return res;
}
std::vector<OptixUtilDenoiserImageTile> flowTiles;
if (guideLayer->flow.data) {
OptixImage2D dummyOutput = guideLayer->flow;
if (const OptixResult res = ccl::optixUtilDenoiserSplitImage(guideLayer->flow,
dummyOutput,
overlapWindowSizeInPixels,
tileWidth,
tileHeight,
flowTiles))
return res;
}
for (size_t t = 0; t < tiles[0].size(); t++) {
std::vector<OptixDenoiserLayer> tlayers;
for (unsigned int l = 0; l < numLayers; l++) {
OptixDenoiserLayer layer = {};
layer.input = (tiles[l])[t].input;
layer.output = (tiles[l])[t].output;
if (layers[l].previousOutput.data)
layer.previousOutput = (prevTiles[l])[t].input;
tlayers.push_back(layer);
}
OptixDenoiserGuideLayer gl = {};
if (guideLayer->albedo.data)
gl.albedo = albedoTiles[t].input;
if (guideLayer->normal.data)
gl.normal = normalTiles[t].input;
if (guideLayer->flow.data)
gl.flow = flowTiles[t].input;
if (const OptixResult res = optixDenoiserInvoke(denoiser,
stream,
params,
denoiserState,
denoiserStateSizeInBytes,
&gl,
&tlayers[0],
numLayers,
(tiles[0])[t].inputOffsetX,
(tiles[0])[t].inputOffsetY,
scratch,
scratchSizeInBytes))
return res;
}
return OPTIX_SUCCESS;
}
# endif
OptiXDenoiser::OptiXDenoiser(Device *path_trace_device, const DenoiseParams &params)
: DenoiserGPU(path_trace_device, params), state_(path_trace_device, "__denoiser_state", true)
{
}
OptiXDenoiser::~OptiXDenoiser()
{
/* It is important that the OptixDenoiser handle is destroyed before the OptixDeviceContext
* handle, which is guaranteed since the local denoising device owning the OptiX device context
* is deleted as part of the Denoiser class destructor call after this. */
if (optix_denoiser_ != nullptr) {
optixDenoiserDestroy(optix_denoiser_);
}
}
uint OptiXDenoiser::get_device_type_mask() const
{
return DEVICE_MASK_OPTIX;
}
class OptiXDenoiser::DenoiseContext {
public:
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
if (denoise_params.use_pass_albedo) {
num_input_passes += 1;
use_pass_albedo = true;
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
if (denoise_params.use_pass_normal) {
num_input_passes += 1;
use_pass_normal = true;
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_motion = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
}
else {
guiding_params.pass_stride = 0;
if (use_pass_albedo) {
guiding_params.pass_albedo = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_normal) {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_motion) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
guiding_params.pass_stride);
guiding_params.device_pointer = guiding_buffer.device_pointer;
}
}
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
}
const DenoiseParams &denoise_params;
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output. */
struct {
device_ptr device_pointer = 0;
int offset = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
struct {
device_ptr device_pointer = 0;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} guiding_params;
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_motion = false;
int num_samples = 0;
int pass_sample_count = PASS_UNUSED;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
* the fake values and denoising of passes which do need albedo can no longer happen. */
bool albedo_replaced_with_fake = false;
};
class OptiXDenoiser::DenoisePass {
public:
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
{
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
const PassInfo pass_info = Pass::get_info(type);
num_components = pass_info.num_components;
use_compositing = pass_info.use_compositing;
use_denoising_albedo = pass_info.use_denoising_albedo;
}
PassType type;
int noisy_offset;
int denoised_offset;
int num_components;
bool use_compositing;
bool use_denoising_albedo;
};
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
{
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
const CUDAContextScope scope(optix_device);
DenoiseContext context(optix_device, task);
if (!denoise_ensure(context)) {
return false;
}
if (!denoise_filter_guiding_preprocess(context)) {
LOG(ERROR) << "Error preprocessing guiding passes.";
return false;
}
/* Passes which will use real albedo when it is available. */
denoise_pass(context, PASS_COMBINED);
denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);
/* Passes which do not need albedo and hence if real is present it needs to become fake. */
denoise_pass(context, PASS_SHADOW_CATCHER);
return true;
}
bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&context.guiding_params.pass_normal,
&context.guiding_params.pass_flow,
&context.render_buffers->buffer.device_pointer,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.pass_sample_count,
&context.pass_denoising_albedo,
&context.pass_denoising_normal,
&context.pass_motion,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&context.num_samples);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&buffer_params.width,
&buffer_params.height);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
{
const BufferParams &buffer_params = context.buffer_params;
const DenoisePass pass(pass_type, buffer_params);
if (pass.noisy_offset == PASS_UNUSED) {
return;
}
if (pass.denoised_offset == PASS_UNUSED) {
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
return;
}
if (pass.use_denoising_albedo) {
if (context.albedo_replaced_with_fake) {
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
return;
}
}
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
return;
}
}
/* Read and preprocess noisy color input pass. */
denoise_color_read(context, pass);
if (!denoise_filter_color_preprocess(context, pass)) {
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
return;
}
if (!denoise_run(context, pass)) {
LOG(ERROR) << "Error running OptiX denoiser.";
return;
}
/* Store result in the combined pass of the render buffer.
*
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
if (!denoise_filter_color_postprocess(context, pass)) {
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
return;
}
denoiser_queue_->synchronize();
}
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
{
PassAccessor::PassAccessInfo pass_access_info;
pass_access_info.type = pass.type;
pass_access_info.mode = PassMode::NOISY;
pass_access_info.offset = pass.noisy_offset;
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
* on the approximation. The latter is not even possible because OptiX does not support
* denoising of semi-transparent pixels. */
pass_access_info.use_approximate_shadow_catcher = false;
pass_access_info.use_approximate_shadow_catcher_background = false;
pass_access_info.show_active_pixels = false;
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
*/
const PassAccessorGPU pass_accessor(
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
PassAccessor::Destination destination(pass_access_info.type);
destination.d_pixels = context.render_buffers->buffer.device_pointer +
pass.denoised_offset * sizeof(float);
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
BufferParams buffer_params = context.buffer_params;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&pass.denoised_offset);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser creation has failed.";
return false;
}
if (!denoise_configure_if_needed(context)) {
LOG(ERROR) << "OptiX denoiser configuration has failed.";
return false;
}
return true;
}
bool OptiXDenoiser::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (optix_denoiser_ == nullptr) ||
(use_pass_albedo_ != context.use_pass_albedo) ||
(use_pass_normal_ != context.use_pass_normal) ||
(use_pass_motion_ != context.use_pass_motion);
if (!recreate_denoiser) {
return true;
}
/* Destroy existing handle before creating new one. */
if (optix_denoiser_) {
optixDenoiserDestroy(optix_denoiser_);
}
/* Create OptiX denoiser handle on demand when it is first used. */
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;
OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
if (context.use_pass_motion) {
model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
}
const OptixResult result = optixDenoiserCreate(
static_cast<OptiXDevice *>(denoiser_device_)->context,
model,
&denoiser_options,
&optix_denoiser_);
if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to create OptiX denoiser");
return false;
}
/* OptiX denoiser handle was created with the requested number of input passes. */
use_pass_albedo_ = context.use_pass_albedo;
use_pass_normal_ = context.use_pass_normal;
use_pass_motion_ = context.use_pass_motion;
/* OptiX denoiser has been created, but it needs configuration. */
is_configured_ = false;
return true;
}
bool OptiXDenoiser::denoise_configure_if_needed(DenoiseContext &context)
{
/* Limit maximum tile size denoiser can be invoked with. */
const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
min(context.buffer_params.height, 4096));
if (is_configured_ && (configured_size_.x == tile_size.x && configured_size_.y == tile_size.y)) {
return true;
}
optix_device_assert(
denoiser_device_,
optixDenoiserComputeMemoryResources(optix_denoiser_, tile_size.x, tile_size.y, &sizes_));
/* Allocate denoiser state if tile size has changed since last setup. */
state_.device = denoiser_device_;
state_.alloc_to_device(sizes_.stateSizeInBytes + sizes_.withOverlapScratchSizeInBytes);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
optix_denoiser_,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
* on a stream that is not the default stream. */
tile_size.x + sizes_.overlapWindowSizeInPixels * 2,
tile_size.y + sizes_.overlapWindowSizeInPixels * 2,
state_.device_pointer,
sizes_.stateSizeInBytes,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
denoiser_device_->set_error("Failed to set up OptiX denoiser");
return false;
}
cuda_device_assert(denoiser_device_, cuCtxSynchronize());
is_configured_ = true;
configured_size_ = tile_size;
return true;
}
bool OptiXDenoiser::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int width = buffer_params.width;
const int height = buffer_params.height;
/* Set up input and output layer information. */
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
OptixImage2D flow_layer = {0};
OptixImage2D output_layer = {0};
OptixImage2D prev_output_layer = {0};
/* Color pass. */
{
const int pass_denoised = pass.denoised_offset;
const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
color_layer.data = context.render_buffers->buffer.device_pointer +
pass_denoised * sizeof(float);
color_layer.width = width;
color_layer.height = height;
color_layer.rowStrideInBytes = pass_stride_in_bytes * context.buffer_params.stride;
color_layer.pixelStrideInBytes = pass_stride_in_bytes;
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Previous output. */
if (context.prev_output.offset != PASS_UNUSED) {
const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
prev_output_layer.data = context.prev_output.device_pointer +
context.prev_output.offset * sizeof(float);
prev_output_layer.width = width;
prev_output_layer.height = height;
prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
if (context.use_pass_albedo) {
albedo_layer.data = d_guiding_buffer + context.guiding_params.pass_albedo * sizeof(float);
albedo_layer.width = width;
albedo_layer.height = height;
albedo_layer.rowStrideInBytes = row_stride_in_bytes;
albedo_layer.pixelStrideInBytes = pixel_stride_in_bytes;
albedo_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_normal) {
normal_layer.data = d_guiding_buffer + context.guiding_params.pass_normal * sizeof(float);
normal_layer.width = width;
normal_layer.height = height;
normal_layer.rowStrideInBytes = row_stride_in_bytes;
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
if (context.use_pass_motion) {
flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
flow_layer.width = width;
flow_layer.height = height;
flow_layer.rowStrideInBytes = row_stride_in_bytes;
flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
}
}
/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;
OptixDenoiserGuideLayer guide_layers = {};
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
guide_layers.flow = flow_layer;
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;
/* Finally run denoising. */
OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
optix_device_assert(denoiser_device_,
ccl::optixUtilDenoiserInvokeTiled(
optix_denoiser_,
static_cast<OptiXDeviceQueue *>(denoiser_queue_.get())->stream(),
&params,
state_.device_pointer,
sizes_.stateSizeInBytes,
&guide_layers,
&image_layers,
1,
state_.device_pointer + sizes_.stateSizeInBytes,
sizes_.withOverlapScratchSizeInBytes,
sizes_.overlapWindowSizeInPixels,
configured_size_.x,
configured_size_.y));
return true;
}
CCL_NAMESPACE_END
#endif