diff --git a/source/blender/compositor/operations/COM_DenoiseOperation.cc b/source/blender/compositor/operations/COM_DenoiseOperation.cc
index c11f798fad4..89acc403484 100644
--- a/source/blender/compositor/operations/COM_DenoiseOperation.cc
+++ b/source/blender/compositor/operations/COM_DenoiseOperation.cc
@@ -44,6 +44,13 @@ class DenoiseFilter {
   oidn::DeviceRef device_;
   oidn::FilterRef filter_;
   bool initialized_ = false;
+  /* False when images have to be copied into buffers allocated by the device. */
+  bool system_memory_supported_ = true;
+  /* OIDN buffers created for the images that were set on the filter. */
+  std::vector<oidn::BufferRef> buffers_;
+  /* Device buffer and host destination used to read the result back after execution. */
+  oidn::BufferRef oidn_output_;
+  MemoryBuffer *output_ = nullptr;
 #endif
 
  public:
@@ -60,7 +67,14 @@ class DenoiseFilter {
      * nonetheless. */
     BLI_mutex_lock(&oidn_lock);
 
-    device_ = oidn::newDevice(oidn::DeviceType::CPU);
-    device_.set("setAffinity", false);
+    device_ = oidn::newDevice();
+#if OIDN_VERSION_MAJOR >= 2
+    system_memory_supported_ = device_.get<bool>("systemMemorySupported");
+    /* With OIDN 2 and later the affinity parameter is only set on CPU devices. */
+    if (device_.get<int>("type") == int(oidn::DeviceType::CPU))
+#endif
+    {
+      device_.set("setAffinity", false);
+    }
     device_.commit();
     filter_ = device_.newFilter("RT");
@@ -79,8 +93,29 @@ class DenoiseFilter {
   {
     BLI_assert(initialized_);
     BLI_assert(!buffer->is_a_single_elem());
+    /* Widen before multiplying so that large images do not overflow the byte count. */
+    const size_t buffer_len = size_t(buffer->get_elem_bytes_len()) * buffer->get_width() *
+                              buffer->get_height();
+    oidn::BufferRef oidn_buffer;
+    if (system_memory_supported_) {
+      /* Wrap the host memory of the image directly. */
+      oidn_buffer = device_.newBuffer(buffer->get_buffer(), buffer_len);
+    }
+#if OIDN_VERSION_MAJOR >= 2
+    else {
+      /* Copy the image into a buffer allocated by the device. */
+      oidn_buffer = device_.newBuffer(buffer_len);
+      oidn_buffer.write(0, buffer_len, buffer->get_buffer());
+      if (name == "output") {
+        /* Remembered so that the result can be copied back after the filter was executed. */
+        oidn_output_ = oidn_buffer;
+        output_ = buffer;
+      }
+    }
+#endif
+    buffers_.emplace_back(oidn_buffer);
     filter_.setImage(name.data(),
-                     buffer->get_buffer(),
+                     oidn_buffer,
                      oidn::Format::Float3,
                      buffer->get_width(),
                      buffer->get_height(),
@@ -99,6 +134,15 @@ class DenoiseFilter {
     BLI_assert(initialized_);
     filter_.commit();
     filter_.execute();
+#if OIDN_VERSION_MAJOR >= 2
+    /* Copy the result back when the filter wrote to a device-side copy of the output. */
+    if (!system_memory_supported_ && output_ && oidn_output_) {
+      const size_t buffer_len = size_t(output_->get_elem_bytes_len()) * output_->get_width() *
+                                output_->get_height();
+      BLI_assert(buffer_len == oidn_output_.getSize());
+      oidn_output_.read(0, buffer_len, output_->get_buffer());
+    }
+#endif
   }
 
 #else
diff --git a/source/blender/nodes/composite/nodes/node_composite_denoise.cc b/source/blender/nodes/composite/nodes/node_composite_denoise.cc
index 573f660c7fa..4f01a424868 100644
--- a/source/blender/nodes/composite/nodes/node_composite_denoise.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_denoise.cc
@@ -102,7 +102,15 @@ class DenoiseOperation : public NodeOperation {
     }
 
 #ifdef WITH_OPENIMAGEDENOISE
-    oidn::DeviceRef device = oidn::newDevice(oidn::DeviceType::CPU);
+    oidn::DeviceRef device = oidn::newDevice();
+
+    /* Whether the pixels downloaded to the host can be wrapped directly. The device is only
+     * queried for OIDN 2 and later, the host path is used unconditionally otherwise. */
+    bool host_buffer = true;
+#if OIDN_VERSION_MAJOR >= 2
+    host_buffer = device.get<bool>("systemMemorySupported");
+#endif
+
     device.commit();
 
     const int width = input_image.domain().size.x;
@@ -110,13 +118,33 @@ class DenoiseOperation : public NodeOperation {
     const int pixel_stride = sizeof(float) * 4;
     const eGPUDataFormat data_format = GPU_DATA_FLOAT;
 
+    /* Computed in size_t so that large images do not overflow int arithmetic. */
+    const size_t buffer_size = size_t(width) * height * pixel_stride;
+
+    /* Wrap the given host pixels directly when possible, otherwise copy them into a newly
+     * allocated buffer in device storage. */
+    auto create_buffer = [&](float *pixels) -> oidn::BufferRef {
+      oidn::BufferRef buffer;
+      if (host_buffer) {
+        buffer = device.newBuffer(pixels, buffer_size);
+      }
+#if OIDN_VERSION_MAJOR >= 2
+      else {
+        buffer = device.newBuffer(buffer_size, oidn::Storage::Device);
+        buffer.write(0, buffer_size, pixels);
+      }
+#endif
+      return buffer;
+    };
+
     /* Download the input texture and set it as both the input and output of the filter to denoise
      * it in-place. */
     GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
     float *color = static_cast<float *>(GPU_texture_read(input_image, data_format, 0));
+    oidn::BufferRef color_buffer = create_buffer(color);
     oidn::FilterRef filter = device.newFilter("RT");
-    filter.setImage("color", color, oidn::Format::Float3, width, height, 0, pixel_stride);
-    filter.setImage("output", color, oidn::Format::Float3, width, height, 0, pixel_stride);
+    filter.setImage("color", color_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
+    filter.setImage("output", color_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
     filter.set("hdr", use_hdr());
     filter.set("cleanAux", auxiliary_passes_are_clean());
     filter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
@@ -127,19 +155,20 @@ class DenoiseOperation : public NodeOperation {
     Result &input_albedo = get_input("Albedo");
     if (!input_albedo.is_single_value()) {
       albedo = static_cast<float *>(GPU_texture_read(input_albedo, data_format, 0));
-
+      oidn::BufferRef albedo_buffer = create_buffer(albedo);
       if (should_denoise_auxiliary_passes()) {
         oidn::FilterRef albedoFilter = device.newFilter("RT");
         albedoFilter.setImage(
-            "albedo", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
+            "albedo", albedo_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
         albedoFilter.setImage(
-            "output", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
+            "output", albedo_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
         albedoFilter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
         albedoFilter.commit();
         albedoFilter.execute();
       }
 
-      filter.setImage("albedo", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
+      filter.setImage(
+          "albedo", albedo_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
     }
 
     /* If the albedo and normal inputs are not single value inputs, download the normal texture,
@@ -150,23 +179,32 @@ class DenoiseOperation : public NodeOperation {
     Result &input_normal = get_input("Normal");
     if (albedo && !input_normal.is_single_value()) {
       normal = static_cast<float *>(GPU_texture_read(input_normal, data_format, 0));
-
+      oidn::BufferRef normal_buffer = create_buffer(normal);
       if (should_denoise_auxiliary_passes()) {
         oidn::FilterRef normalFilter = device.newFilter("RT");
         normalFilter.setImage(
-            "normal", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
+            "normal", normal_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
         normalFilter.setImage(
-            "output", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
+            "output", normal_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
         normalFilter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
         normalFilter.commit();
         normalFilter.execute();
       }
 
-      filter.setImage("normal", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
+      filter.setImage(
+          "normal", normal_buffer, oidn::Format::Float3, width, height, 0, pixel_stride);
     }
 
     filter.commit();
     filter.execute();
 
+#if OIDN_VERSION_MAJOR >= 2
+    /* Only a separate device buffer has to be copied back, a buffer that wraps the host pixels
+     * already holds the result in `color`. */
+    if (!host_buffer) {
+      color_buffer.read(0, buffer_size, color);
+    }
+#endif
+
     output_image.allocate_texture(input_image.domain());
     GPU_texture_update(output_image, data_format, color);