Compositor: Enable GPU denoising for OpenImageDenoise #115242
|
@ -33,6 +33,10 @@ class DenoiseFilter {
|
|||
oidn::DeviceRef device_;
|
||||
oidn::FilterRef filter_;
|
||||
bool initialized_ = false;
|
||||
bool system_memory_supported_ = true;
|
||||
|
||||
std::vector<oidn::BufferRef> buffers_;
|
||||
oidn::BufferRef oidn_output_;
|
||||
MemoryBuffer *output_ = nullptr;
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
@ -49,7 +53,11 @@ class DenoiseFilter {
|
|||
* nonetheless. */
|
||||
BLI_mutex_lock(&oidn_lock);
|
||||
|
||||
device_ = oidn::newDevice(oidn::DeviceType::CPU);
|
||||
device_ = oidn::newDevice();
|
||||
Sergey Sharybin
commented
Does OIDN 1 always use the CPU device?
Stefan Werner
commented
Yes. OIDN version 1 does not support any other devices. Explicitly requesting the CPU device here was only added when the library was upgraded to 2.0.0, to prevent automatic selection of other devices.
|
||||
#if OIDN_VERSION_MAJOR >= 2
|
||||
system_memory_supported_ = device_.get<bool>("systemMemorySupported");
|
||||
if (device_.get<int>("type") == (int)oidn::DeviceType::CPU)
|
||||
#endif
|
||||
device_.set("setAffinity", false);
|
||||
Sergey Sharybin
commented
I am not really happy with code formatted like this. Such preprocessor in-between of control flow is really not readable.

The more readable code, with fewer functional changes now and fewer changes later when we completely remove the OIDN 1 code paths, would be:
```
# if OIDN_VERSION_MAJOR >= 2
device_ = oidn::newDevice();
system_memory_supported_ = device_.get<bool>("systemMemorySupported");
if (device_.get<int>("type") == (int)oidn::DeviceType::CPU) {
device_.set("setAffinity", false);
}
# else
device_ = oidn::newDevice(oidn::DeviceType::CPU);
# endif
```
|
||||
device_.commit();
|
||||
filter_ = device_.newFilter("RT");
|
||||
|
@ -67,8 +75,24 @@ class DenoiseFilter {
|
|||
{
|
||||
BLI_assert(initialized_);
|
||||
BLI_assert(!buffer->is_a_single_elem());
|
||||
oidn::BufferRef oidn_buffer;
|
||||
size_t buffer_len = buffer->get_elem_bytes_len() * buffer->get_width() * buffer->get_height();
|
||||
Sergey Sharybin
commented
Suggestion: make the widening explicit, i.e. `size_t(buffer->get_elem_bytes_len()) * ...`.
Otherwise the expression is only up-cast to `int`, which will not be enough to composite really high-res images.
|
||||
if (system_memory_supported_) {
|
||||
oidn_buffer = device_.newBuffer(buffer->get_buffer(), buffer_len);
|
||||
}
|
||||
#if OIDN_VERSION_MAJOR >= 2
|
||||
else {
|
||||
oidn_buffer = device_.newBuffer(buffer_len);
|
||||
oidn_buffer.write(0, buffer_len, buffer->get_buffer());
|
||||
if (name == "output") {
|
||||
oidn_output_ = oidn_buffer;
|
||||
output_ = buffer;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
buffers_.emplace_back(oidn_buffer);
|
||||
filter_.setImage(name.data(),
|
||||
buffer->get_buffer(),
|
||||
oidn_buffer,
|
||||
oidn::Format::Float3,
|
||||
buffer->get_width(),
|
||||
buffer->get_height(),
|
||||
|
@ -87,6 +111,13 @@ class DenoiseFilter {
|
|||
BLI_assert(initialized_);
|
||||
filter_.commit();
|
||||
filter_.execute();
|
||||
#if OIDN_VERSION_MAJOR >= 2
|
||||
if (!system_memory_supported_ && output_ && oidn_output_) {
|
||||
size_t buffer_len = output_->get_elem_bytes_len() * output_->get_width() * output_->get_height();
|
||||
assert(buffer_len == oidn_output_.getSize());
|
||||
oidn_output_.read(0, buffer_len, output_->get_buffer());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue
can_use_host_memory_
seems to be a better name which indicates the intent more clearly.

> I don't think so. "Host memory" in a GPU compute context often means host pinned memory, which is always supported. That's not what this flag indicates. CUDA calls it "system allocated memory", which is where the naming of this flag comes from. Some exotic CUDA systems (with HMM) also support this, not just CPUs, but I haven't encountered such a system so far.
This is a bit different from the conventions we follow in Cycles, but I see your point.
How about
can_use_system_memory_
with a comment explaining that, when true, OIDN will use Blender-side allocated memory as-is, without copying to a temporary buffer? Or, if you prefer the current naming, add such a comment nevertheless.

> Sure! I agree that such a comment should be added.