Fix T68944: Added check for SSE4.1 to denoising node.

Since OpenImageDenoise requires a CPU with SSE4.1 or newer,
let the node act as a passthrough on unsupported CPUs and display
a message in the node itself.
This commit is contained in:
Stefan Werner
2019-08-27 14:03:49 +02:00
parent e39528b351
commit d547f9d3d2
4 changed files with 69 additions and 48 deletions

View File

@@ -24,6 +24,7 @@
*/ */
int BLI_cpu_support_sse2(void); int BLI_cpu_support_sse2(void);
int BLI_cpu_support_sse41(void);
void BLI_system_backtrace(FILE *fp); void BLI_system_backtrace(FILE *fp);
/* Get CPU brand, result is to be MEM_freeN()-ed. */ /* Get CPU brand, result is to be MEM_freeN()-ed. */

View File

@@ -179,6 +179,19 @@ char *BLI_cpu_brand_string(void)
return NULL; return NULL;
} }
/**
 * Query CPUID to find out whether the processor reports SSE4.1.
 *
 * \return 1 when bit 19 of the third value returned for CPUID leaf 1 is set,
 * 0 when it is clear or when leaf 1 is not available.
 *
 * NOTE(review): relies on `__cpuid()` filling the array in EAX, EBX, ECX, EDX
 * order, so that index 2 is ECX (whose bit 19 is the SSE4.1 feature flag) --
 * confirm against the `__cpuid` implementation used on each platform.
 */
int BLI_cpu_support_sse41(void)
{
  int regs[4];
  int max_leaf;

  /* Leaf 0: the first returned value is used as the highest queryable leaf. */
  __cpuid(regs, 0);
  max_leaf = regs[0];

  /* Without leaf 1 there are no feature bits to inspect. */
  if (max_leaf < 1) {
    return 0;
  }

  /* Leaf 1: test the SSE4.1 bit (bit 19) in the third returned value. */
  __cpuid(regs, 0x00000001);
  return (regs[2] & ((int)1 << 19)) ? 1 : 0;
}
void BLI_hostname_get(char *buffer, size_t bufsize) void BLI_hostname_get(char *buffer, size_t bufsize)
{ {
#ifndef WIN32 #ifndef WIN32

View File

@@ -97,66 +97,69 @@ void DenoiseOperation::generateDenoise(float *data,
return; return;
} }
#ifdef WITH_OPENIMAGEDENOISE #ifdef WITH_OPENIMAGEDENOISE
oidn::DeviceRef device = oidn::newDevice(); if (BLI_cpu_support_sse41()) {
device.commit(); oidn::DeviceRef device = oidn::newDevice();
device.commit();
oidn::FilterRef filter = device.newFilter("RT"); oidn::FilterRef filter = device.newFilter("RT");
filter.setImage("color", filter.setImage("color",
inputBufferColor, inputBufferColor,
oidn::Format::Float3,
inputTileColor->getWidth(),
inputTileColor->getHeight(),
0,
4 * sizeof(float));
if (inputTileAlbedo && inputTileAlbedo->getBuffer()) {
filter.setImage("albedo",
inputTileAlbedo->getBuffer(),
oidn::Format::Float3, oidn::Format::Float3,
inputTileAlbedo->getWidth(), inputTileColor->getWidth(),
inputTileAlbedo->getHeight(), inputTileColor->getHeight(),
0, 0,
4 * sizeof(float)); 4 * sizeof(float));
} if (inputTileAlbedo && inputTileAlbedo->getBuffer()) {
if (inputTileNormal && inputTileNormal->getBuffer()) { filter.setImage("albedo",
filter.setImage("normal", inputTileAlbedo->getBuffer(),
inputTileNormal->getBuffer(), oidn::Format::Float3,
inputTileAlbedo->getWidth(),
inputTileAlbedo->getHeight(),
0,
4 * sizeof(float));
}
if (inputTileNormal && inputTileNormal->getBuffer()) {
filter.setImage("normal",
inputTileNormal->getBuffer(),
oidn::Format::Float3,
inputTileNormal->getWidth(),
inputTileNormal->getHeight(),
0,
3 * sizeof(float));
}
filter.setImage("output",
data,
oidn::Format::Float3, oidn::Format::Float3,
inputTileNormal->getWidth(), inputTileColor->getWidth(),
inputTileNormal->getHeight(), inputTileColor->getHeight(),
0, 0,
3 * sizeof(float)); 4 * sizeof(float));
}
filter.setImage("output",
data,
oidn::Format::Float3,
inputTileColor->getWidth(),
inputTileColor->getHeight(),
0,
4 * sizeof(float));
BLI_assert(settings); BLI_assert(settings);
if (settings) { if (settings) {
filter.set("hdr", settings->hdr); filter.set("hdr", settings->hdr);
filter.set("srgb", false); filter.set("srgb", false);
} }
filter.commit(); filter.commit();
/* Since it's memory intensive, it's better to run only one instance of OIDN at a time. /* Since it's memory intensive, it's better to run only one instance of OIDN at a time.
* OpenImageDenoise is multithreaded internally and should use all available cores nonetheless. * OpenImageDenoise is multithreaded internally and should use all available cores nonetheless.
*/ */
BLI_mutex_lock(&oidn_lock); BLI_mutex_lock(&oidn_lock);
filter.execute(); filter.execute();
BLI_mutex_unlock(&oidn_lock); BLI_mutex_unlock(&oidn_lock);
/* copy the alpha channel, OpenImageDenoise currently only supports RGB */ /* copy the alpha channel, OpenImageDenoise currently only supports RGB */
size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight(); size_t numPixels = inputTileColor->getWidth() * inputTileColor->getHeight();
for (size_t i = 0; i < numPixels; ++i) { for (size_t i = 0; i < numPixels; ++i) {
data[i * 4 + 3] = inputBufferColor[i * 4 + 3]; data[i * 4 + 3] = inputBufferColor[i * 4 + 3];
}
return;
} }
#else #endif
/* If built without OIDN or running on an unsupported CPU, just pass through. */
UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings); UNUSED_VARS(inputTileAlbedo, inputTileNormal, settings);
::memcpy(data, ::memcpy(data,
inputBufferColor, inputBufferColor,
inputTileColor->getWidth() * inputTileColor->getHeight() * sizeof(float) * 4); inputTileColor->getWidth() * inputTileColor->getHeight() * sizeof(float) * 4);
#endif
} }

View File

@@ -2703,6 +2703,10 @@ static void node_composit_buts_denoise(uiLayout *layout, bContext *UNUSED(C), Po
{ {
#ifndef WITH_OPENIMAGEDENOISE #ifndef WITH_OPENIMAGEDENOISE
uiItemL(layout, IFACE_("Disabled, built without OpenImageDenoise"), ICON_ERROR); uiItemL(layout, IFACE_("Disabled, built without OpenImageDenoise"), ICON_ERROR);
#else
if (!BLI_cpu_support_sse41()) {
uiItemL(layout, IFACE_("Disabled, CPU with SSE4.1 is required"), ICON_ERROR);
}
#endif #endif
uiItemR(layout, ptr, "use_hdr", 0, NULL, ICON_NONE); uiItemR(layout, ptr, "use_hdr", 0, NULL, ICON_NONE);