VSE: make Glow effect 6x-10x faster #115818

Merged
Aras Pranckevicius merged 5 commits from aras_p/blender:vse-glow-opt into main 2023-12-06 19:39:51 +01:00
3 changed files with 108 additions and 214 deletions

View File

@@ -643,8 +643,6 @@ void IMB_buffer_byte_from_byte(unsigned char *rect_to,
int height, int height,
int stride_to, int stride_to,
int stride_from); int stride_from);
void IMB_buffer_float_unpremultiply(float *buf, int width, int height);
void IMB_buffer_float_premultiply(float *buf, int width, int height);
/** /**
* Change the ordering of the color bytes pointed to by rect from * Change the ordering of the color bytes pointed to by rect from

View File

@@ -856,26 +856,6 @@ void IMB_color_to_bw(ImBuf *ibuf)
} }
} }
void IMB_buffer_float_unpremultiply(float *buf, int width, int height)
{
size_t total = size_t(width) * height;
float *fp = buf;
while (total--) {
premul_to_straight_v4(fp);
fp += 4;
}
}
void IMB_buffer_float_premultiply(float *buf, int width, int height)
{
size_t total = size_t(width) * height;
float *fp = buf;
while (total--) {
straight_to_premul_v4(fp);
fp += 4;
}
}
/** \} */ /** \} */
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */

View File

@@ -14,11 +14,15 @@
#include "MEM_guardedalloc.h" #include "MEM_guardedalloc.h"
#include "BLI_array.hh"
#include "BLI_listbase.h" #include "BLI_listbase.h"
#include "BLI_math_rotation.h" #include "BLI_math_rotation.h"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_path_util.h" #include "BLI_path_util.h"
#include "BLI_rect.h" #include "BLI_rect.h"
#include "BLI_string.h" #include "BLI_string.h"
#include "BLI_task.hh"
#include "BLI_threads.h" #include "BLI_threads.h"
#include "BLI_utildefines.h" #include "BLI_utildefines.h"
@@ -122,13 +126,6 @@ static void slice_get_float_buffers(const SeqRenderData *context,
/** \name Glow Effect /** \name Glow Effect
* \{ */ * \{ */
enum {
GlowR = 0,
GlowG = 1,
GlowB = 2,
GlowA = 3,
};
static ImBuf *prepare_effect_imbufs(const SeqRenderData *context, static ImBuf *prepare_effect_imbufs(const SeqRenderData *context,
ImBuf *ibuf1, ImBuf *ibuf1,
ImBuf *ibuf2, ImBuf *ibuf2,
@@ -1960,20 +1957,14 @@ static void do_transform_effect(const SeqRenderData *context,
/** \name Glow Effect /** \name Glow Effect
* \{ */ * \{ */
static void RVBlurBitmap2_float(float *map, int width, int height, float blur, int quality) static void glow_blur_bitmap(const blender::float4 *src,
blender::float4 *map,
int width,
int height,
float blur,
int quality)
{ {
/* Much better than the previous blur! using namespace blender;
* We do the blurring in two passes which is a whole lot faster.
* I changed the math around to implement an actual Gaussian distribution.
*
* Watch out though, it tends to misbehave with large blur values on
* a small bitmap. Avoid! */
float *temp = nullptr, *swap;
float *filter = nullptr;
int x, y, i, fx, fy;
int index, ix, halfWidth;
float fval, k, curColor[4], curColor2[4], weight = 0;
/* If we're not really blurring, bail out */ /* If we're not really blurring, bail out */
if (blur <= 0) { if (blur <= 0) {
@@ -1981,183 +1972,95 @@ static void RVBlurBitmap2_float(float *map, int width, int height, float blur, i
} }
/* If result would be no blurring, early out. */ /* If result would be no blurring, early out. */
halfWidth = ((quality + 1) * blur); const int halfWidth = ((quality + 1) * blur);
if (halfWidth == 0) { if (halfWidth == 0) {
return; return;
} }
/* Allocate memory for the temp-map and the blur filter matrix. */ Array<float4> temp(width * height);
temp = static_cast<float *>(MEM_mallocN(sizeof(float[4]) * width * height, "blurbitmaptemp"));
if (!temp) {
return;
}
/* Allocate memory for the filter elements */ /* Initialize the gaussian filter. @TODO: use code from RE_filter_value */
filter = (float *)MEM_mallocN(sizeof(float) * halfWidth * 2, "blurbitmapfilter"); Array<float> filter(halfWidth * 2);
if (!filter) { const float k = -1.0f / (2.0f * float(M_PI) * blur * blur);
MEM_freeN(temp); float weight = 0;
return; for (int ix = 0; ix < halfWidth; ix++) {
}
/* Apparently we're calculating a bell curve based on the standard deviation (or radius)
* This code is based on an example posted to comp.graphics.algorithms by
* Blancmange <bmange@airdmhor.gen.nz>
*/
k = -1.0f / (2.0f * float(M_PI) * blur * blur);
for (ix = 0; ix < halfWidth; ix++) {
weight = float(exp(k * (ix * ix))); weight = float(exp(k * (ix * ix)));
filter[halfWidth - ix] = weight; filter[halfWidth - ix] = weight;
filter[halfWidth + ix] = weight; filter[halfWidth + ix] = weight;
} }
filter[0] = weight; filter[0] = weight;
/* Normalize the array */ /* Normalize the array */
fval = 0; float fval = 0;
for (ix = 0; ix < halfWidth * 2; ix++) { for (int ix = 0; ix < halfWidth * 2; ix++) {
fval += filter[ix]; fval += filter[ix];
} }
for (int ix = 0; ix < halfWidth * 2; ix++) {
for (ix = 0; ix < halfWidth * 2; ix++) {
filter[ix] /= fval; filter[ix] /= fval;
} }
/* Blur the rows */ /* Blur the rows: read map, write temp */
for (y = 0; y < height; y++) { threading::parallel_for(IndexRange(height), 32, [&](const IndexRange y_range) {
/* Do the left & right strips */ for (const int y : y_range) {
for (x = 0; x < halfWidth; x++) { for (int x = 0; x < width; x++) {
fx = 0; float4 curColor = float4(0.0f);
zero_v4(curColor); int xmin = math::max(x - halfWidth, 0);
zero_v4(curColor2); int xmax = math::min(x + halfWidth, width);
for (int nx = xmin, index = (xmin - x) + halfWidth; nx < xmax; nx++, index++) {
for (i = x - halfWidth; i < x + halfWidth; i++) { curColor += map[nx + y * width] * filter[index];
if ((i >= 0) && (i < width)) {
index = (i + y * width) * 4;
madd_v4_v4fl(curColor, map + index, filter[fx]);
index = (width - 1 - i + y * width) * 4;
madd_v4_v4fl(curColor2, map + index, filter[fx]);
} }
fx++; temp[x + y * width] = curColor;
} }
index = (x + y * width) * 4; }
copy_v4_v4(temp + index, curColor); });
index = (width - 1 - x + y * width) * 4; /* Blur the columns: read temp, write map */
copy_v4_v4(temp + index, curColor2); threading::parallel_for(IndexRange(width), 32, [&](const IndexRange x_range) {
const float4 one = float4(1.0f);
for (const int x : x_range) {
for (int y = 0; y < height; y++) {
float4 curColor = float4(0.0f);
int ymin = math::max(y - halfWidth, 0);
int ymax = math::min(y + halfWidth, height);
for (int ny = ymin, index = (ymin - y) + halfWidth; ny < ymax; ny++, index++) {
curColor += temp[x + ny * width] * filter[index];
}
if (src != nullptr) {
curColor = math::min(one, src[x + y * width] + curColor);
}
map[x + y * width] = curColor;
}
}
});
} }
/* Do the main body */ static void blur_isolate_highlights(const blender::float4 *in,
for (x = halfWidth; x < width - halfWidth; x++) { blender::float4 *out,
fx = 0; int width,
zero_v4(curColor); int height,
for (i = x - halfWidth; i < x + halfWidth; i++) { float threshold,
index = (i + y * width) * 4; float boost,
madd_v4_v4fl(curColor, map + index, filter[fx]); float clamp)
fx++;
}
index = (x + y * width) * 4;
copy_v4_v4(temp + index, curColor);
}
}
/* Swap buffers */
swap = temp;
temp = map;
map = swap;
/* Blur the columns */
for (x = 0; x < width; x++) {
/* Do the top & bottom strips */
for (y = 0; y < halfWidth; y++) {
fy = 0;
zero_v4(curColor);
zero_v4(curColor2);
for (i = y - halfWidth; i < y + halfWidth; i++) {
if ((i >= 0) && (i < height)) {
/* Bottom */
index = (x + i * width) * 4;
madd_v4_v4fl(curColor, map + index, filter[fy]);
/* Top */
index = (x + (height - 1 - i) * width) * 4;
madd_v4_v4fl(curColor2, map + index, filter[fy]);
}
fy++;
}
index = (x + y * width) * 4;
copy_v4_v4(temp + index, curColor);
index = (x + (height - 1 - y) * width) * 4;
copy_v4_v4(temp + index, curColor2);
}
/* Do the main body */
for (y = halfWidth; y < height - halfWidth; y++) {
fy = 0;
zero_v4(curColor);
for (i = y - halfWidth; i < y + halfWidth; i++) {
index = (x + i * width) * 4;
madd_v4_v4fl(curColor, map + index, filter[fy]);
fy++;
}
index = (x + y * width) * 4;
copy_v4_v4(temp + index, curColor);
}
}
/* Swap buffers */
swap = temp;
temp = map;
// map = swap; /* UNUSED. */
/* Tidy up. */
MEM_freeN(filter);
MEM_freeN(temp);
}
static void RVAddBitmaps_float(float *a, float *b, float *c, int width, int height)
{ {
int x, y, index; using namespace blender;
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
for (y = 0; y < height; y++) { const float4 clampv = float4(clamp);
for (x = 0; x < width; x++) { for (const int y : y_range) {
index = (x + y * width) * 4; int index = y * width;
c[index + GlowR] = min_ff(1.0f, a[index + GlowR] + b[index + GlowR]); for (int x = 0; x < width; x++, index++) {
c[index + GlowG] = min_ff(1.0f, a[index + GlowG] + b[index + GlowG]);
c[index + GlowB] = min_ff(1.0f, a[index + GlowB] + b[index + GlowB]);
c[index + GlowA] = min_ff(1.0f, a[index + GlowA] + b[index + GlowA]);
}
}
}
static void RVIsolateHighlights_float(
const float *in, float *out, int width, int height, float threshold, float boost, float clamp)
{
int x, y, index;
float intensity;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
index = (x + y * width) * 4;
/* Isolate the intensity */ /* Isolate the intensity */
intensity = (in[index + GlowR] + in[index + GlowG] + in[index + GlowB] - threshold); float intensity = (in[index].x + in[index].y + in[index].z - threshold);
float4 val;
if (intensity > 0) { if (intensity > 0) {
out[index + GlowR] = min_ff(clamp, (in[index + GlowR] * boost * intensity)); val = math::min(clampv, in[index] * (boost * intensity));
out[index + GlowG] = min_ff(clamp, (in[index + GlowG] * boost * intensity));
out[index + GlowB] = min_ff(clamp, (in[index + GlowB] * boost * intensity));
out[index + GlowA] = min_ff(clamp, (in[index + GlowA] * boost * intensity));
} }
else { else {
out[index + GlowR] = 0; val = float4(0.0f);
out[index + GlowG] = 0; }
out[index + GlowB] = 0; out[index] = val;
out[index + GlowA] = 0;
}
} }
} }
});
} }
static void init_glow_effect(Sequence *seq) static void init_glow_effect(Sequence *seq)
@@ -2203,28 +2106,38 @@ static void do_glow_effect_byte(Sequence *seq,
uchar * /*rect2*/, uchar * /*rect2*/,
uchar *out) uchar *out)
{ {
float *outbuf, *inbuf; using namespace blender;
GlowVars *glow = (GlowVars *)seq->effectdata; GlowVars *glow = (GlowVars *)seq->effectdata;
inbuf = static_cast<float *>(MEM_mallocN(sizeof(float[4]) * x * y, "glow effect input")); Array<float4> inbuf(x * y);
outbuf = static_cast<float *>(MEM_mallocN(sizeof(float[4]) * x * y, "glow effect output")); Array<float4> outbuf(x * y);
IMB_buffer_float_from_byte(inbuf, rect1, IB_PROFILE_SRGB, IB_PROFILE_SRGB, false, x, y, x, x); using namespace blender;
IMB_buffer_float_premultiply(inbuf, x, y); IMB_colormanagement_transform_from_byte_threaded(*inbuf.data(), rect1, x, y, 4, "sRGB", "sRGB");
RVIsolateHighlights_float( blur_isolate_highlights(
inbuf, outbuf, x, y, glow->fMini * 3.0f, glow->fBoost * fac, glow->fClamp); inbuf.data(), outbuf.data(), x, y, glow->fMini * 3.0f, glow->fBoost * fac, glow->fClamp);
RVBlurBitmap2_float(outbuf, x, y, glow->dDist * (render_size / 100.0f), glow->dQuality); glow_blur_bitmap(glow->bNoComp ? nullptr : inbuf.data(),
if (!glow->bNoComp) { outbuf.data(),
RVAddBitmaps_float(inbuf, outbuf, outbuf, x, y); x,
} y,
glow->dDist * (render_size / 100.0f),
glow->dQuality);
IMB_buffer_float_unpremultiply(outbuf, x, y); threading::parallel_for(IndexRange(y), 64, [&](const IndexRange y_range) {
IMB_buffer_byte_from_float( size_t offset = y_range.first() * x;
out, outbuf, 4, 0.0f, IB_PROFILE_SRGB, IB_PROFILE_SRGB, false, x, y, x, x); IMB_buffer_byte_from_float(out + offset * 4,
*(outbuf.data() + offset),
MEM_freeN(inbuf); 4,
MEM_freeN(outbuf); 0.0f,
IB_PROFILE_SRGB,
IB_PROFILE_SRGB,
true,
x,
y_range.size(),
x,
x);
});
} }
static void do_glow_effect_float(Sequence *seq, static void do_glow_effect_float(Sequence *seq,
@@ -2236,16 +2149,19 @@ static void do_glow_effect_float(Sequence *seq,
float * /*rect2*/, float * /*rect2*/,
float *out) float *out)
{ {
float *outbuf = out; using namespace blender;
float *inbuf = rect1; float4 *outbuf = reinterpret_cast<float4 *>(out);
float4 *inbuf = reinterpret_cast<float4 *>(rect1);
GlowVars *glow = (GlowVars *)seq->effectdata; GlowVars *glow = (GlowVars *)seq->effectdata;
RVIsolateHighlights_float( blur_isolate_highlights(
inbuf, outbuf, x, y, glow->fMini * 3.0f, glow->fBoost * fac, glow->fClamp); inbuf, outbuf, x, y, glow->fMini * 3.0f, glow->fBoost * fac, glow->fClamp);
RVBlurBitmap2_float(outbuf, x, y, glow->dDist * (render_size / 100.0f), glow->dQuality); glow_blur_bitmap(glow->bNoComp ? nullptr : inbuf,
if (!glow->bNoComp) { outbuf,
RVAddBitmaps_float(inbuf, outbuf, outbuf, x, y); x,
} y,
glow->dDist * (render_size / 100.0f),
glow->dQuality);
} }
static ImBuf *do_glow_effect(const SeqRenderData *context, static ImBuf *do_glow_effect(const SeqRenderData *context,