diff --git a/source/blender/sequencer/intern/effects.cc b/source/blender/sequencer/intern/effects.cc index 3a0d4e91ef8..f8ef6a668fc 100644 --- a/source/blender/sequencer/intern/effects.cc +++ b/source/blender/sequencer/intern/effects.cc @@ -64,6 +64,8 @@ #include "strip_time.hh" #include "utils.hh" +using blender::float4; + static SeqEffectHandle get_sequence_effect_impl(int seq_type); /* -------------------------------------------------------------------- */ @@ -120,6 +122,44 @@ static void slice_get_float_buffers(const SeqRenderData *context, } } +static float4 load_premul_pixel(const uchar *ptr) +{ + float4 res; + straight_uchar_to_premul_float(res, ptr); + return res; +} + +static float4 load_premul_pixel(const float *ptr) +{ + return float4(ptr); +} + +static void store_premul_pixel(const float4 &pix, uchar *dst) +{ + premul_float_to_straight_uchar(dst, pix); +} + +static void store_premul_pixel(const float4 &pix, float *dst) +{ + *reinterpret_cast(dst) = pix; +} + +static void store_opaque_black_pixel(uchar *dst) +{ + dst[0] = 0; + dst[1] = 0; + dst[2] = 0; + dst[3] = 255; +} + +static void store_opaque_black_pixel(float *dst) +{ + dst[0] = 0.0f; + dst[1] = 0.0f; + dst[2] = 0.0f; + dst[3] = 1.0f; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -203,72 +243,32 @@ static void init_alpha_over_or_under(Sequence *seq) seq->seq1 = seq2; } -static void do_alphaover_effect_byte( - float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out) +/* dst = src1 over src2 (alpha from src1) */ +template +static void do_alphaover_effect( + float fac, int width, int height, const T *src1, const T *src2, T *dst) { - uchar *cp1 = rect1; - uchar *cp2 = rect2; - uchar *rt = out; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - /* rt = rt1 over rt2 (alpha from rt1) */ - - float tempc[4], rt1[4], rt2[4]; - straight_uchar_to_premul_float(rt1, cp1); - straight_uchar_to_premul_float(rt2, cp2); - - float mfac = 1.0f - fac * rt1[3]; - - if (fac <= 0.0f) { - *((uint *)rt) = *((uint *)cp2); - } - else if (mfac <= 0.0f) { - *((uint *)rt) = *((uint *)cp1); - } - else { - tempc[0] = fac * rt1[0] + mfac * rt2[0]; - tempc[1] = fac * rt1[1] + mfac * rt2[1]; - tempc[2] = fac * rt1[2] + mfac * rt2[2]; - tempc[3] = fac * rt1[3] + mfac * rt2[3]; - - premul_float_to_straight_uchar(rt, tempc); - } - cp1 += 4; - cp2 += 4; - rt += 4; - } + if (fac <= 0.0f) { + memcpy(dst, src2, sizeof(T) * 4 * width * height); + return; } -} -static void do_alphaover_effect_float( - float fac, int x, int y, float *rect1, float *rect2, float *out) -{ - float *rt1 = rect1; - float *rt2 = rect2; - float *rt = out; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float4 col1 = load_premul_pixel(src1); + float mfac = 1.0f - fac * col1.w; - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - /* rt = rt1 over rt2 (alpha from rt1) */ - - float mfac = 1.0f - (fac * rt1[3]); - - if (fac <= 0.0f) { - memcpy(rt, rt2, sizeof(float[4])); - } - else if (mfac <= 0) { - memcpy(rt, rt1, sizeof(float[4])); + if (mfac <= 0.0f) { + memcpy(dst, src1, sizeof(T) * 4); } else { - rt[0] = fac * rt1[0] + mfac * rt2[0]; - rt[1] = fac * rt1[1] + mfac * rt2[1]; - rt[2] = fac * rt1[2] + mfac * rt2[2]; - rt[3] = fac * rt1[3] + mfac * rt2[3]; + float4 col2 = load_premul_pixel(src2); + float4 col = fac * col1 + mfac * col2; + store_premul_pixel(col, dst); } - rt1 += 4; - rt2 += 4; - rt += 4; + src1 += 4; + src2 += 4; + dst += 4; } } } @@ -290,7 +290,7 @@ static void do_alphaover_effect(const SeqRenderData *context, slice_get_float_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_alphaover_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } else { uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; @@ -298,7 +298,7 @@ static void do_alphaover_effect(const SeqRenderData *context, slice_get_byte_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_alphaover_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_alphaover_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } } @@ -308,88 +308,34 @@ static void do_alphaover_effect(const SeqRenderData *context, /** \name Alpha Under Effect * \{ */ -static void do_alphaunder_effect_byte( - float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out) +/* dst = src1 under src2 (alpha from src2) */ +template +static void do_alphaunder_effect( + float fac, int width, int height, const T *src1, const T *src2, T *dst) { - uchar *cp1 = rect1; - uchar *cp2 = rect2; - uchar *rt = out; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - /* rt = rt1 under rt2 (alpha from rt2) */ - - float tempc[4], rt1[4], rt2[4]; - straight_uchar_to_premul_float(rt1, cp1); - straight_uchar_to_premul_float(rt2, cp2); - - /* this complex optimization is because the - * 'skybuf' can be crossed in - */ - if (rt2[3] <= 0.0f && fac >= 1.0f) { - *((uint *)rt) = *((uint *)cp1); - } - else if (rt2[3] >= 1.0f) { - *((uint *)rt) = *((uint *)cp2); - } - else { - float temp_fac = (fac * (1.0f - rt2[3])); - - if (fac <= 0) { - *((uint *)rt) = *((uint *)cp2); - } - else { - tempc[0] = (temp_fac * rt1[0] + rt2[0]); - tempc[1] = (temp_fac * rt1[1] + rt2[1]); - tempc[2] = (temp_fac * rt1[2] + rt2[2]); - tempc[3] = (temp_fac * rt1[3] + rt2[3]); - - premul_float_to_straight_uchar(rt, tempc); - } - } - cp1 += 4; - cp2 += 4; - rt += 4; - } + if (fac >= 1.0f) { + memcpy(dst, src1, sizeof(T) * 4 * width * height); + return; } -} -static void do_alphaunder_effect_float( - float fac, int x, int y, float *rect1, float *rect2, float *out) -{ - float *rt1 = rect1; - float *rt2 = rect2; - float *rt = out; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - /* rt = rt1 under rt2 (alpha from rt2) */ - - /* this complex optimization is because the - * 'skybuf' can be crossed in - */ - if (rt2[3] <= 0 && fac >= 1.0f) { - memcpy(rt, rt1, sizeof(float[4])); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float4 col2 = load_premul_pixel(src2); + if (col2.w <= 0.0f) { + memcpy(dst, src1, sizeof(T) * 4); } - else if (rt2[3] >= 1.0f) { - memcpy(rt, rt2, sizeof(float[4])); + else if (col2.w >= 1.0f || fac <= 0.0f) { + memcpy(dst, src2, sizeof(T) * 4); } else { - float temp_fac = fac * (1.0f - rt2[3]); - - if (fac == 0) { - memcpy(rt, rt2, sizeof(float[4])); - } - else { - rt[0] = temp_fac * rt1[0] + rt2[0]; - rt[1] = temp_fac * rt1[1] + rt2[1]; - rt[2] = temp_fac * rt1[2] + rt2[2]; - rt[3] = temp_fac * rt1[3] + rt2[3]; - } + float mfac = fac * (1.0f - col2.w); + float4 col1 = load_premul_pixel(src1); + float4 col = mfac * col1 + col2; + store_premul_pixel(col, dst); } - rt1 += 4; - rt2 += 4; - rt += 4; + src1 += 4; + src2 += 4; + dst += 4; } } } @@ -411,7 +357,7 @@ static void do_alphaunder_effect(const SeqRenderData *context, slice_get_float_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_alphaunder_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } else { uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; @@ -419,7 +365,7 @@ static void do_alphaunder_effect(const SeqRenderData *context, slice_get_byte_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_alphaunder_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_alphaunder_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } } @@ -527,53 +473,24 @@ static float invGammaCorrect(float c) return sqrtf_signed(c); } -static void do_gammacross_effect_byte( - float fac, int x, int y, uchar *rect1, uchar *rect2, uchar *out) +template +static void do_gammacross_effect( + float fac, int width, int height, const T *src1, const T *src2, T *dst) { - uchar *cp1 = rect1; - uchar *cp2 = rect2; - uchar *rt = out; - float mfac = 1.0f - fac; - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - float rt1[4], rt2[4], tempc[4]; - - straight_uchar_to_premul_float(rt1, cp1); - straight_uchar_to_premul_float(rt2, cp2); - - tempc[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0])); - tempc[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1])); - tempc[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2])); - tempc[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3])); - - premul_float_to_straight_uchar(rt, tempc); - cp1 += 4; - cp2 += 4; - rt += 4; - } - } -} - -static void do_gammacross_effect_float( - float fac, int x, int y, float *rect1, float *rect2, float *out) -{ - float *rt1 = rect1; - float *rt2 = rect2; - float *rt = out; - - float mfac = 1.0f - fac; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - rt[0] = gammaCorrect(mfac * invGammaCorrect(rt1[0]) + fac * invGammaCorrect(rt2[0])); - rt[1] = gammaCorrect(mfac * invGammaCorrect(rt1[1]) + fac * invGammaCorrect(rt2[1])); - rt[2] = gammaCorrect(mfac * invGammaCorrect(rt1[2]) + fac * invGammaCorrect(rt2[2])); - rt[3] = gammaCorrect(mfac * invGammaCorrect(rt1[3]) + fac * invGammaCorrect(rt2[3])); - rt1 += 4; - rt2 += 4; - rt += 4; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float4 col1 = load_premul_pixel(src1); + float4 col2 = load_premul_pixel(src2); + float4 col; + for (int c = 0; c < 4; ++c) { + col[c] = gammaCorrect(mfac * invGammaCorrect(col1[c]) + fac * invGammaCorrect(col2[c])); + } + store_premul_pixel(col, dst); + src1 += 4; + src2 += 4; + dst += 4; } } } @@ -604,7 +521,7 @@ static void do_gammacross_effect(const SeqRenderData *context, slice_get_float_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_gammacross_effect_float(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } else { uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; @@ -612,7 +529,7 @@ static void do_gammacross_effect(const SeqRenderData *context, slice_get_byte_buffers( context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - do_gammacross_effect_byte(fac, context->rectx, total_lines, rect1, rect2, rect_out); + do_gammacross_effect(fac, context->rectx, total_lines, rect1, rect2, rect_out); } } @@ -943,57 +860,21 @@ static void do_mul_effect(const SeqRenderData *context, /** \name Blend Mode Effect * \{ */ -using IMB_blend_func_byte = void (*)(uchar *dst, const uchar *src1, const uchar *src2); -using IMB_blend_func_float = void (*)(float *dst, const float *src1, const float *src2); - -BLI_INLINE void apply_blend_function_byte(float fac, - int x, - int y, - uchar *rect1, - uchar *rect2, - uchar *out, - IMB_blend_func_byte blend_function) +/* blend_function has to be: void (T* dst, const T *src1, const T *src2) */ +template +static void apply_blend_function( + float fac, int width, int height, const T *src1, T *src2, T *dst, Func blend_function) { - uchar *rt1 = rect1; - uchar *rt2 = rect2; - uchar *rt = out; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - uint achannel = rt2[3]; - rt2[3] = uint(achannel) * fac; - blend_function(rt, rt1, rt2); - rt2[3] = achannel; - rt[3] = rt1[3]; - rt1 += 4; - rt2 += 4; - rt += 4; - } - } -} - -BLI_INLINE void apply_blend_function_float(float fac, - int x, - int y, - float *rect1, - float *rect2, - float *out, - IMB_blend_func_float blend_function) -{ - float *rt1 = rect1; - float *rt2 = rect2; - float *rt = out; - - for (int i = 0; i < y; i++) { - for (int j = 0; j < x; j++) { - float achannel = rt2[3]; - rt2[3] = achannel * fac; - blend_function(rt, rt1, rt2); - rt2[3] = achannel; - rt[3] = rt1[3]; - rt1 += 4; - rt2 += 4; - rt += 4; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + T achannel = src2[3]; + src2[3] = T(achannel * fac); + blend_function(dst, src1, src2); + src2[3] = achannel; + dst[3] = src1[3]; + src1 += 4; + src2 += 4; + dst += 4; } } } @@ -1003,67 +884,67 @@ static void do_blend_effect_float( { switch (btype) { case SEQ_TYPE_ADD: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_add_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_float); break; case SEQ_TYPE_SUB: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_sub_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_float); break; case SEQ_TYPE_MUL: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_mul_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_float); break; case SEQ_TYPE_DARKEN: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_darken_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_float); break; case SEQ_TYPE_COLOR_BURN: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_burn_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_float); break; case SEQ_TYPE_LINEAR_BURN: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearburn_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_float); break; case SEQ_TYPE_SCREEN: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_screen_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_float); break; case SEQ_TYPE_LIGHTEN: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_lighten_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_float); break; case SEQ_TYPE_DODGE: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_dodge_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_float); break; case SEQ_TYPE_OVERLAY: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_overlay_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_float); break; case SEQ_TYPE_SOFT_LIGHT: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_softlight_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_float); break; case SEQ_TYPE_HARD_LIGHT: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hardlight_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_float); break; case SEQ_TYPE_PIN_LIGHT: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_pinlight_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_float); break; case SEQ_TYPE_LIN_LIGHT: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_linearlight_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_float); break; case SEQ_TYPE_VIVID_LIGHT: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_vividlight_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_float); break; case SEQ_TYPE_BLEND_COLOR: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_color_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_float); break; case SEQ_TYPE_HUE: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_hue_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_float); break; case SEQ_TYPE_SATURATION: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_saturation_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_float); break; case SEQ_TYPE_VALUE: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_luminosity_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_float); break; case SEQ_TYPE_DIFFERENCE: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_difference_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_float); break; case SEQ_TYPE_EXCLUSION: - apply_blend_function_float(fac, x, y, rect1, rect2, out, blend_color_exclusion_float); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_float); break; default: break; @@ -1075,67 +956,67 @@ static void do_blend_effect_byte( { switch (btype) { case SEQ_TYPE_ADD: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_add_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_add_byte); break; case SEQ_TYPE_SUB: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_sub_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_sub_byte); break; case SEQ_TYPE_MUL: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_mul_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_mul_byte); break; case SEQ_TYPE_DARKEN: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_darken_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_darken_byte); break; case SEQ_TYPE_COLOR_BURN: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_burn_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_burn_byte); break; case SEQ_TYPE_LINEAR_BURN: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearburn_byte); break; case SEQ_TYPE_SCREEN: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_screen_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_screen_byte); break; case SEQ_TYPE_LIGHTEN: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_lighten_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_lighten_byte); break; case SEQ_TYPE_DODGE: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_dodge_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_dodge_byte); break; case SEQ_TYPE_OVERLAY: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_overlay_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_overlay_byte); break; case SEQ_TYPE_SOFT_LIGHT: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_softlight_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_softlight_byte); break; case SEQ_TYPE_HARD_LIGHT: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hardlight_byte); break; case SEQ_TYPE_PIN_LIGHT: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_pinlight_byte); break; case SEQ_TYPE_LIN_LIGHT: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_linearlight_byte); break; case SEQ_TYPE_VIVID_LIGHT: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_vividlight_byte); break; case SEQ_TYPE_BLEND_COLOR: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_color_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_color_byte); break; case SEQ_TYPE_HUE: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_hue_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_hue_byte); break; case SEQ_TYPE_SATURATION: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_saturation_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_saturation_byte); break; case SEQ_TYPE_VALUE: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_luminosity_byte); break; case SEQ_TYPE_DIFFERENCE: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_difference_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_difference_byte); break; case SEQ_TYPE_EXCLUSION: - apply_blend_function_byte(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte); + apply_blend_function(fac, x, y, rect1, rect2, out, blend_color_exclusion_byte); break; default: break; @@ -1516,58 +1397,38 @@ static void copy_wipe_effect(Sequence *dst, Sequence *src, const int /*flag*/) dst->effectdata = MEM_dupallocN(src->effectdata); } -static void do_wipe_effect_byte(const Sequence *seq, - float fac, - int width, - int height, - const uchar *rect1, - const uchar *rect2, - uchar *out) +template +static void do_wipe_effect( + const Sequence *seq, float fac, int width, int height, const T *rect1, const T *rect2, T *out) { using namespace blender; const WipeVars *wipe = (const WipeVars *)seq->effectdata; const WipeZone wipezone = precalc_wipe_zone(wipe, width, height); threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) { - const uchar *cp1 = rect1 + y_range.first() * width * 4; - const uchar *cp2 = rect2 + y_range.first() * width * 4; - uchar *rt = out + y_range.first() * width * 4; + const T *cp1 = rect1 ? rect1 + y_range.first() * width * 4 : nullptr; + const T *cp2 = rect2 ? rect2 + y_range.first() * width * 4 : nullptr; + T *rt = out + y_range.first() * width * 4; for (const int y : y_range) { for (int x = 0; x < width; x++) { float check = check_zone(&wipezone, x, y, fac); if (check) { if (cp1) { - float rt1[4], rt2[4], tempc[4]; - - straight_uchar_to_premul_float(rt1, cp1); - straight_uchar_to_premul_float(rt2, cp2); - - tempc[0] = rt1[0] * check + rt2[0] * (1 - check); - tempc[1] = rt1[1] * check + rt2[1] * (1 - check); - tempc[2] = rt1[2] * check + rt2[2] * (1 - check); - tempc[3] = rt1[3] * check + rt2[3] * (1 - check); - - premul_float_to_straight_uchar(rt, tempc); + float4 col1 = load_premul_pixel(cp1); + float4 col2 = load_premul_pixel(cp2); + float4 col = col1 * check + col2 * (1.0f - check); + store_premul_pixel(col, rt); } else { - rt[0] = 0; - rt[1] = 0; - rt[2] = 0; - rt[3] = 255; + store_opaque_black_pixel(rt); } } else { if (cp2) { - rt[0] = cp2[0]; - rt[1] = cp2[1]; - rt[2] = cp2[2]; - rt[3] = cp2[3]; + memcpy(rt, cp2, sizeof(T) * 4); } else { - rt[0] = 0; - rt[1] = 0; - rt[2] = 0; - rt[3] = 255; + store_opaque_black_pixel(rt); } } @@ -1583,66 +1444,6 @@ static void do_wipe_effect_byte(const Sequence *seq, }); } -static void do_wipe_effect_float(Sequence *seq, - float fac, - int width, - int height, - const float *rect1, - const float *rect2, - float *out) -{ - using namespace blender; - const WipeVars *wipe = (const WipeVars *)seq->effectdata; - const WipeZone wipezone = precalc_wipe_zone(wipe, width, height); - - threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) { - const float *rt1 = rect1 + y_range.first() * width * 4; - const float *rt2 = rect2 + y_range.first() * width * 4; - float *rt = out + y_range.first() * width * 4; - for (const int y : y_range) { - for (int x = 0; x < width; x++) { - float check = check_zone(&wipezone, x, y, fac); - if (check) { - if (rt1) { - rt[0] = rt1[0] * check + rt2[0] * (1 - check); - rt[1] = rt1[1] * check + rt2[1] * (1 - check); - rt[2] = rt1[2] * check + rt2[2] * (1 - check); - rt[3] = rt1[3] * check + rt2[3] * (1 - check); - } - else { - rt[0] = 0; - rt[1] = 0; - rt[2] = 0; - rt[3] = 1.0; - } - } - else { - if (rt2) { - rt[0] = rt2[0]; - rt[1] = rt2[1]; - rt[2] = rt2[2]; - rt[3] = rt2[3]; - } - else { - rt[0] = 0; - rt[1] = 0; - rt[2] = 0; - rt[3] = 1.0; - } - } - - rt += 4; - if (rt1 != nullptr) { - rt1 += 4; - } - if (rt2 != nullptr) { - rt2 += 4; - } - } - } - }); -} - static ImBuf *do_wipe_effect(const SeqRenderData *context, Sequence *seq, float /*timeline_frame*/, @@ -1654,22 +1455,22 @@ static ImBuf *do_wipe_effect(const SeqRenderData *context, ImBuf *out = prepare_effect_imbufs(context, ibuf1, ibuf2, ibuf3); if (out->float_buffer.data) { - do_wipe_effect_float(seq, - fac, - context->rectx, - context->recty, - ibuf1->float_buffer.data, - ibuf2->float_buffer.data, - out->float_buffer.data); + do_wipe_effect(seq, + fac, + context->rectx, + context->recty, + ibuf1->float_buffer.data, + ibuf2->float_buffer.data, + out->float_buffer.data); } else { - do_wipe_effect_byte(seq, - fac, - context->rectx, - context->recty, - ibuf1->byte_buffer.data, - ibuf2->byte_buffer.data, - out->byte_buffer.data); + do_wipe_effect(seq, + fac, + context->rectx, + context->recty, + ibuf1->byte_buffer.data, + ibuf2->byte_buffer.data, + out->byte_buffer.data); } return out; @@ -1837,12 +1638,8 @@ static void do_transform_effect(const SeqRenderData *context, /** \name Glow Effect * \{ */ -static void glow_blur_bitmap(const blender::float4 *src, - blender::float4 *map, - int width, - int height, - float blur, - int quality) +static void glow_blur_bitmap( + const float4 *src, float4 *map, int width, int height, float blur, int quality) { using namespace blender; @@ -1913,8 +1710,8 @@ static void glow_blur_bitmap(const blender::float4 *src, }); } -static void blur_isolate_highlights(const blender::float4 *in, - blender::float4 *out, +static void blur_isolate_highlights(const float4 *in, + float4 *out, int width, int height, float threshold, @@ -2525,7 +2322,7 @@ static void do_overdrop_effect(const SeqRenderData *context, context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); do_drop_effect_float(fac, x, y, rect1, rect2, rect_out); - do_alphaover_effect_float(fac, x, y, rect1, rect2, rect_out); + do_alphaover_effect(fac, x, y, rect1, rect2, rect_out); } else { uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; @@ -2534,7 +2331,7 @@ static void do_overdrop_effect(const SeqRenderData *context, context, ibuf1, ibuf2, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); do_drop_effect_byte(fac, x, y, rect1, rect2, rect_out); - do_alphaover_effect_byte(fac, x, y, rect1, rect2, rect_out); + do_alphaover_effect(fac, x, y, rect1, rect2, rect_out); } } @@ -2544,14 +2341,6 @@ static void do_overdrop_effect(const SeqRenderData *context, /** \name Gaussian Blur * \{ */ -/* NOTE: This gaussian blur implementation accumulates values in the square - * kernel rather that doing X direction and then Y direction because of the - * lack of using multiple-staged filters. - * - * Once we can we'll implement a way to apply filter as multiple stages we - * can optimize hell of a lot in here. - */ - static void init_gaussian_blur_effect(Sequence *seq) { if (seq->effectdata) { @@ -2585,346 +2374,92 @@ static int early_out_gaussian_blur(Sequence *seq, float /*fac*/) return EARLY_DO_EFFECT; } -/* TODO(sergey): De-duplicate with compositor. */ -static float *make_gaussian_blur_kernel(float rad, int size) +static blender::Array make_gaussian_blur_kernel(float rad, int size) { - float *gausstab, sum, val; - float fac; - int i, n; + int n = 2 * size + 1; + blender::Array gausstab(n); - n = 2 * size + 1; - - gausstab = (float *)MEM_mallocN(sizeof(float) * n, __func__); - - sum = 0.0f; - fac = (rad > 0.0f ? 1.0f / rad : 0.0f); - for (i = -size; i <= size; i++) { - val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac); + float sum = 0.0f; + float fac = (rad > 0.0f ? 1.0f / rad : 0.0f); + for (int i = -size; i <= size; i++) { + float val = RE_filter_value(R_FILTER_GAUSS, float(i) * fac); sum += val; gausstab[i + size] = val; } - sum = 1.0f / sum; - for (i = 0; i < n; i++) { - gausstab[i] *= sum; + float inv_sum = 1.0f / sum; + for (int i = 0; i < n; i++) { + gausstab[i] *= inv_sum; } return gausstab; } -static void do_gaussian_blur_effect_byte_x(Sequence *seq, - int start_line, - int x, - int y, - int frame_width, - int /*frame_height*/, - const uchar *rect, - uchar *out) +template +static void gaussian_blur_x(const blender::Array &gausstab, + int half_size, + int start_line, + int width, + int height, + int /*frame_height*/, + const T *rect, + T *dst) { -#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4) - GaussianBlurVars *data = static_cast(seq->effectdata); - const int size_x = int(data->size_x + 0.5f); - int i, j; - - /* Make gaussian weight table. */ - float *gausstab_x; - gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x); - - for (i = 0; i < y; i++) { - for (j = 0; j < x; j++) { - int out_index = INDEX(j, i); - float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + dst += start_line * width * 4; + for (int y = start_line; y < start_line + height; y++) { + for (int x = 0; x < width; x++) { + float4 accum(0.0f); float accum_weight = 0.0f; - for (int current_x = j - size_x; current_x <= j + size_x; current_x++) { - if (current_x < 0 || current_x >= frame_width) { - /* Out of bounds. */ - continue; - } - int index = INDEX(current_x, i + start_line); - float weight = gausstab_x[current_x - j + size_x]; - accum[0] += rect[index] * weight; - accum[1] += rect[index + 1] * weight; - accum[2] += rect[index + 2] * weight; - accum[3] += rect[index + 3] * weight; + int xmin = blender::math::max(x - half_size, 0); + int xmax = blender::math::min(x + half_size, width - 1); + for (int nx = xmin, index = (xmin - x) + half_size; nx <= xmax; nx++, index++) { + float weight = gausstab[index]; + int offset = (y * width + nx) * 4; + accum += float4(rect + offset) * weight; accum_weight += weight; } - - float inv_accum_weight = 1.0f / accum_weight; - out[out_index + 0] = accum[0] * inv_accum_weight; - out[out_index + 1] = accum[1] * inv_accum_weight; - out[out_index + 2] = accum[2] * inv_accum_weight; - out[out_index + 3] = accum[3] * inv_accum_weight; + accum *= (1.0f / accum_weight); + dst[0] = accum[0]; + dst[1] = accum[1]; + dst[2] = accum[2]; + dst[3] = accum[3]; + dst += 4; } } - - MEM_freeN(gausstab_x); -#undef INDEX } -static void do_gaussian_blur_effect_byte_y(Sequence *seq, - int start_line, - int x, - int y, - int /*frame_width*/, - int frame_height, - const uchar *rect, - uchar *out) +template +static void gaussian_blur_y(const blender::Array &gausstab, + int half_size, + int start_line, + int width, + int height, + int frame_height, + const T *rect, + T *dst) { -#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4) - GaussianBlurVars *data = static_cast(seq->effectdata); - const int size_y = int(data->size_y + 0.5f); - int i, j; - - /* Make gaussian weight table. */ - float *gausstab_y; - gausstab_y = make_gaussian_blur_kernel(data->size_y, size_y); - - for (i = 0; i < y; i++) { - for (j = 0; j < x; j++) { - int out_index = INDEX(j, i); - float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + dst += start_line * width * 4; + for (int y = start_line; y < start_line + height; y++) { + for (int x = 0; x < width; x++) { + float4 accum(0.0f); float accum_weight = 0.0f; - for (int current_y = i - size_y; current_y <= i + size_y; current_y++) { - if (current_y < -start_line || current_y + start_line >= frame_height) { - /* Out of bounds. */ - continue; - } - int index = INDEX(j, current_y + start_line); - float weight = gausstab_y[current_y - i + size_y]; - accum[0] += rect[index] * weight; - accum[1] += rect[index + 1] * weight; - accum[2] += rect[index + 2] * weight; - accum[3] += rect[index + 3] * weight; + int ymin = blender::math::max(y - half_size, 0); + int ymax = blender::math::min(y + half_size, frame_height - 1); + for (int ny = ymin, index = (ymin - y) + half_size; ny <= ymax; ny++, index++) { + float weight = gausstab[index]; + int offset = (ny * width + x) * 4; + accum += float4(rect + offset) * weight; accum_weight += weight; } - float inv_accum_weight = 1.0f / accum_weight; - out[out_index + 0] = accum[0] * inv_accum_weight; - out[out_index + 1] = accum[1] * inv_accum_weight; - out[out_index + 2] = accum[2] * inv_accum_weight; - out[out_index + 3] = accum[3] * inv_accum_weight; + accum *= (1.0f / accum_weight); + dst[0] = accum[0]; + dst[1] = accum[1]; + dst[2] = accum[2]; + dst[3] = accum[3]; + dst += 4; } } - - MEM_freeN(gausstab_y); -#undef INDEX -} - -static void do_gaussian_blur_effect_float_x(Sequence *seq, - int start_line, - int x, - int y, - int frame_width, - int /*frame_height*/, - float *rect, - float *out) -{ -#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4) - GaussianBlurVars *data = static_cast(seq->effectdata); - const int size_x = int(data->size_x + 0.5f); - int i, j; - - /* Make gaussian weight table. */ - float *gausstab_x; - gausstab_x = make_gaussian_blur_kernel(data->size_x, size_x); - - for (i = 0; i < y; i++) { - for (j = 0; j < x; j++) { - int out_index = INDEX(j, i); - float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float accum_weight = 0.0f; - for (int current_x = j - size_x; current_x <= j + size_x; current_x++) { - if (current_x < 0 || current_x >= frame_width) { - /* Out of bounds. */ - continue; - } - int index = INDEX(current_x, i + start_line); - float weight = gausstab_x[current_x - j + size_x]; - madd_v4_v4fl(accum, &rect[index], weight); - accum_weight += weight; - } - mul_v4_v4fl(&out[out_index], accum, 1.0f / accum_weight); - } - } - - MEM_freeN(gausstab_x); -#undef INDEX -} - -static void do_gaussian_blur_effect_float_y(Sequence *seq, - int start_line, - int x, - int y, - int /*frame_width*/, - int frame_height, - float *rect, - float *out) -{ -#define INDEX(_x, _y) (((_y) * (x) + (_x)) * 4) - GaussianBlurVars *data = static_cast(seq->effectdata); - const int size_y = int(data->size_y + 0.5f); - int i, j; - - /* Make gaussian weight table. */ - float *gausstab_y; - gausstab_y = make_gaussian_blur_kernel(data->size_y, size_y); - - for (i = 0; i < y; i++) { - for (j = 0; j < x; j++) { - int out_index = INDEX(j, i); - float accum[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float accum_weight = 0.0f; - for (int current_y = i - size_y; current_y <= i + size_y; current_y++) { - if (current_y < -start_line || current_y + start_line >= frame_height) { - /* Out of bounds. */ - continue; - } - int index = INDEX(j, current_y + start_line); - float weight = gausstab_y[current_y - i + size_y]; - madd_v4_v4fl(accum, &rect[index], weight); - accum_weight += weight; - } - mul_v4_v4fl(&out[out_index], accum, 1.0f / accum_weight); - } - } - - MEM_freeN(gausstab_y); -#undef INDEX -} - -static void do_gaussian_blur_effect_x_cb(const SeqRenderData *context, - Sequence *seq, - ImBuf *ibuf, - int start_line, - int total_lines, - ImBuf *out) -{ - if (out->float_buffer.data) { - float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; - - slice_get_float_buffers( - context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - - do_gaussian_blur_effect_float_x(seq, - start_line, - context->rectx, - total_lines, - context->rectx, - context->recty, - ibuf->float_buffer.data, - rect_out); - } - else { - uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; - - slice_get_byte_buffers( - context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - - do_gaussian_blur_effect_byte_x(seq, - start_line, - context->rectx, - total_lines, - context->rectx, - context->recty, - ibuf->byte_buffer.data, - rect_out); - } -} - -static void do_gaussian_blur_effect_y_cb(const SeqRenderData *context, - Sequence *seq, - ImBuf *ibuf, - int start_line, - int total_lines, - ImBuf *out) -{ - if (out->float_buffer.data) { - float *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; - - slice_get_float_buffers( - context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - - do_gaussian_blur_effect_float_y(seq, - start_line, - context->rectx, - total_lines, - context->rectx, - context->recty, - ibuf->float_buffer.data, - rect_out); - } - else { - uchar *rect1 = nullptr, *rect2 = nullptr, *rect_out = nullptr; - - slice_get_byte_buffers( - context, ibuf, nullptr, nullptr, out, start_line, &rect1, &rect2, nullptr, &rect_out); - - do_gaussian_blur_effect_byte_y(seq, - start_line, - context->rectx, - total_lines, - context->rectx, - context->recty, - ibuf->byte_buffer.data, - rect_out); - } -} - -struct RenderGaussianBlurEffectInitData { - const SeqRenderData *context; - Sequence *seq; - ImBuf *ibuf; - ImBuf *out; -}; - -struct RenderGaussianBlurEffectThread { - const SeqRenderData *context; - Sequence *seq; - ImBuf *ibuf; - ImBuf *out; - int start_line, tot_line; -}; - -static void render_effect_execute_init_handle(void *handle_v, - int start_line, - int tot_line, - void *init_data_v) -{ - RenderGaussianBlurEffectThread *handle = (RenderGaussianBlurEffectThread *)handle_v; - RenderGaussianBlurEffectInitData *init_data = (RenderGaussianBlurEffectInitData *)init_data_v; - - handle->context = init_data->context; - handle->seq = init_data->seq; - handle->ibuf = init_data->ibuf; - handle->out = init_data->out; - - handle->start_line = start_line; - handle->tot_line = tot_line; -} - -static void *render_effect_execute_do_x_thread(void *thread_data_v) -{ - RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v; - do_gaussian_blur_effect_x_cb(thread_data->context, - thread_data->seq, - thread_data->ibuf, - thread_data->start_line, - thread_data->tot_line, - thread_data->out); - return nullptr; -} - -static void *render_effect_execute_do_y_thread(void *thread_data_v) -{ - RenderGaussianBlurEffectThread *thread_data = (RenderGaussianBlurEffectThread *)thread_data_v; - do_gaussian_blur_effect_y_cb(thread_data->context, - thread_data->seq, - thread_data->ibuf, - thread_data->start_line, - thread_data->tot_line, - thread_data->out); - - return nullptr; } static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context, @@ -2935,32 +2470,75 @@ static ImBuf *do_gaussian_blur_effect(const SeqRenderData *context, ImBuf * /*ibuf2*/, ImBuf * /*ibuf3*/) { + using namespace blender; + + /* Create blur kernel weights. */ + const GaussianBlurVars *data = static_cast(seq->effectdata); + const int half_size_x = int(data->size_x + 0.5f); + const int half_size_y = int(data->size_y + 0.5f); + Array gausstab_x = make_gaussian_blur_kernel(data->size_x, half_size_x); + Array gausstab_y = make_gaussian_blur_kernel(data->size_y, half_size_y); + + const int width = context->rectx; + const int height = context->recty; + const bool is_float = ibuf1->float_buffer.data; + + /* Horizontal blur: create output, blur ibuf1 into it. */ ImBuf *out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr); + threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) { + const int y_first = y_range.first(); + const int y_size = y_range.size(); + if (is_float) { + gaussian_blur_x(gausstab_x, + half_size_x, + y_first, + width, + y_size, + height, + ibuf1->float_buffer.data, + out->float_buffer.data); + } + else { + gaussian_blur_x(gausstab_x, + half_size_x, + y_first, + width, + y_size, + height, + ibuf1->byte_buffer.data, + out->byte_buffer.data); + } + }); - RenderGaussianBlurEffectInitData init_data; - - init_data.context = context; - init_data.seq = seq; - init_data.ibuf = ibuf1; - init_data.out = out; - - IMB_processor_apply_threaded(out->y, - sizeof(RenderGaussianBlurEffectThread), - &init_data, - render_effect_execute_init_handle, - render_effect_execute_do_x_thread); - + /* Vertical blur: create output, blur previous output into it. */ ibuf1 = out; - init_data.ibuf = ibuf1; out = prepare_effect_imbufs(context, ibuf1, nullptr, nullptr); - init_data.out = out; - - IMB_processor_apply_threaded(out->y, - sizeof(RenderGaussianBlurEffectThread), - &init_data, - render_effect_execute_init_handle, - render_effect_execute_do_y_thread); + threading::parallel_for(IndexRange(context->recty), 32, [&](const IndexRange y_range) { + const int y_first = y_range.first(); + const int y_size = y_range.size(); + if (is_float) { + gaussian_blur_y(gausstab_y, + half_size_y, + y_first, + width, + y_size, + height, + ibuf1->float_buffer.data, + out->float_buffer.data); + } + else { + gaussian_blur_y(gausstab_y, + half_size_y, + y_first, + width, + y_size, + height, + ibuf1->byte_buffer.data, + out->byte_buffer.data); + } + }); + /* Free the first output. */ IMB_freeImBuf(ibuf1); return out;