diff --git a/source/blender/blenkernel/intern/tracking_stabilize.cc b/source/blender/blenkernel/intern/tracking_stabilize.cc index d41d5ce9b96..bf99d21b8f1 100644 --- a/source/blender/blenkernel/intern/tracking_stabilize.cc +++ b/source/blender/blenkernel/intern/tracking_stabilize.cc @@ -1316,7 +1316,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata, for (int x = 0; x < tmpibuf->x; x++, dst++) { vec[0] = float(x); mul_v3_m4v3(rvec, mat, vec); - *dst = imbuf::interpolate_bilinear_fl(ibuf, rvec[0], rvec[1]); + *dst = imbuf::interpolate_bilinear_border_fl(ibuf, rvec[0], rvec[1]); } } else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) { @@ -1342,7 +1342,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata, for (int x = 0; x < tmpibuf->x; x++, dst++) { vec[0] = float(x); mul_v3_m4v3(rvec, mat, vec); - *dst = imbuf::interpolate_bilinear_byte(ibuf, rvec[0], rvec[1]); + *dst = imbuf::interpolate_bilinear_border_byte(ibuf, rvec[0], rvec[1]); } } else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) { diff --git a/source/blender/blenlib/BLI_math_interp.hh b/source/blender/blenlib/BLI_math_interp.hh index ca60b45ff16..2354acfec40 100644 --- a/source/blender/blenlib/BLI_math_interp.hh +++ b/source/blender/blenlib/BLI_math_interp.hh @@ -142,7 +142,7 @@ inline void interpolate_nearest_wrap_fl( } /** - * Bilinear sampling. + * Bilinear sampling (with black border). * * Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them * based on fractional parts of u,v. Samples outside the image are turned @@ -152,6 +152,26 @@ inline void interpolate_nearest_wrap_fl( * to get proper filtering. */ +[[nodiscard]] uchar4 interpolate_bilinear_border_byte( + const uchar *buffer, int width, int height, float u, float v); + +[[nodiscard]] float4 interpolate_bilinear_border_fl( + const float *buffer, int width, int height, float u, float v); + +void interpolate_bilinear_border_fl( + const float *buffer, float *output, int width, int height, int components, float u, float v); + +/** + * Bilinear sampling. + * + * Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them + * based on fractional parts of u,v. + * Samples outside the image are clamped to texels at image edge. + * + * Note that you probably want to subtract 0.5 from u,v before this function, + * to get proper filtering. + */ + [[nodiscard]] uchar4 interpolate_bilinear_byte( const uchar *buffer, int width, int height, float u, float v); diff --git a/source/blender/blenlib/BLI_simd.h b/source/blender/blenlib/BLI_simd.h index 7ebc8cf4a3d..3d348b35e88 100644 --- a/source/blender/blenlib/BLI_simd.h +++ b/source/blender/blenlib/BLI_simd.h @@ -30,3 +30,9 @@ #else # define BLI_HAVE_SSE2 0 #endif + +#if defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON)) +# define BLI_HAVE_SSE4 1 +#else +# define BLI_HAVE_SSE4 0 +#endif diff --git a/source/blender/blenlib/intern/math_interp.cc b/source/blender/blenlib/intern/math_interp.cc index c1784e98ee2..ac2fa83f3b8 100644 --- a/source/blender/blenlib/intern/math_interp.cc +++ b/source/blender/blenlib/intern/math_interp.cc @@ -17,6 +17,8 @@ #include "BLI_simd.h" #include "BLI_strict_flags.h" +namespace blender::math { + enum class eCubicFilter { BSpline, Mitchell, @@ -24,7 +26,7 @@ enum class eCubicFilter { /* Calculate cubic filter coefficients, for samples at -1,0,+1,+2. * f is 0..1 offset from texel center in pixel space. */ -template static blender::float4 cubic_filter_coefficients(float f) +template static float4 cubic_filter_coefficients(float f) { float f2 = f * f; float f3 = f2 * f; @@ -35,7 +37,7 @@ template static blender::float4 cubic_filter_coefficie float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f; float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f; float w2 = 1.0f - w0 - w1 - w3; - return blender::float4(w0, w1, w2, w3); + return float4(w0, w1, w2, w3); } else if constexpr (filter == eCubicFilter::Mitchell) { /* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */ @@ -43,7 +45,7 @@ template static blender::float4 cubic_filter_coefficie float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f; float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f; float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2; - return blender::float4(w0, w1, w2, w3); + return float4(w0, w1, w2, w3); } } @@ -54,13 +56,11 @@ template static blender::float4 cubic_filter_coefficie BLI_INLINE __m128 floor_simd(__m128 v) { -# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON) - /* If we're on SSE4 or ARM NEON, just use the simple floor() way. */ +# if BLI_HAVE_SSE4 __m128 v_floor = _mm_floor_ps(v); # else - /* The hard way: truncate, for negative inputs this will round towards zero. - * Then compare with input, and subtract 1 for the inputs that were - * negative. */ + /* Truncate, for negative inputs this will round towards zero. Then compare + * with input, and subtract 1 for the inputs that were negative. */ __m128 v_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(v)); __m128 v_neg = _mm_cmplt_ps(v, v_trunc); __m128 v_floor = _mm_sub_ps(v_trunc, _mm_and_ps(v_neg, _mm_set1_ps(1.0f))); @@ -68,6 +68,30 @@ BLI_INLINE __m128 floor_simd(__m128 v) return v_floor; } +BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b) +{ +# if BLI_HAVE_SSE4 + return _mm_min_epi32(a, b); +# else + __m128i cmp = _mm_cmplt_epi32(a, b); + a = _mm_and_si128(cmp, a); + b = _mm_andnot_si128(cmp, b); + return _mm_or_si128(a, b); +# endif +} + +BLI_INLINE __m128i max_i_simd(__m128i a, __m128i b) +{ +# if BLI_HAVE_SSE4 + return _mm_max_epi32(a, b); +# else + __m128i cmp = _mm_cmplt_epi32(b, a); + a = _mm_and_si128(cmp, a); + b = _mm_andnot_si128(cmp, b); + return _mm_or_si128(a, b); +# endif +} + template BLI_INLINE void bicubic_interpolation_uchar_simd( const uchar *src_buffer, uchar *output, int width, int height, float u, float v) @@ -90,8 +114,8 @@ BLI_INLINE void bicubic_interpolation_uchar_simd( __m128 frac_uv = _mm_sub_ps(uv, uv_floor); /* Calculate pixel weights. */ - blender::float4 wx = cubic_filter_coefficients(_mm_cvtss_f32(frac_uv)); - blender::float4 wy = cubic_filter_coefficients( + float4 wx = cubic_filter_coefficients(_mm_cvtss_f32(frac_uv)); + float4 wy = cubic_filter_coefficients( _mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1))); /* Read 4x4 source pixels and blend them. */ @@ -134,8 +158,6 @@ template static void bicubic_interpolation( const T *src_buffer, T *output, int width, int height, int components, float u, float v) { - using namespace blender; - BLI_assert(src_buffer && output); #if BLI_HAVE_SSE2 @@ -234,6 +256,7 @@ static void bicubic_interpolation( } } +template BLI_INLINE void bilinear_fl_impl(const float *buffer, float *output, int width, @@ -288,33 +311,23 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer, return; } - /* Sample including outside of edges of image. */ - if (x1 < 0 || y1 < 0) { - row1 = empty; + /* Sample locations. */ + if constexpr (border) { + row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * components; + row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * components; + row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * components; + row4 = (x2 > width - 1 || y2 > height - 1) ? empty : + buffer + (int64_t(width) * y2 + x2) * components; } else { - row1 = buffer + width * y1 * components + components * x1; - } - - if (x1 < 0 || y2 > height - 1) { - row2 = empty; - } - else { - row2 = buffer + width * y2 * components + components * x1; - } - - if (x2 > width - 1 || y1 < 0) { - row3 = empty; - } - else { - row3 = buffer + width * y1 * components + components * x2; - } - - if (x2 > width - 1 || y2 > height - 1) { - row4 = empty; - } - else { - row4 = buffer + width * y2 * components + components * x2; + x1 = blender::math::clamp(x1, 0, width - 1); + x2 = blender::math::clamp(x2, 0, width - 1); + y1 = blender::math::clamp(y1, 0, height - 1); + y2 = blender::math::clamp(y2, 0, height - 1); + row1 = buffer + (int64_t(width) * y1 + x1) * components; + row2 = buffer + (int64_t(width) * y2 + x1) * components; + row3 = buffer + (int64_t(width) * y1 + x2) * components; + row4 = buffer + (int64_t(width) * y2 + x2) * components; } a = u - uf; @@ -355,23 +368,13 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer, } } -namespace blender::math { - -uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v) +template +BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v) { BLI_assert(buffer); uchar4 res; #if BLI_HAVE_SSE2 - /* Bilinear interpolation needs to read and blend four image pixels, while - * also handling conditions of sample coordinate being outside of the - * image, in which case black (all zeroes) should be used as the sample - * contribution. - * - * Code below does all that without any branches, by making outside the - * image sample locations still read the first pixel of the image, but - * later making sure that the result is set to zero for that sample. */ - __m128 uvuv = _mm_set_ps(v, u, v, u); __m128 uvuv_floor = floor_simd(uvuv); @@ -380,18 +383,42 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo /* Check whether any of the coordinates are outside of the image. */ __m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width), _mm_set1_epi32(1)); - __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128()); - __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12); - __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12); - /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */ - __m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0)); - __m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1)); - __m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)), - _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1))); - /* Set x & y to zero for invalid samples. */ - x1234 = _mm_andnot_si128(invalid_1234, x1234); - y1234 = _mm_andnot_si128(invalid_1234, y1234); + /* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */ + __m128i x1234, y1234, invalid_1234; + + if constexpr (border) { + /* Blend black colors for samples right outside the image: figure out + * which of the 4 samples were outside, set their coordinates to zero + * and later on put black color into their place. */ + __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128()); + __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12); + __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12); + + /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */ + x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0)); + y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1)); + invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)), + _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1))); + /* Set x & y to zero for invalid samples. */ + x1234 = _mm_andnot_si128(invalid_1234, x1234); + y1234 = _mm_andnot_si128(invalid_1234, y1234); + } + else { + /* Clamp samples to image edges, unless all four of them are outside + * in which case return black. */ + __m128i xy12_clamped = max_i_simd(xy12, _mm_setzero_si128()); + xy12_clamped = min_i_simd(xy12_clamped, size_minus_1); + __m128i valid_xy12 = _mm_cmpeq_epi32(xy12, xy12_clamped); + __m128i valid_pairs = _mm_and_si128(valid_xy12, + _mm_shuffle_epi32(valid_xy12, _MM_SHUFFLE(0, 3, 2, 1))); + if (_mm_movemask_ps(_mm_castsi128_ps(valid_pairs)) == 0) { + return uchar4(0); + } + + x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0)); + y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1)); + } /* Read the four sample values. Do address calculations in C, since SSE * before 4.1 makes it very cumbersome to do full integer multiplies. */ @@ -404,8 +431,10 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]]; int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]]; __m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1); - /* Set samples to black for the ones that were actually invalid. */ - samples1234 = _mm_andnot_si128(invalid_1234, samples1234); + if constexpr (border) { + /* Set samples to black for the ones that were actually invalid. */ + samples1234 = _mm_andnot_si128(invalid_1234, samples1234); + } /* Expand samples from packed 8-bit RGBA to full floats: * spread to 16 bit values. */ @@ -455,35 +484,24 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo return uchar4(0); } - /* Sample including outside of edges of image. */ + /* Sample locations. */ const uchar *row1, *row2, *row3, *row4; uchar empty[4] = {0, 0, 0, 0}; - if (x1 < 0 || y1 < 0) { - row1 = empty; + if constexpr (border) { + row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4; + row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4; + row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4; + row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4; } else { - row1 = buffer + width * y1 * 4 + 4 * x1; - } - - if (x1 < 0 || y2 > height - 1) { - row2 = empty; - } - else { - row2 = buffer + width * y2 * 4 + 4 * x1; - } - - if (x2 > width - 1 || y1 < 0) { - row3 = empty; - } - else { - row3 = buffer + width * y1 * 4 + 4 * x2; - } - - if (x2 > width - 1 || y2 > height - 1) { - row4 = empty; - } - else { - row4 = buffer + width * y2 * 4 + 4 * x2; + x1 = blender::math::clamp(x1, 0, width - 1); + x2 = blender::math::clamp(x2, 0, width - 1); + y1 = blender::math::clamp(y1, 0, height - 1); + y2 = blender::math::clamp(y2, 0, height - 1); + row1 = buffer + (int64_t(width) * y1 + x1) * 4; + row2 = buffer + (int64_t(width) * y2 + x1) * 4; + row3 = buffer + (int64_t(width) * y1 + x2) * 4; + row4 = buffer + (int64_t(width) * y2 + x2) * 4; } float a = u - uf; @@ -502,17 +520,41 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo return res; } +uchar4 interpolate_bilinear_border_byte( + const uchar *buffer, int width, int height, float u, float v) +{ + return bilinear_byte_impl(buffer, width, height, u, v); +} + +uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v) +{ + return bilinear_byte_impl(buffer, width, height, u, v); +} + +float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v) +{ + float4 res; + bilinear_fl_impl(buffer, res, width, height, 4, u, v); + return res; +} + +void interpolate_bilinear_border_fl( + const float *buffer, float *output, int width, int height, int components, float u, float v) +{ + bilinear_fl_impl(buffer, output, width, height, components, u, v); +} + float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v) { float4 res; - bilinear_fl_impl(buffer, res, width, height, 4, u, v); + bilinear_fl_impl(buffer, res, width, height, 4, u, v); return res; } void interpolate_bilinear_fl( const float *buffer, float *output, int width, int height, int components, float u, float v) { - bilinear_fl_impl(buffer, output, width, height, components, u, v); + bilinear_fl_impl(buffer, output, width, height, components, u, v); } void interpolate_bilinear_wrap_fl(const float *buffer, @@ -525,7 +567,7 @@ void interpolate_bilinear_wrap_fl(const float *buffer, bool wrap_x, bool wrap_y) { - bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_x, wrap_y); + bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_x, wrap_y); } uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v) @@ -573,7 +615,7 @@ uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v) { float4 res; - bilinear_fl_impl(buffer, res, width, height, 4, u, v, true, true); + bilinear_fl_impl(buffer, res, width, height, 4, u, v, true, true); return res; } diff --git a/source/blender/blenlib/tests/BLI_math_interp_test.cc b/source/blender/blenlib/tests/BLI_math_interp_test.cc index 07a68f118b5..0ce66170467 100644 --- a/source/blender/blenlib/tests/BLI_math_interp_test.cc +++ b/source/blender/blenlib/tests/BLI_math_interp_test.cc @@ -28,10 +28,10 @@ TEST(math_interp, BilinearCharExactSamples) { uchar4 res; uchar4 exp1 = {73, 108, 153, 251}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f); EXPECT_EQ(exp1, res); uchar4 exp2 = {240, 160, 90, 20}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f); EXPECT_EQ(exp2, res); } @@ -39,10 +39,10 @@ TEST(math_interp, BilinearCharHalfwayUSamples) { uchar4 res; uchar4 exp1 = {31, 37, 42, 48}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f); EXPECT_EQ(exp1, res); uchar4 exp2 = {243, 242, 224, 223}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f); EXPECT_EQ(exp2, res); } @@ -50,10 +50,10 @@ TEST(math_interp, BilinearCharHalfwayVSamples) { uchar4 res; uchar4 exp1 = {1, 2, 3, 4}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f); EXPECT_EQ(exp1, res); uchar4 exp2 = {127, 128, 129, 130}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f); EXPECT_EQ(exp2, res); } @@ -61,10 +61,11 @@ TEST(math_interp, BilinearCharSamples) { uchar4 res; uchar4 exp1 = {136, 133, 132, 130}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 0.625f); + res = interpolate_bilinear_border_byte( + image_char[0][0], image_width, image_height, 1.25f, 0.625f); EXPECT_EQ(exp1, res); uchar4 exp2 = {219, 191, 167, 142}; - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f); EXPECT_EQ(exp2, res); } @@ -72,25 +73,39 @@ TEST(math_interp, BilinearFloatSamples) { float4 res; float4 exp1 = {135.9375f, 133.28125f, 131.5625f, 129.84375f}; - res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f); + res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f); EXPECT_V4_NEAR(exp1, res, float_tolerance); float4 exp2 = {219.36f, 191.2f, 166.64f, 142.08f}; - res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f); + res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f); EXPECT_V4_NEAR(exp2, res, float_tolerance); } +TEST(math_interp, BilinearCharPartiallyOutsideImageBorder) +{ + uchar4 res; + uchar4 exp1 = {1, 1, 2, 2}; + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f); + EXPECT_EQ(exp1, res); + uchar4 exp2 = {9, 11, 15, 22}; + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f); + EXPECT_EQ(exp2, res); + uchar4 exp3 = {173, 115, 65, 14}; + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f); + EXPECT_EQ(exp3, res); +} + TEST(math_interp, BilinearCharPartiallyOutsideImage) { uchar4 res; - uchar4 exp1 = {1, 1, 2, 2}; + uint4 exp1 = {1, 2, 3, 4}; res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f); - EXPECT_EQ(exp1, res); - uchar4 exp2 = {9, 11, 15, 22}; + EXPECT_EQ(exp1, uint4(res)); + uint4 exp2 = {87, 113, 147, 221}; res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f); - EXPECT_EQ(exp2, res); - uchar4 exp3 = {173, 115, 65, 14}; + EXPECT_EQ(exp2, uint4(res)); + uint4 exp3 = {240, 160, 90, 20}; res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f); - EXPECT_EQ(exp3, res); + EXPECT_EQ(exp3, uint4(res)); } TEST(math_interp, BilinearCharPartiallyOutsideImageWrap) @@ -107,16 +122,30 @@ TEST(math_interp, BilinearCharPartiallyOutsideImageWrap) EXPECT_EQ(exp3, res); } -TEST(math_interp, BilinearFloatPartiallyOutsideImage) +TEST(math_interp, BilinearFloatPartiallyOutsideImageBorder) { float4 res; float4 exp1 = {0.5f, 1, 1.5f, 2}; - res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f); + res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f); EXPECT_V4_NEAR(exp1, res, float_tolerance); float4 exp2 = {8.675f, 11.325f, 14.725f, 22.1f}; - res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f); + res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f); EXPECT_V4_NEAR(exp2, res, float_tolerance); float4 exp3 = {172.8f, 115.2f, 64.8f, 14.4f}; + res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f); + EXPECT_V4_NEAR(exp3, res, float_tolerance); +} + +TEST(math_interp, BilinearFloatPartiallyOutsideImage) +{ + float4 res; + float4 exp1 = {1.0f, 2.0f, 3.0f, 4.0f}; + res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f); + EXPECT_V4_NEAR(exp1, res, float_tolerance); + float4 exp2 = {86.75f, 113.25f, 147.25f, 221.0f}; + res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f); + EXPECT_V4_NEAR(exp2, res, float_tolerance); + float4 exp3 = {240.0f, 160.0f, 90.0f, 20.0f}; res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f); EXPECT_V4_NEAR(exp3, res, float_tolerance); } @@ -151,23 +180,23 @@ TEST(math_interp, BilinearCharFullyOutsideImage) uchar4 res; uchar4 exp = {0, 0, 0, 0}; /* Out of range on U */ - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.5f, 0); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.5f, 0); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.1f, 0); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.1f, 0); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 3, 0); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 3, 0); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 5, 0); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 5, 0); EXPECT_EQ(exp, res); /* Out of range on V */ - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -3.2f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -3.2f); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -1.5f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -1.5f); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 3.1f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 3.1f); EXPECT_EQ(exp, res); - res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 500.0f); + res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 500.0f); EXPECT_EQ(exp, res); } diff --git a/source/blender/compositor/intern/COM_MemoryBuffer.h b/source/blender/compositor/intern/COM_MemoryBuffer.h index e5902e84821..9e240537daf 100644 --- a/source/blender/compositor/intern/COM_MemoryBuffer.h +++ b/source/blender/compositor/intern/COM_MemoryBuffer.h @@ -249,17 +249,17 @@ class MemoryBuffer { single_y = rel_y - last_y; } - math::interpolate_bilinear_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y); + math::interpolate_bilinear_border_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y); return; } - math::interpolate_bilinear_fl(buffer_, - out, - get_width(), - get_height(), - num_channels_, - get_relative_x(x), - get_relative_y(y)); + math::interpolate_bilinear_border_fl(buffer_, + out, + get_width(), + get_height(), + num_channels_, + get_relative_x(x), + get_relative_y(y)); } void read_elem_sampled(float x, float y, PixelSampler sampler, float *out) const diff --git a/source/blender/compositor/operations/COM_ImageOperation.cc b/source/blender/compositor/operations/COM_ImageOperation.cc index 331cd98b58f..c9415306ebf 100644 --- a/source/blender/compositor/operations/COM_ImageOperation.cc +++ b/source/blender/compositor/operations/COM_ImageOperation.cc @@ -104,7 +104,7 @@ static void sample_image_at_location(ImBuf *ibuf, imbuf::interpolate_nearest_fl(ibuf, color, x, y); break; case PixelSampler::Bilinear: - imbuf::interpolate_bilinear_fl(ibuf, color, x, y); + imbuf::interpolate_bilinear_border_fl(ibuf, color, x, y); break; case PixelSampler::Bicubic: imbuf::interpolate_cubic_bspline_fl(ibuf, color, x, y); @@ -118,7 +118,7 @@ static void sample_image_at_location(ImBuf *ibuf, byte_color = imbuf::interpolate_nearest_byte(ibuf, x, y); break; case PixelSampler::Bilinear: - byte_color = imbuf::interpolate_bilinear_byte(ibuf, x, y); + byte_color = imbuf::interpolate_bilinear_border_byte(ibuf, x, y); break; case PixelSampler::Bicubic: byte_color = imbuf::interpolate_cubic_bspline_byte(ibuf, x, y); diff --git a/source/blender/compositor/operations/COM_MovieClipOperation.cc b/source/blender/compositor/operations/COM_MovieClipOperation.cc index 569450fe4dd..1ea9f78e491 100644 --- a/source/blender/compositor/operations/COM_MovieClipOperation.cc +++ b/source/blender/compositor/operations/COM_MovieClipOperation.cc @@ -85,7 +85,7 @@ void MovieClipBaseOperation::execute_pixel_sampled(float output[4], imbuf::interpolate_nearest_fl(ibuf, output, x, y); break; case PixelSampler::Bilinear: - imbuf::interpolate_bilinear_fl(ibuf, output, x, y); + imbuf::interpolate_bilinear_border_fl(ibuf, output, x, y); break; case PixelSampler::Bicubic: imbuf::interpolate_cubic_bspline_fl(ibuf, output, x, y); diff --git a/source/blender/compositor/operations/COM_MultilayerImageOperation.cc b/source/blender/compositor/operations/COM_MultilayerImageOperation.cc index 9a91c44f227..b7e3fed43ef 100644 --- a/source/blender/compositor/operations/COM_MultilayerImageOperation.cc +++ b/source/blender/compositor/operations/COM_MultilayerImageOperation.cc @@ -91,7 +91,7 @@ void MultilayerColorOperation::execute_pixel_sampled(float output[4], imbuf::interpolate_nearest_fl(buffer_, output, x, y); break; case PixelSampler::Bilinear: - imbuf::interpolate_bilinear_fl(buffer_, output, x, y); + imbuf::interpolate_bilinear_border_fl(buffer_, output, x, y); break; case PixelSampler::Bicubic: imbuf::interpolate_cubic_bspline_fl(buffer_, output, x, y); diff --git a/source/blender/compositor/operations/COM_RenderLayersProg.cc b/source/blender/compositor/operations/COM_RenderLayersProg.cc index 32458499276..02afacc2108 100644 --- a/source/blender/compositor/operations/COM_RenderLayersProg.cc +++ b/source/blender/compositor/operations/COM_RenderLayersProg.cc @@ -77,7 +77,8 @@ void RenderLayersProg::do_interpolation(float output[4], float x, float y, Pixel math::interpolate_nearest_fl(input_buffer_, output, width, height, elementsize_, x, y); break; case PixelSampler::Bilinear: - math::interpolate_bilinear_fl(input_buffer_, output, width, height, elementsize_, x, y); + math::interpolate_bilinear_border_fl( + input_buffer_, output, width, height, elementsize_, x, y); break; case PixelSampler::Bicubic: math::interpolate_cubic_bspline_fl(input_buffer_, output, width, height, elementsize_, x, y); diff --git a/source/blender/imbuf/IMB_interp.hh b/source/blender/imbuf/IMB_interp.hh index f17943e5abc..43c2602707b 100644 --- a/source/blender/imbuf/IMB_interp.hh +++ b/source/blender/imbuf/IMB_interp.hh @@ -18,6 +18,8 @@ namespace blender::imbuf { +/* Nearest sampling. */ + [[nodiscard]] inline uchar4 interpolate_nearest_byte(const ImBuf *in, float u, float v) { return math::interpolate_nearest_byte(in->byte_buffer.data, in->x, in->y, u, v); @@ -35,6 +37,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl math::interpolate_nearest_fl(in->float_buffer.data, output, in->x, in->y, 4, u, v); } +/* Nearest sampling with UV wrapping. */ + [[nodiscard]] inline uchar4 interpolate_nearest_wrap_byte(const ImBuf *in, float u, float v) { return math::interpolate_nearest_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v); @@ -44,6 +48,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl return math::interpolate_nearest_wrap_fl(in->float_buffer.data, in->x, in->y, u, v); } +/* Bilinear sampling. */ + [[nodiscard]] inline uchar4 interpolate_bilinear_byte(const ImBuf *in, float u, float v) { return math::interpolate_bilinear_byte(in->byte_buffer.data, in->x, in->y, u, v); @@ -63,6 +69,29 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f memcpy(output, &col, sizeof(col)); } +/* Bilinear sampling, samples near edge blend into transparency. */ + +[[nodiscard]] inline uchar4 interpolate_bilinear_border_byte(const ImBuf *in, float u, float v) +{ + return math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v); +} +[[nodiscard]] inline float4 interpolate_bilinear_border_fl(const ImBuf *in, float u, float v) +{ + return math::interpolate_bilinear_border_fl(in->float_buffer.data, in->x, in->y, u, v); +} +inline void interpolate_bilinear_border_byte(const ImBuf *in, uchar output[4], float u, float v) +{ + uchar4 col = math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v); + memcpy(output, &col, sizeof(col)); +} +inline void interpolate_bilinear_border_fl(const ImBuf *in, float output[4], float u, float v) +{ + float4 col = math::interpolate_bilinear_border_fl(in->float_buffer.data, in->x, in->y, u, v); + memcpy(output, &col, sizeof(col)); +} + +/* Bilinear sampling with UV wrapping. */ + [[nodiscard]] inline uchar4 interpolate_bilinear_wrap_byte(const ImBuf *in, float u, float v) { return math::interpolate_bilinear_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v); @@ -72,6 +101,8 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f return math::interpolate_bilinear_wrap_fl(in->float_buffer.data, in->x, in->y, u, v); } +/* Cubic B-Spline sampling. */ + [[nodiscard]] inline uchar4 interpolate_cubic_bspline_byte(const ImBuf *in, float u, float v) { return math::interpolate_cubic_bspline_byte(in->byte_buffer.data, in->x, in->y, u, v); @@ -91,6 +122,8 @@ inline void interpolate_cubic_bspline_fl(const ImBuf *in, float output[4], float memcpy(output, &col, sizeof(col)); } +/* Cubic Mitchell sampling. */ + [[nodiscard]] inline uchar4 interpolate_cubic_mitchell_byte(const ImBuf *in, float u, float v) { return math::interpolate_cubic_mitchell_byte(in->byte_buffer.data, in->x, in->y, u, v); diff --git a/source/blender/imbuf/intern/scaling.cc b/source/blender/imbuf/intern/scaling.cc index bf99aaca942..1d5fcea2f42 100644 --- a/source/blender/imbuf/intern/scaling.cc +++ b/source/blender/imbuf/intern/scaling.cc @@ -1761,12 +1761,12 @@ static void *do_scale_thread(void *data_v) int offset = y * data->newx + x; if (data->byte_buffer) { - interpolate_bilinear_byte(ibuf, data->byte_buffer + 4 * offset, u, v); + interpolate_bilinear_border_byte(ibuf, data->byte_buffer + 4 * offset, u, v); } if (data->float_buffer) { float *pixel = data->float_buffer + ibuf->channels * offset; - blender::math::interpolate_bilinear_fl( + blender::math::interpolate_bilinear_border_fl( ibuf->float_buffer.data, pixel, ibuf->x, ibuf->y, ibuf->channels, u, v); } } diff --git a/source/blender/imbuf/intern/transform.cc b/source/blender/imbuf/intern/transform.cc index c5d11b14c38..03e6373b88b 100644 --- a/source/blender/imbuf/intern/transform.cc +++ b/source/blender/imbuf/intern/transform.cc @@ -38,6 +38,9 @@ struct TransformContext { /* Source UV step delta, when moving along one destination pixel in Y axis. */ float2 add_y; + /* Source corners in destination pixel space, counter-clockwise. */ + float2 src_corners[4]; + IndexRange dst_region_x_range; IndexRange dst_region_y_range; @@ -66,14 +69,15 @@ struct TransformContext { rcti rect; BLI_rcti_init_minmax(&rect); float4x4 inverse = math::invert(transform_matrix); - for (const int2 &src_coords : { - int2(src_crop.xmin, src_crop.ymin), - int2(src_crop.xmax, src_crop.ymin), - int2(src_crop.xmin, src_crop.ymax), - int2(src_crop.xmax, src_crop.ymax), - }) - { - float3 dst_co = math::transform_point(inverse, float3(src_coords.x, src_coords.y, 0.0f)); + const int2 src_coords[4] = {int2(src_crop.xmin, src_crop.ymin), + int2(src_crop.xmax, src_crop.ymin), + int2(src_crop.xmax, src_crop.ymax), + int2(src_crop.xmin, src_crop.ymax)}; + for (int i = 0; i < 4; i++) { + int2 src_co = src_coords[i]; + float3 dst_co = math::transform_point(inverse, float3(src_co.x, src_co.y, 0.0f)); + src_corners[i] = float2(dst_co.x, dst_co.y); + BLI_rcti_do_minmax_v(&rect, int2(dst_co) + margin); BLI_rcti_do_minmax_v(&rect, int2(dst_co) - margin); } @@ -251,10 +255,8 @@ static void process_scanlines(const TransformContext &ctx, IndexRange y_range) * * Do a box filter: for each destination pixel, accumulate XxY samples from source, * based on scaling factors (length of X/Y pixel steps). Use at least 2 samples - * along each direction, so that in case of rotation the resulting edges get - * some anti-aliasing, to match previous Subsampled3x3 filter behavior. The - * "at least 2" can be removed once/if transform edge anti-aliasing is implemented - * in general way for all filters. Use at most 100 samples along each direction, + * along each direction, so that in case of rotation the image gets + * some anti-aliasing. Use at most 100 samples along each direction, * just as some way of clamping possible upper cost. Scaling something down by more * than 100x should rarely if ever happen, worst case they will get some aliasing. */ @@ -336,8 +338,9 @@ template static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range) { int channels = ctx.src->channels; + if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) { - /* Float images. */ + /* Float pixels. */ if (channels == 4) { transform_scanlines(ctx, y_range); } @@ -351,14 +354,109 @@ static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y transform_scanlines(ctx, y_range); } } - else if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) { - /* Byte images. */ + + if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) { + /* Byte pixels. */ if (channels == 4) { transform_scanlines(ctx, y_range); } } } +static float calc_coverage(float2 pos, int2 ipos, float2 delta, bool is_steep) +{ + /* Very approximate: just take difference from coordinate (x or y based on + * steepness) to the integer coordinate. Adjust based on directions + * of the edges. */ + float cov; + if (is_steep) { + cov = fabsf(ipos.x - pos.x); + if (delta.y < 0) { + cov = 1.0f - cov; + } + } + else { + cov = fabsf(ipos.y - pos.y); + if (delta.x > 0) { + cov = 1.0f - cov; + } + } + cov = math::clamp(cov, 0.0f, 1.0f); + /* Resulting coverage is 0.5 .. 1.0 range, since we are only covering + * half of the pixels that should be AA'd (the other half is outside the + * quad and does not get rasterized). Square the coverage to get + * more range, and it looks a bit nicer that way. */ + cov *= cov; + return cov; +} + +static void edge_aa(const TransformContext &ctx) +{ + /* Rasterize along outer source edges into the destination image, + * reducing alpha based on pixel distance to the edge at each pixel. + * This is very approximate and not 100% correct "analytical AA", + * but simple to do and better than nothing. */ + for (int line_idx = 0; line_idx < 4; line_idx++) { + float2 ptA = ctx.src_corners[line_idx]; + float2 ptB = ctx.src_corners[(line_idx + 1) & 3]; + float2 delta = ptB - ptA; + float2 abs_delta = math::abs(delta); + float length = math::max(abs_delta.x, abs_delta.y); + if (length < 1) { + continue; + } + bool is_steep = length == abs_delta.y; + + /* It is very common to have non-rotated strips; check if edge line is + * horizontal or vertical and would not alter the coverage and can + * be skipped. */ + constexpr float NO_ROTATION = 1.0e-6f; + constexpr float NO_AA_CONTRIB = 1.0e-2f; + if (is_steep) { + if ((abs_delta.x < NO_ROTATION) && (fabsf(ptA.x - roundf(ptA.x)) < NO_AA_CONTRIB)) { + continue; + } + } + else { + if ((abs_delta.y < NO_ROTATION) && (fabsf(ptA.y - roundf(ptA.y)) < NO_AA_CONTRIB)) { + continue; + } + } + + /* DDA line raster: step one pixel along the longer direction. */ + delta /= length; + if (ctx.dst->float_buffer.data != nullptr) { + /* Float pixels. */ + float *dst = ctx.dst->float_buffer.data; + for (int i = 0; i < length; i++) { + float2 pos = ptA + i * delta; + int2 ipos = int2(pos); + if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) { + float cov = calc_coverage(pos, ipos, delta, is_steep); + size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4; + dst[idx + 0] *= cov; + dst[idx + 1] *= cov; + dst[idx + 2] *= cov; + dst[idx + 3] *= cov; + } + } + } + if (ctx.dst->byte_buffer.data != nullptr) { + /* Byte pixels. */ + uchar *dst = ctx.dst->byte_buffer.data; + for (int i = 0; i < length; i++) { + float2 pos = ptA + i * delta; + int2 ipos = int2(pos); + if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) { + float cov = calc_coverage(pos, ipos, delta, is_steep); + size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4; + dst[idx + 3] *= cov; + } + } + } + } +} + } // namespace blender::imbuf::transform using namespace blender::imbuf::transform; @@ -403,4 +501,8 @@ void IMB_transform(const ImBuf *src, transform_scanlines_filter(ctx, y_range); } }); + + if (crop && (filter != IMB_FILTER_NEAREST)) { + edge_aa(ctx); + } } diff --git a/source/blender/render/intern/texture_margin.cc b/source/blender/render/intern/texture_margin.cc index 51d30ab0726..073166ca9e0 100644 --- a/source/blender/render/intern/texture_margin.cc +++ b/source/blender/render/intern/texture_margin.cc @@ -274,10 +274,12 @@ class TextureMarginMap { if (found_pixel_in_polygon) { if (ibuf_ptr_fl) { - ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_fl(ibuf, destX, destY); + ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_border_fl( + ibuf, destX, destY); } if (ibuf_ptr_ch) { - ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_byte(ibuf, destX, destY); + ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_border_byte( + ibuf, destX, destY); } /* Add our new pixels to the assigned pixel map. */ mask[pixel_index] = 1; diff --git a/source/blender/sequencer/intern/effects.cc b/source/blender/sequencer/intern/effects.cc index 06b6668d0ec..fb098641228 100644 --- a/source/blender/sequencer/intern/effects.cc +++ b/source/blender/sequencer/intern/effects.cc @@ -1584,10 +1584,10 @@ static void transform_image(int x, break; case 1: if (dst_fl) { - dst_fl[offset] = imbuf::interpolate_bilinear_fl(ibuf, xt, yt); + dst_fl[offset] = imbuf::interpolate_bilinear_border_fl(ibuf, xt, yt); } else { - dst_ch[offset] = imbuf::interpolate_bilinear_byte(ibuf, xt, yt); + dst_ch[offset] = imbuf::interpolate_bilinear_border_byte(ibuf, xt, yt); } break; case 2: diff --git a/tests/python/sequencer_render_tests.py b/tests/python/sequencer_render_tests.py index 4be39bac722..35583d2c358 100644 --- a/tests/python/sequencer_render_tests.py +++ b/tests/python/sequencer_render_tests.py @@ -50,6 +50,9 @@ def main(): from modules import render_report report = render_report.Report("Sequencer", output_dir, oiiotool) report.set_pixelated(True) + # default error tolerances are quite large, lower them + report.set_fail_threshold(1.0 / 255.0) + report.set_fail_percent(0.01) report.set_reference_dir("reference") test_dir_name = Path(test_dir).name