diff --git a/source/blender/blenlib/BLI_math_interp.h b/source/blender/blenlib/BLI_math_interp.h index d63f2debe90..5cb999ccc40 100644 --- a/source/blender/blenlib/BLI_math_interp.h +++ b/source/blender/blenlib/BLI_math_interp.h @@ -15,24 +15,14 @@ extern "C" { void BLI_bicubic_interpolation_fl( const float *buffer, float *output, int width, int height, int components, float u, float v); -void BLI_bicubic_interpolation_char(const unsigned char *buffer, - unsigned char *output, - int width, - int height, - int components, - float u, - float v); +void BLI_bicubic_interpolation_char( + const unsigned char *buffer, unsigned char *output, int width, int height, float u, float v); void BLI_bilinear_interpolation_fl( const float *buffer, float *output, int width, int height, int components, float u, float v); -void BLI_bilinear_interpolation_char(const unsigned char *buffer, - unsigned char *output, - int width, - int height, - int components, - float u, - float v); +void BLI_bilinear_interpolation_char( + const unsigned char *buffer, unsigned char *output, int width, int height, float u, float v); void BLI_bilinear_interpolation_wrap_fl(const float *buffer, float *output, @@ -44,16 +34,6 @@ void BLI_bilinear_interpolation_wrap_fl(const float *buffer, bool wrap_x, bool wrap_y); -void BLI_bilinear_interpolation_wrap_char(const unsigned char *buffer, - unsigned char *output, - int width, - int height, - int components, - float u, - float v, - bool wrap_x, - bool wrap_y); - #define EWA_MAXIDX 255 extern const float EWA_WTS[EWA_MAXIDX + 1]; diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 5c1eed4fec4..fafc5aa574b 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -524,6 +524,7 @@ if(WITH_GTESTS) tests/BLI_math_bits_test.cc tests/BLI_math_color_test.cc tests/BLI_math_geom_test.cc + tests/BLI_math_interp_test.cc tests/BLI_math_matrix_test.cc tests/BLI_math_matrix_types_test.cc 
tests/BLI_math_rotation_test.cc diff --git a/source/blender/blenlib/intern/math_interp.c b/source/blender/blenlib/intern/math_interp.c index c7044acb39a..cdf5caec321 100644 --- a/source/blender/blenlib/intern/math_interp.c +++ b/source/blender/blenlib/intern/math_interp.c @@ -7,12 +7,18 @@ */ #include +#include #include "BLI_math_base.h" #include "BLI_math_interp.h" #include "BLI_math_vector.h" +#include "BLI_simd.h" #include "BLI_strict_flags.h" +#if BLI_HAVE_SSE2 && defined(__SSE4_1__) +# include /* _mm_floor_ps */ +#endif + /************************************************************************** * INTERPOLATIONS * @@ -236,221 +242,298 @@ void BLI_bicubic_interpolation_fl( } void BLI_bicubic_interpolation_char( - const uchar *buffer, uchar *output, int width, int height, int components, float u, float v) + const uchar *buffer, uchar *output, int width, int height, float u, float v) { - bicubic_interpolation(buffer, NULL, output, NULL, width, height, components, u, v); + bicubic_interpolation(buffer, NULL, output, NULL, width, height, 4, u, v); } /* BILINEAR INTERPOLATION */ -BLI_INLINE void bilinear_interpolation(const uchar *byte_buffer, - const float *float_buffer, - uchar *byte_output, - float *float_output, - int width, - int height, - int components, - float u, - float v, - bool wrap_x, - bool wrap_y) +BLI_INLINE void bilinear_interpolation_fl(const float *float_buffer, + float *float_output, + int width, + int height, + int components, + float u, + float v, + bool wrap_x, + bool wrap_y) { float a, b; float a_b, ma_b, a_mb, ma_mb; int y1, y2, x1, x2; - /* ImBuf in must have a valid rect or rect_float, assume this is already checked */ + float uf = floorf(u); + float vf = floorf(v); - x1 = (int)floor(u); - x2 = (int)ceil(u); - y1 = (int)floor(v); - y2 = (int)ceil(v); + x1 = (int)uf; + x2 = x1 + 1; + y1 = (int)vf; + y2 = y1 + 1; - if (float_output) { - const float *row1, *row2, *row3, *row4; - const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + const float 
*row1, *row2, *row3, *row4; + const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - /* pixel value must be already wrapped, however values at boundaries may flip */ - if (wrap_x) { - if (x1 < 0) { - x1 = width - 1; - } - if (x2 >= width) { - x2 = 0; - } + /* With wrapping, the sample position itself must already be inside the image; only a neighbor tap that steps over the border is moved to the opposite edge here. */ + if (wrap_x) { + if (x1 < 0) { + x1 = width - 1; } - else if (x2 < 0 || x1 >= width) { - copy_vn_fl(float_output, components, 0.0f); - return; + if (x2 >= width) { + x2 = 0; } + } + else if (x2 < 0 || x1 >= width) { + copy_vn_fl(float_output, components, 0.0f); + return; + } - if (wrap_y) { - if (y1 < 0) { - y1 = height - 1; - } - if (y2 >= height) { - y2 = 0; - } + if (wrap_y) { + if (y1 < 0) { + y1 = height - 1; } - else if (y2 < 0 || y1 >= height) { - copy_vn_fl(float_output, components, 0.0f); - return; + if (y2 >= height) { + y2 = 0; } + } + else if (y2 < 0 || y1 >= height) { + copy_vn_fl(float_output, components, 0.0f); + return; + } - /* sample including outside of edges of image */ - if (x1 < 0 || y1 < 0) { - row1 = empty; - } - else { - row1 = float_buffer + width * y1 * components + components * x1; - } - - if (x1 < 0 || y2 > height - 1) { - row2 = empty; - } - else { - row2 = float_buffer + width * y2 * components + components * x1; - } - - if (x2 > width - 1 || y1 < 0) { - row3 = empty; - } - else { - row3 = float_buffer + width * y1 * components + components * x2; - } - - if (x2 > width - 1 || y2 > height - 1) { - row4 = empty; - } - else { - row4 = float_buffer + width * y2 * components + components * x2; - } - - a = u - floorf(u); - b = v - floorf(v); - a_b = a * b; - ma_b = (1.0f - a) * b; - a_mb = a * (1.0f - b); - ma_mb = (1.0f - a) * (1.0f - b); - - if (components == 1) { - float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; - } - else if (components == 3) { - float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; - float_output[1] = ma_mb * row1[1] + a_mb 
* row3[1] + ma_b * row2[1] + a_b * row4[1]; - float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2]; - } - else { - float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; - float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1]; - float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2]; - float_output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3]; - } + /* sample including outside of edges of image */ + if (x1 < 0 || y1 < 0) { + row1 = empty; } else { - const uchar *row1, *row2, *row3, *row4; - uchar empty[4] = {0, 0, 0, 0}; - - /* pixel value must be already wrapped, however values at boundaries may flip */ - if (wrap_x) { - if (x1 < 0) { - x1 = width - 1; - } - if (x2 >= width) { - x2 = 0; - } - } - else if (x2 < 0 || x1 >= width) { - copy_vn_uchar(byte_output, components, 0); - return; - } - - if (wrap_y) { - if (y1 < 0) { - y1 = height - 1; - } - if (y2 >= height) { - y2 = 0; - } - } - else if (y2 < 0 || y1 >= height) { - copy_vn_uchar(byte_output, components, 0); - return; - } - - /* sample including outside of edges of image */ - if (x1 < 0 || y1 < 0) { - row1 = empty; - } - else { - row1 = byte_buffer + width * y1 * components + components * x1; - } - - if (x1 < 0 || y2 > height - 1) { - row2 = empty; - } - else { - row2 = byte_buffer + width * y2 * components + components * x1; - } - - if (x2 > width - 1 || y1 < 0) { - row3 = empty; - } - else { - row3 = byte_buffer + width * y1 * components + components * x2; - } - - if (x2 > width - 1 || y2 > height - 1) { - row4 = empty; - } - else { - row4 = byte_buffer + width * y2 * components + components * x2; - } - - a = u - floorf(u); - b = v - floorf(v); - a_b = a * b; - ma_b = (1.0f - a) * b; - a_mb = a * (1.0f - b); - ma_mb = (1.0f - a) * (1.0f - b); - - if (components == 1) { - byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + - 
0.5f); - } - else if (components == 3) { - byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + - 0.5f); - byte_output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + - 0.5f); - byte_output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + - 0.5f); - } - else { - byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + - 0.5f); - byte_output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + - 0.5f); - byte_output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + - 0.5f); - byte_output[3] = (uchar)(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + - 0.5f); - } + row1 = float_buffer + width * y1 * components + components * x1; } + + if (x1 < 0 || y2 > height - 1) { + row2 = empty; + } + else { + row2 = float_buffer + width * y2 * components + components * x1; + } + + if (x2 > width - 1 || y1 < 0) { + row3 = empty; + } + else { + row3 = float_buffer + width * y1 * components + components * x2; + } + + if (x2 > width - 1 || y2 > height - 1) { + row4 = empty; + } + else { + row4 = float_buffer + width * y2 * components + components * x2; + } + + a = u - uf; + b = v - vf; + a_b = a * b; + ma_b = (1.0f - a) * b; + a_mb = a * (1.0f - b); + ma_mb = (1.0f - a) * (1.0f - b); + + if (components == 1) { + float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; + } + else if (components == 3) { + float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; + float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1]; + float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2]; + } + else { +#if BLI_HAVE_SSE2 + __m128 rgba1 = _mm_loadu_ps(row1); + __m128 rgba2 = _mm_loadu_ps(row2); + __m128 rgba3 = _mm_loadu_ps(row3); + __m128 rgba4 = _mm_loadu_ps(row4); + rgba1 
= _mm_mul_ps(_mm_set1_ps(ma_mb), rgba1); + rgba2 = _mm_mul_ps(_mm_set1_ps(ma_b), rgba2); + rgba3 = _mm_mul_ps(_mm_set1_ps(a_mb), rgba3); + rgba4 = _mm_mul_ps(_mm_set1_ps(a_b), rgba4); + __m128 rgba13 = _mm_add_ps(rgba1, rgba3); + __m128 rgba24 = _mm_add_ps(rgba2, rgba4); + __m128 rgba = _mm_add_ps(rgba13, rgba24); + _mm_storeu_ps(float_output, rgba); +#else + float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0]; + float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1]; + float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2]; + float_output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3]; +#endif + } +} + +void BLI_bilinear_interpolation_char( + const uchar *buffer, uchar *output, int width, int height, float u, float v) +{ +#if BLI_HAVE_SSE2 + /* Samples a 4-channel byte image; each pixel is read below as one packed 32-bit value. Bilinear interpolation needs to read and blend four image pixels, while + * also handling conditions of sample coordinate being outside of the + * image, in which case black (all zeroes) should be used as the sample + * contribution. + * + * Code below does all that without any branches, by making outside the + * image sample locations still read the first pixel of the image, but + * later making sure that the result is set to zero for that sample. */ + + __m128 uvuv = _mm_set_ps(v, u, v, u); + +# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON) + /* If we're on SSE4 or ARM NEON, just use the simple floor() way. */ + __m128 uvuv_floor = _mm_floor_ps(uvuv); +# else + /* The hard way: truncate, for negative inputs this will round towards zero. + * Then compare with input UV, and subtract 1 for the inputs that are + * below their truncated value (negative inputs with a fractional part). 
*/ + __m128 uv_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(uvuv)); + __m128 uv_neg = _mm_cmplt_ps(uvuv, uv_trunc); + __m128 uvuv_floor = _mm_sub_ps(uv_trunc, _mm_and_ps(uv_neg, _mm_set1_ps(1.0f))); +# endif + + /* Lanes are x1, y1, x2, y2 where x2 = x1 + 1 and y2 = y1 + 1. */ + __m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0)); + /* Check whether any of the coordinates are outside of the image. */ + __m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width), + _mm_set1_epi32(1)); + __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128()); + __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12); + __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12); + + /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */ + __m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0)); + __m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1)); + __m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)), + _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1))); + /* Set x & y to zero for invalid samples, so the read below targets the first pixel instead of memory outside the image. */ + x1234 = _mm_andnot_si128(invalid_1234, x1234); + y1234 = _mm_andnot_si128(invalid_1234, y1234); + + /* Read the four sample values. Do address calculations in C, since SSE + * before 4.1 makes it very cumbersome to do full integer multiplies. */ + int xcoord[4]; + int ycoord[4]; + _mm_storeu_ps((float *)xcoord, _mm_castsi128_ps(x1234)); + _mm_storeu_ps((float *)ycoord, _mm_castsi128_ps(y1234)); + int sample1 = ((const int *)buffer)[ycoord[0] * (int64_t)width + xcoord[0]]; + int sample2 = ((const int *)buffer)[ycoord[1] * (int64_t)width + xcoord[1]]; + int sample3 = ((const int *)buffer)[ycoord[2] * (int64_t)width + xcoord[2]]; + int sample4 = ((const int *)buffer)[ycoord[3] * (int64_t)width + xcoord[3]]; + __m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1); + /* Set samples to black for the ones that were actually invalid. 
*/ + samples1234 = _mm_andnot_si128(invalid_1234, samples1234); + + /* Expand samples from packed 8-bit RGBA to full floats: + * spread to 16 bit values. */ + __m128i rgba16_12 = _mm_unpacklo_epi8(samples1234, _mm_setzero_si128()); + __m128i rgba16_34 = _mm_unpackhi_epi8(samples1234, _mm_setzero_si128()); + /* Spread to 32 bit values and convert to float. */ + __m128 rgba1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_12, _mm_setzero_si128())); + __m128 rgba2 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_12, _mm_setzero_si128())); + __m128 rgba3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_34, _mm_setzero_si128())); + __m128 rgba4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_34, _mm_setzero_si128())); + + /* Calculate interpolation factors: (1-a)*(1-b), (1-a)*b, a*(1-b), a*b */ + __m128 abab = _mm_sub_ps(uvuv, uvuv_floor); + __m128 m_abab = _mm_sub_ps(_mm_set1_ps(1.0f), abab); + __m128 ab_mab = _mm_shuffle_ps(abab, m_abab, _MM_SHUFFLE(3, 2, 1, 0)); + __m128 factors = _mm_mul_ps(_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(0, 0, 2, 2)), + _mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(1, 3, 1, 3))); + + /* Blend the samples: broadcast each factor across a register, scale the matching sample, then sum all four. */ + rgba1 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(0, 0, 0, 0)), rgba1); + rgba2 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(1, 1, 1, 1)), rgba2); + rgba3 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(2, 2, 2, 2)), rgba3); + rgba4 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(3, 3, 3, 3)), rgba4); + __m128 rgba13 = _mm_add_ps(rgba1, rgba3); + __m128 rgba24 = _mm_add_ps(rgba2, rgba4); + __m128 rgba = _mm_add_ps(rgba13, rgba24); + rgba = _mm_add_ps(rgba, _mm_set1_ps(0.5f)); + /* Pack and write to destination (0.5 was added above so the truncating conversion rounds to nearest): pack to 16 bit signed, then to 8 bit + * unsigned, then write resulting 32-bit value. 
*/ + __m128i rgba32 = _mm_cvttps_epi32(rgba); + __m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128()); + __m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128()); + _mm_store_ss((float *)output, _mm_castsi128_ps(rgba8)); + +#else + + float a, b; + float a_b, ma_b, a_mb, ma_mb; + int y1, y2, x1, x2; + + float uf = floorf(u); + float vf = floorf(v); + + x1 = (int)uf; + x2 = x1 + 1; + y1 = (int)vf; + y2 = y1 + 1; + + const uchar *row1, *row2, *row3, *row4; + uchar empty[4] = {0, 0, 0, 0}; + + /* Completely outside of the image? Then write black (all zeroes) and bail out. */ + if (x2 < 0 || x1 >= width) { + copy_vn_uchar(output, 4, 0); + return; + } + + if (y2 < 0 || y1 >= height) { + copy_vn_uchar(output, 4, 0); + return; + } + + /* Sample including outside of edges of image: taps that fall outside use the zeroed `empty` pixel. NOTE(review): offsets below are computed in int, while the SSE path indexes with int64_t; confirm large images cannot overflow here. */ + if (x1 < 0 || y1 < 0) { + row1 = empty; + } + else { + row1 = buffer + width * y1 * 4 + 4 * x1; + } + + if (x1 < 0 || y2 > height - 1) { + row2 = empty; + } + else { + row2 = buffer + width * y2 * 4 + 4 * x1; + } + + if (x2 > width - 1 || y1 < 0) { + row3 = empty; + } + else { + row3 = buffer + width * y1 * 4 + 4 * x2; + } + + if (x2 > width - 1 || y2 > height - 1) { + row4 = empty; + } + else { + row4 = buffer + width * y2 * 4 + 4 * x2; + } + + a = u - uf; + b = v - vf; + a_b = a * b; + ma_b = (1.0f - a) * b; + a_mb = a * (1.0f - b); + ma_mb = (1.0f - a) * (1.0f - b); + + output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f); + output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f); + output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f); + output[3] = (uchar)(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f); +#endif } void BLI_bilinear_interpolation_fl( const float *buffer, float *output, int width, int height, int components, float u, float v) { - bilinear_interpolation( - NULL, buffer, NULL, output, width, height, components, u, v, false, false); -} - -void 
BLI_bilinear_interpolation_char( - const uchar *buffer, uchar *output, int width, int height, int components, float u, float v) -{ - bilinear_interpolation( - buffer, NULL, output, NULL, width, height, components, u, v, false, false); + bilinear_interpolation_fl(buffer, output, width, height, components, u, v, false, false); } void BLI_bilinear_interpolation_wrap_fl(const float *buffer, @@ -463,22 +546,7 @@ void BLI_bilinear_interpolation_wrap_fl(const float *buffer, bool wrap_x, bool wrap_y) { - bilinear_interpolation( - NULL, buffer, NULL, output, width, height, components, u, v, wrap_x, wrap_y); -} - -void BLI_bilinear_interpolation_wrap_char(const uchar *buffer, - uchar *output, - int width, - int height, - int components, - float u, - float v, - bool wrap_x, - bool wrap_y) -{ - bilinear_interpolation( - buffer, NULL, output, NULL, width, height, components, u, v, wrap_x, wrap_y); + bilinear_interpolation_fl(buffer, output, width, height, components, u, v, wrap_x, wrap_y); } /************************************************************************** diff --git a/source/blender/blenlib/tests/BLI_math_interp_test.cc b/source/blender/blenlib/tests/BLI_math_interp_test.cc new file mode 100644 index 00000000000..4e553280962 --- /dev/null +++ b/source/blender/blenlib/tests/BLI_math_interp_test.cc @@ -0,0 +1,98 @@ +/* SPDX-FileCopyrightText: 2023 Blender Authors + * + * SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "BLI_math_interp.h" + +static constexpr int image_width = 3; +static constexpr int image_height = 3; +static constexpr unsigned char image_char[image_height][image_width][4] = { + {{255, 254, 217, 216}, {230, 230, 230, 230}, {240, 160, 90, 20}}, + {{0, 1, 2, 3}, {62, 72, 82, 92}, {126, 127, 128, 129}}, + {{1, 2, 3, 4}, {73, 108, 153, 251}, {128, 129, 130, 131}}, +}; + +TEST(math_interp, BilinearCharExactSamples) +{ + unsigned char res[4]; + unsigned char exp1[4] = {73, 108, 153, 251}; + 
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.0f, 2.0f); + EXPECT_EQ_ARRAY(exp1, res, 4); + unsigned char exp2[4] = {240, 160, 90, 20}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.0f, 0.0f); + EXPECT_EQ_ARRAY(exp2, res, 4); +} + +TEST(math_interp, BilinearCharHalfwayUSamples) +{ + unsigned char res[4]; + unsigned char exp1[4] = {31, 37, 42, 48}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.5f, 1.0f); + EXPECT_EQ_ARRAY(exp1, res, 4); + unsigned char exp2[4] = {243, 242, 224, 223}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.5f, 0.0f); + EXPECT_EQ_ARRAY(exp2, res, 4); +} + +TEST(math_interp, BilinearCharHalfwayVSamples) +{ + unsigned char res[4]; + unsigned char exp1[4] = {1, 2, 3, 4}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.0f, 1.5f); + EXPECT_EQ_ARRAY(exp1, res, 4); + unsigned char exp2[4] = {127, 128, 129, 130}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.0f, 1.5f); + EXPECT_EQ_ARRAY(exp2, res, 4); +} + +TEST(math_interp, BilinearCharSamples) +{ + unsigned char res[4]; + unsigned char exp1[4] = {136, 133, 132, 130}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.25f, 0.625f); + EXPECT_EQ_ARRAY(exp1, res, 4); + unsigned char exp2[4] = {219, 191, 167, 142}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.4f, 0.1f); + EXPECT_EQ_ARRAY(exp2, res, 4); +} + +TEST(math_interp, BilinearCharPartiallyOutsideImage) +{ + unsigned char res[4]; + unsigned char exp1[4] = {1, 1, 2, 2}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -0.5f, 2.0f); + EXPECT_EQ_ARRAY(exp1, res, 4); + unsigned char exp2[4] = {9, 11, 15, 22}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.25f, 2.9f); + 
EXPECT_EQ_ARRAY(exp2, res, 4); + unsigned char exp3[4] = {173, 115, 65, 14}; + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.2f, -0.1f); + EXPECT_EQ_ARRAY(exp3, res, 4); +} + +TEST(math_interp, BilinearCharFullyOutsideImage) +{ + unsigned char res[4]; + unsigned char exp[4] = {0, 0, 0, 0}; + /* Out of range on U */ + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -1.5f, 0); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -1.1f, 0); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 3, 0); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 5, 0); + EXPECT_EQ_ARRAY(exp, res, 4); + + /* Out of range on V */ + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, -3.2f); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, -1.5f); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, 3.1f); + EXPECT_EQ_ARRAY(exp, res, 4); + BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, 500.0f); + EXPECT_EQ_ARRAY(exp, res, 4); +} diff --git a/source/blender/imbuf/IMB_imbuf.h b/source/blender/imbuf/IMB_imbuf.h index fdb9098b339..3796722c805 100644 --- a/source/blender/imbuf/IMB_imbuf.h +++ b/source/blender/imbuf/IMB_imbuf.h @@ -684,10 +684,11 @@ void nearest_interpolation_color_wrap( const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v); void bilinear_interpolation_color( const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v); -void bilinear_interpolation_color_char( - const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v); -void 
bilinear_interpolation_color_fl( - const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v); +void bilinear_interpolation_color_char(const struct ImBuf *in, + unsigned char outI[4], + float u, + float v); +void bilinear_interpolation_color_fl(const struct ImBuf *in, float outF[4], float u, float v); /** * Note about wrapping, the u/v still needs to be within the image bounds, * just the interpolation is wrapped. diff --git a/source/blender/imbuf/intern/imageprocess.cc b/source/blender/imbuf/intern/imageprocess.cc index 544bfac5ef1..32c95e44d09 100644 --- a/source/blender/imbuf/intern/imageprocess.cc +++ b/source/blender/imbuf/intern/imageprocess.cc @@ -83,7 +83,7 @@ void bicubic_interpolation_color(const ImBuf *in, uchar outI[4], float outF[4], BLI_bicubic_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v); } else { - BLI_bicubic_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v); + BLI_bicubic_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v); } } @@ -108,20 +108,18 @@ void bicubic_interpolation(const ImBuf *in, ImBuf *out, float u, float v, int xo /** \name Bi-Linear Interpolation * \{ */ -void bilinear_interpolation_color_fl( - const ImBuf *in, uchar /*outI*/[4], float outF[4], float u, float v) +void bilinear_interpolation_color_fl(const ImBuf *in, float outF[4], float u, float v) { BLI_assert(outF); BLI_assert(in->float_buffer.data); BLI_bilinear_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v); } -void bilinear_interpolation_color_char( - const ImBuf *in, uchar outI[4], float /*outF*/[4], float u, float v) +void bilinear_interpolation_color_char(const ImBuf *in, uchar outI[4], float u, float v) { BLI_assert(outI); BLI_assert(in->byte_buffer.data); - BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v); + BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v); } void bilinear_interpolation_color(const ImBuf 
*in, uchar outI[4], float outF[4], float u, float v) @@ -130,7 +128,7 @@ void bilinear_interpolation_color(const ImBuf *in, uchar outI[4], float outF[4], BLI_bilinear_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v); } else { - BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v); + BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v); } } diff --git a/source/blender/imbuf/intern/scaling.cc b/source/blender/imbuf/intern/scaling.cc index 59311246a3c..f809116f8c1 100644 --- a/source/blender/imbuf/intern/scaling.cc +++ b/source/blender/imbuf/intern/scaling.cc @@ -1762,7 +1762,7 @@ static void *do_scale_thread(void *data_v) if (data->byte_buffer) { uchar *pixel = data->byte_buffer + 4 * offset; - BLI_bilinear_interpolation_char(ibuf->byte_buffer.data, pixel, ibuf->x, ibuf->y, 4, u, v); + BLI_bilinear_interpolation_char(ibuf->byte_buffer.data, pixel, ibuf->x, ibuf->y, u, v); } if (data->float_buffer) { diff --git a/source/blender/imbuf/intern/transform.cc b/source/blender/imbuf/intern/transform.cc index 9db14362c86..833d8ef52f7 100644 --- a/source/blender/imbuf/intern/transform.cc +++ b/source/blender/imbuf/intern/transform.cc @@ -145,34 +145,16 @@ struct TransformUserData { } }; -/** - * \brief Base class for source discarding. - * - * The class decides if a specific uv coordinate from the source buffer should be ignored. - * This is used to mix multiple images over a single output buffer. Discarded pixels will - * not change the output buffer. - */ -class BaseDiscard { - public: - virtual ~BaseDiscard() = default; - - /** - * \brief Should the source pixel at the given uv coordinate be discarded. - */ - virtual bool should_discard(const TransformUserData &user_data, const double2 &uv) = 0; -}; - /** * \brief Crop uv-coordinates that are outside the user data src_crop rect. 
*/ -class CropSource : public BaseDiscard { - public: +struct CropSource { /** * \brief Should the source pixel at the given uv coordinate be discarded. * * Uses user_data.src_crop to determine if the uv coordinate should be skipped. */ - bool should_discard(const TransformUserData &user_data, const double2 &uv) override + static bool should_discard(const TransformUserData &user_data, const double2 &uv) { return uv.x < user_data.src_crop.xmin || uv.x >= user_data.src_crop.xmax || uv.y < user_data.src_crop.ymin || uv.y >= user_data.src_crop.ymax; @@ -182,14 +164,13 @@ class CropSource : public BaseDiscard { /** * \brief Discard that does not discard anything. */ -class NoDiscard : public BaseDiscard { - public: +struct NoDiscard { /** * \brief Should the source pixel at the given uv coordinate be discarded. * * Will never discard any pixels. */ - bool should_discard(const TransformUserData & /*user_data*/, const double2 & /*uv*/) override + static bool should_discard(const TransformUserData & /*user_data*/, const double2 & /*uv*/) { return false; } @@ -250,73 +231,19 @@ class PixelPointer { }; /** - * \brief Wrapping mode for the uv coordinates. - * - * Subclasses have the ability to change the UV coordinates when sampling the source buffer. + * \brief Repeats UV coordinate: floors the value and wraps the resulting index into [0, size), so the fractional part is dropped. */ -class BaseUVWrapping { - public: - /** - * \brief modify the given u coordinate. - */ - virtual double modify_u(const ImBuf *source_buffer, double u) = 0; - - /** - * \brief modify the given v coordinate. - */ - virtual double modify_v(const ImBuf *source_buffer, double v) = 0; - - /** - * \brief modify the given uv coordinate. - */ - double2 modify_uv(const ImBuf *source_buffer, const double2 &uv) - { - return double2(modify_u(source_buffer, uv.x), modify_v(source_buffer, uv.y)); - } -}; - -/** - * \brief UVWrapping method that does not modify the UV coordinates. 
- */ -class PassThroughUV : public BaseUVWrapping { - public: - double modify_u(const ImBuf * /*source_buffer*/, double u) override - { - return u; - } - - double modify_v(const ImBuf * /*source_buffer*/, double v) override - { - return v; - } -}; - -/** - * \brief UVWrapping method that wrap repeats the UV coordinates. - */ -class WrapRepeatUV : public BaseUVWrapping { - public: - double modify_u(const ImBuf *source_buffer, double u) override - - { - int x = int(floor(u)); - x = x % source_buffer->x; +static float wrap_uv(float value, int size) +{ + int x = int(floorf(value)); + if (UNLIKELY(x < 0 || x >= size)) { + x %= size; if (x < 0) { - x += source_buffer->x; + x += size; } - return x; } - - double modify_v(const ImBuf *source_buffer, double v) override - { - int y = int(floor(v)); - y = y % source_buffer->y; - if (y < 0) { - y += source_buffer->y; - } - return y; - } -}; + return x; +} /* TODO: should we use math_vectors for this. */ template @@ -369,14 +296,10 @@ template< */ int NumChannels, /** - * \brief Wrapping method to perform - * - * Should be a subclass of BaseUVWrapper + * \brief Should UVs wrap (when true, `sample` passes both coordinates through `wrap_uv` before sampling). */ - typename UVWrapping> + bool UVWrapping> class Sampler { - UVWrapping uv_wrapper; - public: using ChannelType = StorageType; static const int ChannelLen = NumChannels; @@ -384,26 +307,29 @@ class Sampler { void sample(const ImBuf *source, const double2 &uv, SampleType &r_sample) { + float u = float(uv.x); + float v = float(uv.y); + if constexpr (UVWrapping) { + u = wrap_uv(u, source->x); + v = wrap_uv(v, source->y); + } if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v && NumChannels == 4) { - const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv); - bilinear_interpolation_color_fl(source, nullptr, r_sample.data(), UNPACK2(wrapped_uv)); + bilinear_interpolation_color_fl(source, r_sample.data(), u, v); } else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v && NumChannels == 4) { - const double2 wrapped_uv = 
uv_wrapper.modify_uv(source, uv); - nearest_interpolation_color_char(source, r_sample.data(), nullptr, UNPACK2(wrapped_uv)); + nearest_interpolation_color_char(source, r_sample.data(), nullptr, u, v); } else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v && NumChannels == 4) { - const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv); - bilinear_interpolation_color_char(source, r_sample.data(), nullptr, UNPACK2(wrapped_uv)); + bilinear_interpolation_color_char(source, r_sample.data(), u, v); } else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v) { - if constexpr (std::is_same_v) { + if constexpr (UVWrapping) { BLI_bilinear_interpolation_wrap_fl(source->float_buffer.data, r_sample.data(), source->x, @@ -414,18 +340,12 @@ class Sampler { true); } else { - const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv); - BLI_bilinear_interpolation_fl(source->float_buffer.data, - r_sample.data(), - source->x, - source->y, - NumChannels, - UNPACK2(wrapped_uv)); + BLI_bilinear_interpolation_fl( + source->float_buffer.data, r_sample.data(), source->x, source->y, NumChannels, u, v); } } else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v) { - const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv); - sample_nearest_float(source, wrapped_uv, r_sample); + sample_nearest_float(source, u, v, r_sample); } else { /* Unsupported sampler. */ @@ -434,13 +354,16 @@ class Sampler { } private: - void sample_nearest_float(const ImBuf *source, const double2 &uv, SampleType &r_sample) + void sample_nearest_float(const ImBuf *source, + const float u, + const float v, + SampleType &r_sample) { BLI_STATIC_ASSERT(std::is_same_v); /* ImBuf in must have a valid rect or rect_float, assume this is already checked */ - int x1 = int(uv.x); - int y1 = int(uv.y); + int x1 = int(u); + int y1 = int(v); /* Break when sample outside image is requested. 
*/ if (x1 < 0 || x1 >= source->x || y1 < 0 || y1 >= source->y) { @@ -537,9 +460,7 @@ class ChannelConverter { */ template< /** - * \brief Discard function to use. - * - * \attention Should be a subclass of BaseDiscard. + * \brief Discard functor that implements `should_discard`. */ typename Discard, @@ -659,17 +580,17 @@ ScanlineThreadFunc get_scanline_function(const eIMBTransformMode mode) case IMB_TRANSFORM_MODE_REGULAR: return transform_scanline_function< ScanlineProcessor, + Sampler, PixelPointer>>; case IMB_TRANSFORM_MODE_CROP_SRC: return transform_scanline_function< ScanlineProcessor, + Sampler, PixelPointer>>; case IMB_TRANSFORM_MODE_WRAP_REPEAT: return transform_scanline_function< ScanlineProcessor, + Sampler, PixelPointer>>; }