2024-02-02 16:29:01 +01:00 · 2024-02-02 09:52:59 +01:00 · 2024-02-02 10:30:37 +01:00 · 2024-02-02 09:58:16 +01:00 · 2024-02-02 10:00:18 +01:00 · 2024-02-02 10:14:59 +01:00
16 changed files with 393 additions and 155 deletions
--- a/source/blender/blenkernel/intern/tracking_stabilize.cc
+++ b/source/blender/blenkernel/intern/tracking_stabilize.cc
@ -1316,7 +1316,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata,
      for (int x = 0; x < tmpibuf->x; x++, dst++) {
        vec[0] = float(x);
        mul_v3_m4v3(rvec, mat, vec);
-        *dst = imbuf::interpolate_bilinear_fl(ibuf, rvec[0], rvec[1]);
+        *dst = imbuf::interpolate_bilinear_border_fl(ibuf, rvec[0], rvec[1]);
      }
    }
    else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) {
@ -1342,7 +1342,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata,
      for (int x = 0; x < tmpibuf->x; x++, dst++) {
        vec[0] = float(x);
        mul_v3_m4v3(rvec, mat, vec);
-        *dst = imbuf::interpolate_bilinear_byte(ibuf, rvec[0], rvec[1]);
+        *dst = imbuf::interpolate_bilinear_border_byte(ibuf, rvec[0], rvec[1]);
      }
    }
    else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) {
--- a/source/blender/blenlib/BLI_math_interp.hh
+++ b/source/blender/blenlib/BLI_math_interp.hh
@ -142,7 +142,7 @@ inline void interpolate_nearest_wrap_fl(
 }

 /**
- * Bilinear sampling.
+ * Bilinear sampling (with black border).
 *
 * Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
 * based on fractional parts of u,v. Samples outside the image are turned
@ -152,6 +152,26 @@ inline void interpolate_nearest_wrap_fl(
 * to get proper filtering.
 */

+[[nodiscard]] uchar4 interpolate_bilinear_border_byte(
+    const uchar *buffer, int width, int height, float u, float v);
+
+[[nodiscard]] float4 interpolate_bilinear_border_fl(
+    const float *buffer, int width, int height, float u, float v);
+
+void interpolate_bilinear_border_fl(
+    const float *buffer, float *output, int width, int height, int components, float u, float v);
+
+/**
+ * Bilinear sampling.
+ *
+ * Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
+ * based on fractional parts of u,v.
+ * Samples outside the image are clamped to texels at image edge.
+ *
+ * Note that you probably want to subtract 0.5 from u,v before this function,
+ * to get proper filtering.
+ */
+
 [[nodiscard]] uchar4 interpolate_bilinear_byte(
    const uchar *buffer, int width, int height, float u, float v);

--- a/source/blender/blenlib/BLI_simd.h
+++ b/source/blender/blenlib/BLI_simd.h
@ -30,3 +30,9 @@
 #else
 #  define BLI_HAVE_SSE2 0
 #endif
+
+#if defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON))
+#  define BLI_HAVE_SSE4 1 /* SSE4.1 target, or ARM NEON built with WITH_SSE2NEON. */
+#else
+#  define BLI_HAVE_SSE4 0 /* Code testing this macro has to supply a fallback path. */
+#endif
--- a/source/blender/blenlib/intern/math_interp.cc
+++ b/source/blender/blenlib/intern/math_interp.cc
@ -17,6 +17,8 @@
 #include "BLI_simd.h"
 #include "BLI_strict_flags.h"

+namespace blender::math {
+
 enum class eCubicFilter {
  BSpline,
  Mitchell,
@ -24,7 +26,7 @@ enum class eCubicFilter {

 /* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
 * f is 0..1 offset from texel center in pixel space. */
-template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficients(float f)
+template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
 {
  float f2 = f * f;
  float f3 = f2 * f;
@ -35,7 +37,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
    float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
    float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
    float w2 = 1.0f - w0 - w1 - w3;
-    return blender::float4(w0, w1, w2, w3);
+    return float4(w0, w1, w2, w3);
  }
  else if constexpr (filter == eCubicFilter::Mitchell) {
    /* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
@ -43,7 +45,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
    float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
    float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
    float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
-    return blender::float4(w0, w1, w2, w3);
+    return float4(w0, w1, w2, w3);
  }
 }

@ -54,13 +56,11 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie

 BLI_INLINE __m128 floor_simd(__m128 v)
 {
-#  if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
-  /* If we're on SSE4 or ARM NEON, just use the simple floor() way. */
+#  if BLI_HAVE_SSE4
  __m128 v_floor = _mm_floor_ps(v);
 #  else
-  /* The hard way: truncate, for negative inputs this will round towards zero.
-   * Then compare with input, and subtract 1 for the inputs that were
-   * negative. */
+  /* Truncate, for negative inputs this will round towards zero. Then compare
+   * with input, and subtract 1 where truncation rounded up (negative fractions). */
  __m128 v_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
  __m128 v_neg = _mm_cmplt_ps(v, v_trunc);
  __m128 v_floor = _mm_sub_ps(v_trunc, _mm_and_ps(v_neg, _mm_set1_ps(1.0f)));
@ -68,6 +68,30 @@ BLI_INLINE __m128 floor_simd(__m128 v)
  return v_floor;
 }

+BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b) /* Per-lane signed 32-bit minimum. */
+{
+#  if BLI_HAVE_SSE4
+  return _mm_min_epi32(a, b);
+#  else
+  __m128i cmp = _mm_cmplt_epi32(a, b); /* All-ones in lanes where a < b. */
+  a = _mm_and_si128(cmp, a); /* Keep `a` only in those lanes. */
+  b = _mm_andnot_si128(cmp, b); /* Keep `b` in the remaining lanes. */
+  return _mm_or_si128(a, b);
+#  endif
+}
+
+BLI_INLINE __m128i max_i_simd(__m128i a, __m128i b) /* Per-lane signed 32-bit maximum. */
+{
+#  if BLI_HAVE_SSE4
+  return _mm_max_epi32(a, b);
+#  else
+  __m128i cmp = _mm_cmplt_epi32(b, a); /* All-ones in lanes where a > b. */
+  a = _mm_and_si128(cmp, a); /* Keep `a` only in those lanes. */
+  b = _mm_andnot_si128(cmp, b); /* Keep `b` in the remaining lanes. */
+  return _mm_or_si128(a, b);
+#  endif
+}
+
 template<eCubicFilter filter>
 BLI_INLINE void bicubic_interpolation_uchar_simd(
    const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
@ -90,8 +114,8 @@ BLI_INLINE void bicubic_interpolation_uchar_simd(
  __m128 frac_uv = _mm_sub_ps(uv, uv_floor);

  /* Calculate pixel weights. */
-  blender::float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
-  blender::float4 wy = cubic_filter_coefficients<filter>(
+  float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
+  float4 wy = cubic_filter_coefficients<filter>(
      _mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));

  /* Read 4x4 source pixels and blend them. */
@ -134,8 +158,6 @@ template<typename T, eCubicFilter filter>
 static void bicubic_interpolation(
    const T *src_buffer, T *output, int width, int height, int components, float u, float v)
 {
-  using namespace blender;
-
  BLI_assert(src_buffer && output);

 #if BLI_HAVE_SSE2
@ -234,6 +256,7 @@ static void bicubic_interpolation(
  }
 }

+template<bool border>
 BLI_INLINE void bilinear_fl_impl(const float *buffer,
                                 float *output,
                                 int width,
@ -288,33 +311,23 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
    return;
  }

-  /* Sample including outside of edges of image. */
-  if (x1 < 0 || y1 < 0) {
-    row1 = empty;
+  /* Sample locations. */
+  if constexpr (border) {
+    row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * components;
+    row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * components;
+    row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * components;
+    row4 = (x2 > width - 1 || y2 > height - 1) ? empty :
+                                                 buffer + (int64_t(width) * y2 + x2) * components;
  }
  else {
-    row1 = buffer + width * y1 * components + components * x1;
-  }
-
-  if (x1 < 0 || y2 > height - 1) {
-    row2 = empty;
-  }
-  else {
-    row2 = buffer + width * y2 * components + components * x1;
-  }
-
-  if (x2 > width - 1 || y1 < 0) {
-    row3 = empty;
-  }
-  else {
-    row3 = buffer + width * y1 * components + components * x2;
-  }
-
-  if (x2 > width - 1 || y2 > height - 1) {
-    row4 = empty;
-  }
-  else {
-    row4 = buffer + width * y2 * components + components * x2;
+    x1 = blender::math::clamp(x1, 0, width - 1);
+    x2 = blender::math::clamp(x2, 0, width - 1);
+    y1 = blender::math::clamp(y1, 0, height - 1);
+    y2 = blender::math::clamp(y2, 0, height - 1);
+    row1 = buffer + (int64_t(width) * y1 + x1) * components;
+    row2 = buffer + (int64_t(width) * y2 + x1) * components;
+    row3 = buffer + (int64_t(width) * y1 + x2) * components;
+    row4 = buffer + (int64_t(width) * y2 + x2) * components;
  }

  a = u - uf;
@ -355,23 +368,13 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
  }
 }

-namespace blender::math {
-
-uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
+template<bool border>
+BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
 {
  BLI_assert(buffer);
  uchar4 res;

 #if BLI_HAVE_SSE2
-  /* Bilinear interpolation needs to read and blend four image pixels, while
-   * also handling conditions of sample coordinate being outside of the
-   * image, in which case black (all zeroes) should be used as the sample
-   * contribution.
-   *
-   * Code below does all that without any branches, by making outside the
-   * image sample locations still read the first pixel of the image, but
-   * later making sure that the result is set to zero for that sample. */
-
  __m128 uvuv = _mm_set_ps(v, u, v, u);
  __m128 uvuv_floor = floor_simd(uvuv);

@ -380,18 +383,42 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
  /* Check whether any of the coordinates are outside of the image. */
  __m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
                                       _mm_set1_epi32(1));
-  __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
-  __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
-  __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);

-  /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
-  __m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
-  __m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
-  __m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
-                                      _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
-  /* Set x & y to zero for invalid samples. */
-  x1234 = _mm_andnot_si128(invalid_1234, x1234);
-  y1234 = _mm_andnot_si128(invalid_1234, y1234);
+  /* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
+  __m128i x1234, y1234, invalid_1234;
+
+  if constexpr (border) {
+    /* Blend black colors for samples right outside the image: figure out
+     * which of the 4 samples were outside, set their coordinates to zero
+     * and later on put black color into their place. */
+    __m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
+    __m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
+    __m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
+
+    /* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
+    x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
+    y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
+    invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
+                                _mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
+    /* Set x & y to zero for invalid samples. */
+    x1234 = _mm_andnot_si128(invalid_1234, x1234);
+    y1234 = _mm_andnot_si128(invalid_1234, y1234);
+  }
+  else {
+    /* Clamp samples to image edges, unless all four of them are outside
+     * in which case return black. */
+    __m128i xy12_clamped = max_i_simd(xy12, _mm_setzero_si128());
+    xy12_clamped = min_i_simd(xy12_clamped, size_minus_1);
+    __m128i valid_xy12 = _mm_cmpeq_epi32(xy12, xy12_clamped);
+    __m128i valid_pairs = _mm_and_si128(valid_xy12,
+                                        _mm_shuffle_epi32(valid_xy12, _MM_SHUFFLE(0, 3, 2, 1)));
+    if (_mm_movemask_ps(_mm_castsi128_ps(valid_pairs)) == 0) {
+      return uchar4(0);
+    }
+
+    x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
+    y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
+  }

  /* Read the four sample values. Do address calculations in C, since SSE
   * before 4.1 makes it very cumbersome to do full integer multiplies. */
@ -404,8 +431,10 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
  int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
  int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
  __m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
-  /* Set samples to black for the ones that were actually invalid. */
-  samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
+  if constexpr (border) {
+    /* Set samples to black for the ones that were actually invalid. */
+    samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
+  }

  /* Expand samples from packed 8-bit RGBA to full floats:
   * spread to 16 bit values. */
@ -455,35 +484,24 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
    return uchar4(0);
  }

-  /* Sample including outside of edges of image. */
+  /* Sample locations. */
  const uchar *row1, *row2, *row3, *row4;
  uchar empty[4] = {0, 0, 0, 0};
-  if (x1 < 0 || y1 < 0) {
-    row1 = empty;
+  if constexpr (border) {
+    row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4;
+    row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4;
+    row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4;
+    row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4;
  }
  else {
-    row1 = buffer + width * y1 * 4 + 4 * x1;
-  }
-
-  if (x1 < 0 || y2 > height - 1) {
-    row2 = empty;
-  }
-  else {
-    row2 = buffer + width * y2 * 4 + 4 * x1;
-  }
-
-  if (x2 > width - 1 || y1 < 0) {
-    row3 = empty;
-  }
-  else {
-    row3 = buffer + width * y1 * 4 + 4 * x2;
-  }
-
-  if (x2 > width - 1 || y2 > height - 1) {
-    row4 = empty;
-  }
-  else {
-    row4 = buffer + width * y2 * 4 + 4 * x2;
+    x1 = blender::math::clamp(x1, 0, width - 1);
+    x2 = blender::math::clamp(x2, 0, width - 1);
+    y1 = blender::math::clamp(y1, 0, height - 1);
+    y2 = blender::math::clamp(y2, 0, height - 1);
+    row1 = buffer + (int64_t(width) * y1 + x1) * 4;
+    row2 = buffer + (int64_t(width) * y2 + x1) * 4;
+    row3 = buffer + (int64_t(width) * y1 + x2) * 4;
+    row4 = buffer + (int64_t(width) * y2 + x2) * 4;
  }

  float a = u - uf;
@ -502,17 +520,41 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
  return res;
 }

+uchar4 interpolate_bilinear_border_byte(
+    const uchar *buffer, int width, int height, float u, float v)
+{
+  return bilinear_byte_impl<true>(buffer, width, height, u, v); /* true: black border. */
+}
+
+uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
+{
+  return bilinear_byte_impl<false>(buffer, width, height, u, v); /* false: clamp to edge. */
+}
+
+float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
+{
+  float4 res;
+  bilinear_fl_impl<true>(buffer, res, width, height, 4, u, v); /* 4 components, black border. */
+  return res;
+}
+
+void interpolate_bilinear_border_fl(
+    const float *buffer, float *output, int width, int height, int components, float u, float v)
+{
+  bilinear_fl_impl<true>(buffer, output, width, height, components, u, v); /* Black border. */
+}
+
 float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
 {
  float4 res;
-  bilinear_fl_impl(buffer, res, width, height, 4, u, v);
+  bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v);
  return res;
 }

 void interpolate_bilinear_fl(
    const float *buffer, float *output, int width, int height, int components, float u, float v)
 {
-  bilinear_fl_impl(buffer, output, width, height, components, u, v);
+  bilinear_fl_impl<false>(buffer, output, width, height, components, u, v);
 }

 void interpolate_bilinear_wrap_fl(const float *buffer,
@ -525,7 +567,7 @@ void interpolate_bilinear_wrap_fl(const float *buffer,
                                  bool wrap_x,
                                  bool wrap_y)
 {
-  bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
+  bilinear_fl_impl<false>(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
 }

 uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
@ -573,7 +615,7 @@ uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height
 float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
 {
  float4 res;
-  bilinear_fl_impl(buffer, res, width, height, 4, u, v, true, true);
+  bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v, true, true);
  return res;
 }

--- a/source/blender/blenlib/tests/BLI_math_interp_test.cc
+++ b/source/blender/blenlib/tests/BLI_math_interp_test.cc
@ -28,10 +28,10 @@ TEST(math_interp, BilinearCharExactSamples)
 {
  uchar4 res;
  uchar4 exp1 = {73, 108, 153, 251};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f);
  EXPECT_EQ(exp1, res);
  uchar4 exp2 = {240, 160, 90, 20};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f);
  EXPECT_EQ(exp2, res);
 }

@ -39,10 +39,10 @@ TEST(math_interp, BilinearCharHalfwayUSamples)
 {
  uchar4 res;
  uchar4 exp1 = {31, 37, 42, 48};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f);
  EXPECT_EQ(exp1, res);
  uchar4 exp2 = {243, 242, 224, 223};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f);
  EXPECT_EQ(exp2, res);
 }

@ -50,10 +50,10 @@ TEST(math_interp, BilinearCharHalfwayVSamples)
 {
  uchar4 res;
  uchar4 exp1 = {1, 2, 3, 4};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f);
  EXPECT_EQ(exp1, res);
  uchar4 exp2 = {127, 128, 129, 130};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f);
  EXPECT_EQ(exp2, res);
 }

@ -61,10 +61,11 @@ TEST(math_interp, BilinearCharSamples)
 {
  uchar4 res;
  uchar4 exp1 = {136, 133, 132, 130};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 0.625f);
+  res = interpolate_bilinear_border_byte(
+      image_char[0][0], image_width, image_height, 1.25f, 0.625f);
  EXPECT_EQ(exp1, res);
  uchar4 exp2 = {219, 191, 167, 142};
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f);
  EXPECT_EQ(exp2, res);
 }

@ -72,25 +73,39 @@ TEST(math_interp, BilinearFloatSamples)
 {
  float4 res;
  float4 exp1 = {135.9375f, 133.28125f, 131.5625f, 129.84375f};
-  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f);
+  res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f);
  EXPECT_V4_NEAR(exp1, res, float_tolerance);
  float4 exp2 = {219.36f, 191.2f, 166.64f, 142.08f};
-  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f);
+  res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f);
  EXPECT_V4_NEAR(exp2, res, float_tolerance);
 }

+TEST(math_interp, BilinearCharPartiallyOutsideImageBorder)
+{
+  uchar4 res;
+  uchar4 exp1 = {1, 1, 2, 2}; /* u < 0: the column at x = -1 is blended in as black. */
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f);
+  EXPECT_EQ(exp1, res);
+  uchar4 exp2 = {9, 11, 15, 22}; /* Presumably row floor(v) + 1 is past the last image row. */
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f);
+  EXPECT_EQ(exp2, res);
+  uchar4 exp3 = {173, 115, 65, 14}; /* v < 0: the row at y = -1 is blended in as black. */
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f);
+  EXPECT_EQ(exp3, res);
+}
+
 TEST(math_interp, BilinearCharPartiallyOutsideImage)
 {
  uchar4 res;
-  uchar4 exp1 = {1, 1, 2, 2};
+  uint4 exp1 = {1, 2, 3, 4};
  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f);
-  EXPECT_EQ(exp1, res);
-  uchar4 exp2 = {9, 11, 15, 22};
+  EXPECT_EQ(exp1, uint4(res));
+  uint4 exp2 = {87, 113, 147, 221};
  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f);
-  EXPECT_EQ(exp2, res);
-  uchar4 exp3 = {173, 115, 65, 14};
+  EXPECT_EQ(exp2, uint4(res));
+  uint4 exp3 = {240, 160, 90, 20};
  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f);
-  EXPECT_EQ(exp3, res);
+  EXPECT_EQ(exp3, uint4(res));
 }

 TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
@ -107,16 +122,30 @@ TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
  EXPECT_EQ(exp3, res);
 }

-TEST(math_interp, BilinearFloatPartiallyOutsideImage)
+TEST(math_interp, BilinearFloatPartiallyOutsideImageBorder)
 {
  float4 res;
  float4 exp1 = {0.5f, 1, 1.5f, 2};
-  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
+  res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
  EXPECT_V4_NEAR(exp1, res, float_tolerance);
  float4 exp2 = {8.675f, 11.325f, 14.725f, 22.1f};
-  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
+  res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
  EXPECT_V4_NEAR(exp2, res, float_tolerance);
  float4 exp3 = {172.8f, 115.2f, 64.8f, 14.4f};
+  res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f);
+  EXPECT_V4_NEAR(exp3, res, float_tolerance);
+}
+
+TEST(math_interp, BilinearFloatPartiallyOutsideImage)
+{
+  float4 res;
+  float4 exp1 = {1.0f, 2.0f, 3.0f, 4.0f};
+  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
+  EXPECT_V4_NEAR(exp1, res, float_tolerance);
+  float4 exp2 = {86.75f, 113.25f, 147.25f, 221.0f};
+  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
+  EXPECT_V4_NEAR(exp2, res, float_tolerance);
+  float4 exp3 = {240.0f, 160.0f, 90.0f, 20.0f};
  res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f);
  EXPECT_V4_NEAR(exp3, res, float_tolerance);
 }
@ -151,23 +180,23 @@ TEST(math_interp, BilinearCharFullyOutsideImage)
  uchar4 res;
  uchar4 exp = {0, 0, 0, 0};
  /* Out of range on U */
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.5f, 0);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.5f, 0);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.1f, 0);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.1f, 0);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 3, 0);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 3, 0);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 5, 0);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 5, 0);
  EXPECT_EQ(exp, res);

  /* Out of range on V */
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -3.2f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -3.2f);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -1.5f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -1.5f);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 3.1f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 3.1f);
  EXPECT_EQ(exp, res);
-  res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 500.0f);
+  res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 500.0f);
  EXPECT_EQ(exp, res);
 }

--- a/source/blender/compositor/intern/COM_MemoryBuffer.h
+++ b/source/blender/compositor/intern/COM_MemoryBuffer.h
@ -249,17 +249,17 @@ class MemoryBuffer {
        single_y = rel_y - last_y;
      }

-      math::interpolate_bilinear_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y);
+      math::interpolate_bilinear_border_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y);
      return;
    }

-    math::interpolate_bilinear_fl(buffer_,
-                                  out,
-                                  get_width(),
-                                  get_height(),
-                                  num_channels_,
-                                  get_relative_x(x),
-                                  get_relative_y(y));
+    math::interpolate_bilinear_border_fl(buffer_,
+                                         out,
+                                         get_width(),
+                                         get_height(),
+                                         num_channels_,
+                                         get_relative_x(x),
+                                         get_relative_y(y));
  }

  void read_elem_sampled(float x, float y, PixelSampler sampler, float *out) const
--- a/source/blender/compositor/operations/COM_ImageOperation.cc
+++ b/source/blender/compositor/operations/COM_ImageOperation.cc
@ -104,7 +104,7 @@ static void sample_image_at_location(ImBuf *ibuf,
        imbuf::interpolate_nearest_fl(ibuf, color, x, y);
        break;
      case PixelSampler::Bilinear:
-        imbuf::interpolate_bilinear_fl(ibuf, color, x, y);
+        imbuf::interpolate_bilinear_border_fl(ibuf, color, x, y);
        break;
      case PixelSampler::Bicubic:
        imbuf::interpolate_cubic_bspline_fl(ibuf, color, x, y);
@ -118,7 +118,7 @@ static void sample_image_at_location(ImBuf *ibuf,
        byte_color = imbuf::interpolate_nearest_byte(ibuf, x, y);
        break;
      case PixelSampler::Bilinear:
-        byte_color = imbuf::interpolate_bilinear_byte(ibuf, x, y);
+        byte_color = imbuf::interpolate_bilinear_border_byte(ibuf, x, y);
        break;
      case PixelSampler::Bicubic:
        byte_color = imbuf::interpolate_cubic_bspline_byte(ibuf, x, y);
--- a/source/blender/compositor/operations/COM_MovieClipOperation.cc
+++ b/source/blender/compositor/operations/COM_MovieClipOperation.cc
@ -85,7 +85,7 @@ void MovieClipBaseOperation::execute_pixel_sampled(float output[4],
        imbuf::interpolate_nearest_fl(ibuf, output, x, y);
        break;
      case PixelSampler::Bilinear:
-        imbuf::interpolate_bilinear_fl(ibuf, output, x, y);
+        imbuf::interpolate_bilinear_border_fl(ibuf, output, x, y);
        break;
      case PixelSampler::Bicubic:
        imbuf::interpolate_cubic_bspline_fl(ibuf, output, x, y);
--- a/source/blender/compositor/operations/COM_MultilayerImageOperation.cc
+++ b/source/blender/compositor/operations/COM_MultilayerImageOperation.cc
@ -91,7 +91,7 @@ void MultilayerColorOperation::execute_pixel_sampled(float output[4],
          imbuf::interpolate_nearest_fl(buffer_, output, x, y);
          break;
        case PixelSampler::Bilinear:
-          imbuf::interpolate_bilinear_fl(buffer_, output, x, y);
+          imbuf::interpolate_bilinear_border_fl(buffer_, output, x, y);
          break;
        case PixelSampler::Bicubic:
          imbuf::interpolate_cubic_bspline_fl(buffer_, output, x, y);
--- a/source/blender/compositor/operations/COM_RenderLayersProg.cc
+++ b/source/blender/compositor/operations/COM_RenderLayersProg.cc
@ -77,7 +77,8 @@ void RenderLayersProg::do_interpolation(float output[4], float x, float y, Pixel
      math::interpolate_nearest_fl(input_buffer_, output, width, height, elementsize_, x, y);
      break;
    case PixelSampler::Bilinear:
-      math::interpolate_bilinear_fl(input_buffer_, output, width, height, elementsize_, x, y);
+      math::interpolate_bilinear_border_fl(
+          input_buffer_, output, width, height, elementsize_, x, y);
      break;
    case PixelSampler::Bicubic:
      math::interpolate_cubic_bspline_fl(input_buffer_, output, width, height, elementsize_, x, y);
--- a/source/blender/imbuf/IMB_interp.hh
+++ b/source/blender/imbuf/IMB_interp.hh
@ -18,6 +18,8 @@

 namespace blender::imbuf {

+/* Nearest sampling. */
+
 [[nodiscard]] inline uchar4 interpolate_nearest_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_nearest_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -35,6 +37,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
  math::interpolate_nearest_fl(in->float_buffer.data, output, in->x, in->y, 4, u, v);
 }

+/* Nearest sampling with UV wrapping. */
+
 [[nodiscard]] inline uchar4 interpolate_nearest_wrap_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_nearest_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -44,6 +48,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
  return math::interpolate_nearest_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
 }

+/* Bilinear sampling. */
+
 [[nodiscard]] inline uchar4 interpolate_bilinear_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_bilinear_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -63,6 +69,29 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f
  memcpy(output, &col, sizeof(col));
 }

+/* Bilinear sampling, samples near edge blend into transparency. */
+
+[[nodiscard]] inline uchar4 interpolate_bilinear_border_byte(const ImBuf *in, float u, float v)
+{
+  return math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v);
+}
+[[nodiscard]] inline float4 interpolate_bilinear_border_fl(const ImBuf *in, float u, float v)
+{
+  return math::interpolate_bilinear_border_fl(in->float_buffer.data, in->x, in->y, u, v);
+}
+inline void interpolate_bilinear_border_byte(const ImBuf *in, uchar output[4], float u, float v)
+{
+  uchar4 col = math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v);
+  memcpy(output, &col, sizeof(col));
+}
+inline void interpolate_bilinear_border_fl(const ImBuf *in, float output[4], float u, float v)
+{
+  float4 col = math::interpolate_bilinear_border_fl(in->float_buffer.data, in->x, in->y, u, v);
+  memcpy(output, &col, sizeof(col));
+}
+
+/* Bilinear sampling with UV wrapping. */
+
 [[nodiscard]] inline uchar4 interpolate_bilinear_wrap_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_bilinear_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -72,6 +101,8 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f
  return math::interpolate_bilinear_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
 }

+/* Cubic B-Spline sampling. */
+
 [[nodiscard]] inline uchar4 interpolate_cubic_bspline_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_cubic_bspline_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -91,6 +122,8 @@ inline void interpolate_cubic_bspline_fl(const ImBuf *in, float output[4], float
  memcpy(output, &col, sizeof(col));
 }

+/* Cubic Mitchell sampling. */
+
 [[nodiscard]] inline uchar4 interpolate_cubic_mitchell_byte(const ImBuf *in, float u, float v)
 {
  return math::interpolate_cubic_mitchell_byte(in->byte_buffer.data, in->x, in->y, u, v);
--- a/source/blender/imbuf/intern/scaling.cc
+++ b/source/blender/imbuf/intern/scaling.cc
@ -1761,12 +1761,12 @@ static void *do_scale_thread(void *data_v)
      int offset = y * data->newx + x;

      if (data->byte_buffer) {
-        interpolate_bilinear_byte(ibuf, data->byte_buffer + 4 * offset, u, v);
+        interpolate_bilinear_border_byte(ibuf, data->byte_buffer + 4 * offset, u, v);
      }

      if (data->float_buffer) {
        float *pixel = data->float_buffer + ibuf->channels * offset;
-        blender::math::interpolate_bilinear_fl(
+        blender::math::interpolate_bilinear_border_fl(
            ibuf->float_buffer.data, pixel, ibuf->x, ibuf->y, ibuf->channels, u, v);
      }
    }
--- a/source/blender/imbuf/intern/transform.cc
+++ b/source/blender/imbuf/intern/transform.cc
@ -38,6 +38,9 @@ struct TransformContext {
  /* Source UV step delta, when moving along one destination pixel in Y axis. */
  float2 add_y;

+  /* Source corners in destination pixel space, counter-clockwise. */
+  float2 src_corners[4];
+
  IndexRange dst_region_x_range;
  IndexRange dst_region_y_range;

@ -66,14 +69,15 @@ struct TransformContext {
    rcti rect;
    BLI_rcti_init_minmax(&rect);
    float4x4 inverse = math::invert(transform_matrix);
-    for (const int2 &src_coords : {
-             int2(src_crop.xmin, src_crop.ymin),
-             int2(src_crop.xmax, src_crop.ymin),
-             int2(src_crop.xmin, src_crop.ymax),
-             int2(src_crop.xmax, src_crop.ymax),
-         })
-    {
-      float3 dst_co = math::transform_point(inverse, float3(src_coords.x, src_coords.y, 0.0f));
+    const int2 src_coords[4] = {int2(src_crop.xmin, src_crop.ymin),
+                                int2(src_crop.xmax, src_crop.ymin),
+                                int2(src_crop.xmax, src_crop.ymax),
+                                int2(src_crop.xmin, src_crop.ymax)};
+    for (int i = 0; i < 4; i++) {
+      int2 src_co = src_coords[i];
+      float3 dst_co = math::transform_point(inverse, float3(src_co.x, src_co.y, 0.0f));
+      src_corners[i] = float2(dst_co.x, dst_co.y);
+
      BLI_rcti_do_minmax_v(&rect, int2(dst_co) + margin);
      BLI_rcti_do_minmax_v(&rect, int2(dst_co) - margin);
    }
@ -251,10 +255,8 @@ static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
     *
     * Do a box filter: for each destination pixel, accumulate XxY samples from source,
     * based on scaling factors (length of X/Y pixel steps). Use at least 2 samples
-     * along each direction, so that in case of rotation the resulting edges get
-     * some anti-aliasing, to match previous Subsampled3x3 filter behavior. The
-     * "at least 2" can be removed once/if transform edge anti-aliasing is implemented
-     * in general way for all filters. Use at most 100 samples along each direction,
+     * along each direction, so that in case of rotation the image gets
+     * some anti-aliasing. Use at most 100 samples along each direction,
     * just as some way of clamping possible upper cost. Scaling something down by more
     * than 100x should rarely if ever happen, worst case they will get some aliasing.
     */
@ -336,8 +338,9 @@ template<eIMBInterpolationFilterMode Filter>
 static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range)
 {
  int channels = ctx.src->channels;
+
  if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) {
-    /* Float images. */
+    /* Float pixels. */
    if (channels == 4) {
      transform_scanlines<Filter, float, 4>(ctx, y_range);
    }
@ -351,14 +354,109 @@ static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y
      transform_scanlines<Filter, float, 1>(ctx, y_range);
    }
  }
-  else if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
-    /* Byte images. */
+
+  if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
+    /* Byte pixels. */
    if (channels == 4) {
      transform_scanlines<Filter, uchar, 4>(ctx, y_range);
    }
  }
 }

+/* Approximate coverage factor for one pixel touched by a quad edge.
+ *
+ * - `pos`: exact position on the edge, `ipos`: the integer pixel it maps to.
+ * - `delta`: edge direction; only its sign along the major axis is used.
+ * - `is_steep`: true when the edge advances mostly along Y.
+ *
+ * Returns a squared factor in the 0..1 range. */
+static float calc_coverage(float2 pos, int2 ipos, float2 delta, bool is_steep)
+{
+  /* Very approximate: the distance from the edge position to the integer pixel
+   * coordinate, measured along the minor axis (X for steep edges, Y otherwise). */
+  const float distance = is_steep ? fabsf(ipos.x - pos.x) : fabsf(ipos.y - pos.y);
+
+  /* Which side of the edge is inside the quad depends on the edge direction,
+   * so the distance is flipped for edges going one way. */
+  const bool flip = is_steep ? (delta.y < 0) : (delta.x > 0);
+  const float linear_cov = math::clamp(flip ? 1.0f - distance : distance, 0.0f, 1.0f);
+
+  /* Resulting coverage is 0.5 .. 1.0 range, since only half of the pixels that
+   * should be anti-aliased are covered (the other half is outside the quad and
+   * does not get rasterized). Squaring gives more range and looks a bit nicer. */
+  return linear_cov * linear_cov;
+}
+
+
+/* Approximate anti-aliasing of the transformed image outline.
+ *
+ * Rasterize along the four outer source edges (given in destination pixel
+ * space by `ctx.src_corners`) into the destination image, reducing alpha based
+ * on pixel distance to the edge at each pixel. This is very approximate and
+ * not 100% correct "analytical AA", but simple to do and better than nothing.
+ *
+ * Modifies whichever of the destination float and byte buffers exist. */
+static void edge_aa(const TransformContext &ctx)
+{
+  float *dst_float = ctx.dst->float_buffer.data;
+  uchar *dst_byte = ctx.dst->byte_buffer.data;
+
+  for (int line_idx = 0; line_idx < 4; line_idx++) {
+    /* Edge from this corner to the next one; `& 3` wraps the last edge back
+     * to corner 0. */
+    float2 ptA = ctx.src_corners[line_idx];
+    float2 ptB = ctx.src_corners[(line_idx + 1) & 3];
+    float2 delta = ptB - ptA;
+    float2 abs_delta = math::abs(delta);
+    /* Number of DDA steps: extent along the major axis. */
+    float length = math::max(abs_delta.x, abs_delta.y);
+    if (length < 1) {
+      /* Edge shorter than one pixel: nothing to rasterize. */
+      continue;
+    }
+    bool is_steep = length == abs_delta.y;
+
+    /* It is very common to have non-rotated strips; check if edge line is
+     * horizontal or vertical and would not alter the coverage and can
+     * be skipped. */
+    constexpr float NO_ROTATION = 1.0e-6f;
+    constexpr float NO_AA_CONTRIB = 1.0e-2f;
+    if (is_steep) {
+      if ((abs_delta.x < NO_ROTATION) && (fabsf(ptA.x - roundf(ptA.x)) < NO_AA_CONTRIB)) {
+        continue;
+      }
+    }
+    else {
+      if ((abs_delta.y < NO_ROTATION) && (fabsf(ptA.y - roundf(ptA.y)) < NO_AA_CONTRIB)) {
+        continue;
+      }
+    }
+
+    /* DDA line raster: step one pixel along the longer direction. */
+    delta /= length;
+    for (int i = 0; i < length; i++) {
+      const float2 pos = ptA + i * delta;
+      /* Use floor, not truncation: converting to int truncates toward zero,
+       * which would map positions in (-1, 0) onto pixel 0 and attenuate a
+       * border pixel even though the edge lies outside of the image. */
+      const int2 ipos = int2(int(floorf(pos.x)), int(floorf(pos.y)));
+      if (ipos.x < 0 || ipos.x >= ctx.dst->x || ipos.y < 0 || ipos.y >= ctx.dst->y) {
+        continue;
+      }
+
+      const float cov = calc_coverage(pos, ipos, delta, is_steep);
+      /* NOTE(review): the stride of 4 assumes both destination buffers are
+       * 4 channels per pixel; confirm for float images. */
+      const size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4;
+
+      if (dst_float != nullptr) {
+        /* Float pixels: all four components are scaled (presumably because
+         * float pixels are premultiplied; confirm). */
+        dst_float[idx + 0] *= cov;
+        dst_float[idx + 1] *= cov;
+        dst_float[idx + 2] *= cov;
+        dst_float[idx + 3] *= cov;
+      }
+      if (dst_byte != nullptr) {
+        /* Byte pixels: only the alpha component is scaled. */
+        dst_byte[idx + 3] *= cov;
+      }
+    }
+  }
+}
+
+
 }  // namespace blender::imbuf::transform

 using namespace blender::imbuf::transform;
@ -403,4 +501,8 @@ void IMB_transform(const ImBuf *src,
      transform_scanlines_filter<IMB_FILTER_BOX>(ctx, y_range);
    }
  });
+
+  if (crop && (filter != IMB_FILTER_NEAREST)) {
+    edge_aa(ctx);
+  }
 }
--- a/source/blender/render/intern/texture_margin.cc
+++ b/source/blender/render/intern/texture_margin.cc
@ -274,10 +274,12 @@ class TextureMarginMap {

            if (found_pixel_in_polygon) {
              if (ibuf_ptr_fl) {
-                ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_fl(ibuf, destX, destY);
+                ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_border_fl(
+                    ibuf, destX, destY);
              }
              if (ibuf_ptr_ch) {
-                ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_byte(ibuf, destX, destY);
+                ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_border_byte(
+                    ibuf, destX, destY);
              }
              /* Add our new pixels to the assigned pixel map. */
              mask[pixel_index] = 1;
--- a/source/blender/sequencer/intern/effects.cc
+++ b/source/blender/sequencer/intern/effects.cc
@ -1584,10 +1584,10 @@ static void transform_image(int x,
          break;
        case 1:
          if (dst_fl) {
-            dst_fl[offset] = imbuf::interpolate_bilinear_fl(ibuf, xt, yt);
+            dst_fl[offset] = imbuf::interpolate_bilinear_border_fl(ibuf, xt, yt);
          }
          else {
-            dst_ch[offset] = imbuf::interpolate_bilinear_byte(ibuf, xt, yt);
+            dst_ch[offset] = imbuf::interpolate_bilinear_border_byte(ibuf, xt, yt);
          }
          break;
        case 2:
--- a/tests/python/sequencer_render_tests.py
+++ b/tests/python/sequencer_render_tests.py
@ -50,6 +50,9 @@ def main():
    from modules import render_report
    report = render_report.Report("Sequencer", output_dir, oiiotool)
    report.set_pixelated(True)
+    # Default error tolerances are quite large, so lower them.
+    report.set_fail_threshold(1.0 / 255.0)
+    report.set_fail_percent(0.01)
    report.set_reference_dir("reference")

    test_dir_name = Path(test_dir).name