From c25412142d119ea87050f46fe652ce0c5222312f Mon Sep 17 00:00:00 2001
From: Aras Pranckevicius <aras@nesnausk.org>
Date: Sun, 28 Jan 2024 11:24:27 +0200
Subject: [PATCH] VSE: replace Subsampled3x3 filter by a general Box filter

Part of overall "improve filtering situation" (#116980): replace
Subsampled3x3 (added for blender 3.5 in f210842a7259 et al.) strip
scaling filter with a general Box filter.

Subsampled3x3 is really a Box filter ("average pixel values over NxM
region"), hardcoded to 3x3 size. As such, it works pretty well when
downscaling images by 3x on each axis. But starts to break down
and introduce aliasing at other scaling factors. When scaling up or
scaling down by less than 3x, using total of 9 samples is a bit of
overkill and hurts performance.

So instead, calculate the amount of NxM samples needed by looking
at scaling factors on X/Y axes. Note: use at least 2 samples on each
axis, so that when rotation is present, the result edges will get some
anti-aliasing, just like it was happening in previous filter
implementation.
---
 .../draw/engines/image/image_drawing_mode.hh  |  1 -
 source/blender/imbuf/IMB_imbuf.hh             |  4 +-
 source/blender/imbuf/intern/transform.cc      | 77 ++++++++++---------
 source/blender/imbuf/intern/transform_test.cc | 32 ++++----
 source/blender/makesdna/DNA_sequence_types.h  |  2 +-
 .../blender/makesrna/intern/rna_sequencer.cc  |  8 +-
 source/blender/sequencer/intern/render.cc     | 24 ++----
 7 files changed, 66 insertions(+), 82 deletions(-)
diff --git a/source/blender/draw/engines/image/image_drawing_mode.hh b/source/blender/draw/engines/image/image_drawing_mode.hh
index 91663d05c9a..e56302fbed4 100644
--- a/source/blender/draw/engines/image/image_drawing_mode.hh
+++ b/source/blender/draw/engines/image/image_drawing_mode.hh
@@ -616,7 +616,6 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
                   &texture_buffer,
                   transform_mode,
                   IMB_FILTER_NEAREST,
-                  1,
                   uv_to_texel.ptr(),
                   crop_rect_ptr);
   }
diff --git a/source/blender/imbuf/IMB_imbuf.hh b/source/blender/imbuf/IMB_imbuf.hh
index 028910a7e94..62232b3d380 100644
--- a/source/blender/imbuf/IMB_imbuf.hh
+++ b/source/blender/imbuf/IMB_imbuf.hh
@@ -266,6 +266,7 @@ enum eIMBInterpolationFilterMode {
   IMB_FILTER_BILINEAR,
   IMB_FILTER_CUBIC_BSPLINE,
   IMB_FILTER_CUBIC_MITCHELL,
+  IMB_FILTER_BOX,
 };
 
 /**
@@ -651,8 +652,6 @@ enum eIMBTransformMode {
  * - Only one data type buffer will be used (rect_float has priority over rect)
  * \param mode: Cropping/Wrap repeat effect to apply during transformation.
  * \param filter: Interpolation to use during sampling.
- * \param num_subsamples: Number of subsamples to use. Increasing this would improve the quality,
- * but reduces the performance.
  * \param transform_matrix: Transformation matrix to use.
  * The given matrix should transform between dst pixel space to src pixel space.
  * One unit is one pixel.
@@ -667,7 +666,6 @@ void IMB_transform(const ImBuf *src,
                    ImBuf *dst,
                    eIMBTransformMode mode,
                    eIMBInterpolationFilterMode filter,
-                   const int num_subsamples,
                    const float transform_matrix[4][4],
                    const rctf *src_crop);
 
diff --git a/source/blender/imbuf/intern/transform.cc b/source/blender/imbuf/intern/transform.cc
index 917dbabcb98..c5d11b14c38 100644
--- a/source/blender/imbuf/intern/transform.cc
+++ b/source/blender/imbuf/intern/transform.cc
@@ -15,7 +15,6 @@
 #include "BLI_math_vector.h"
 #include "BLI_rect.h"
 #include "BLI_task.hh"
-#include "BLI_vector.hh"
 
 #include "IMB_imbuf.hh"
 #include "IMB_interp.hh"
@@ -39,42 +38,21 @@ struct TransformContext {
   /* Source UV step delta, when moving along one destination pixel in Y axis. */
   float2 add_y;
 
-  /* Per-subsample source image delta UVs. */
-  Vector<float2, 9> subsampling_deltas;
-
   IndexRange dst_region_x_range;
   IndexRange dst_region_y_range;
 
   /* Cropping region in source image pixel space. */
   rctf src_crop;
 
-  void init(const float4x4 &transform_matrix, const int num_subsamples, const bool has_source_crop)
+  void init(const float4x4 &transform_matrix, const bool has_source_crop)
   {
     start_uv = transform_matrix.location().xy();
     add_x = transform_matrix.x_axis().xy();
     add_y = transform_matrix.y_axis().xy();
-    init_subsampling(num_subsamples);
     init_destination_region(transform_matrix, has_source_crop);
   }
 
  private:
-  void init_subsampling(const int num_subsamples)
-  {
-    float2 subsample_add_x = add_x / num_subsamples;
-    float2 subsample_add_y = add_y / num_subsamples;
-    float2 offset_x = -add_x * 0.5f + subsample_add_x * 0.5f;
-    float2 offset_y = -add_y * 0.5f + subsample_add_y * 0.5f;
-
-    for (int y : IndexRange(0, num_subsamples)) {
-      for (int x : IndexRange(0, num_subsamples)) {
-        float2 delta_uv = offset_x + offset_y;
-        delta_uv += x * subsample_add_x;
-        delta_uv += y * subsample_add_y;
-        subsampling_deltas.append(delta_uv);
-      }
-    }
-  }
-
   void init_destination_region(const float4x4 &transform_matrix, const bool has_source_crop)
   {
     if (!has_source_crop) {
@@ -265,27 +243,47 @@ template<eIMBInterpolationFilterMode Filter,
          bool WrapUV>
 static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
 {
-  /* Note: sample at pixel center for proper filtering. */
-  float2 uv_start = ctx.start_uv + ctx.add_x * 0.5f + ctx.add_y * 0.5f;
+  if constexpr (Filter == IMB_FILTER_BOX) {
 
-  if (ctx.subsampling_deltas.size() > 1) {
     /* Multiple samples per pixel: accumulate them pre-multiplied,
      * divide by sample count and write out (un-pre-multiplying if writing out
-     * to byte image). */
-    const float inv_count = 1.0f / ctx.subsampling_deltas.size();
+     * to byte image).
+     *
+     * Do a box filter: for each destination pixel, accumulate XxY samples from source,
+     * based on scaling factors (length of X/Y pixel steps). Use at least 2 samples
+     * along each direction, so that in case of rotation the resulting edges get
+     * some anti-aliasing, to match previous Subsampled3x3 filter behavior. The
+     * "at least 2" can be removed once/if transform edge anti-aliasing is implemented
+     * in general way for all filters. Use at most 100 samples along each direction,
+     * just as some way of clamping possible upper cost. Scaling something down by more
+     * than 100x should rarely if ever happen, worst case they will get some aliasing.
+     */
+    float2 uv_start = ctx.start_uv;
+    int sub_count_x = int(math::clamp(roundf(math::length(ctx.add_x)), 2.0f, 100.0f));
+    int sub_count_y = int(math::clamp(roundf(math::length(ctx.add_y)), 2.0f, 100.0f));
+    const float inv_count = 1.0f / (sub_count_x * sub_count_y);
+    const float2 sub_step_x = ctx.add_x / sub_count_x;
+    const float2 sub_step_y = ctx.add_y / sub_count_y;
+
     for (int yi : y_range) {
       T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
       float2 uv_row = uv_start + yi * ctx.add_y;
       for (int xi : ctx.dst_region_x_range) {
-        float2 uv = uv_row + xi * ctx.add_x;
+        const float2 uv = uv_row + xi * ctx.add_x;
         float sample[4] = {};
 
-        for (const float2 &delta_uv : ctx.subsampling_deltas) {
-          const float2 sub_uv = uv + delta_uv;
-          if (!CropSource || !should_discard(ctx, sub_uv)) {
-            T sub_sample[4];
-            sample_image<Filter, T, SrcChannels, WrapUV>(ctx.src, sub_uv.x, sub_uv.y, sub_sample);
-            add_subsample(sub_sample, sample);
+        for (int sub_y = 0; sub_y < sub_count_y; sub_y++) {
+          for (int sub_x = 0; sub_x < sub_count_x; sub_x++) {
+            float2 delta = (sub_x + 0.5f) * sub_step_x + (sub_y + 0.5f) * sub_step_y;
+            float2 sub_uv = uv + delta;
+            if (!CropSource || !should_discard(ctx, sub_uv)) {
+              T sub_sample[4];
+              sample_image<eIMBInterpolationFilterMode::IMB_FILTER_NEAREST,
+                           T,
+                           SrcChannels,
+                           WrapUV>(ctx.src, sub_uv.x, sub_uv.y, sub_sample);
+              add_subsample(sub_sample, sample);
+            }
           }
         }
 
@@ -297,7 +295,8 @@ static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
     }
   }
   else {
-    /* One sample per pixel. */
+    /* One sample per pixel. Note: sample at pixel center for proper filtering. */
+    float2 uv_start = ctx.start_uv + ctx.add_x * 0.5f + ctx.add_y * 0.5f;
     for (int yi : y_range) {
       T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
       float2 uv_row = uv_start + yi * ctx.add_y;
@@ -369,7 +368,6 @@ void IMB_transform(const ImBuf *src,
                    ImBuf *dst,
                    const eIMBTransformMode mode,
                    const eIMBInterpolationFilterMode filter,
-                   const int num_subsamples,
                    const float transform_matrix[4][4],
                    const rctf *src_crop)
 {
@@ -386,7 +384,7 @@ void IMB_transform(const ImBuf *src,
   if (crop) {
     ctx.src_crop = *src_crop;
   }
-  ctx.init(blender::float4x4(transform_matrix), num_subsamples, crop);
+  ctx.init(blender::float4x4(transform_matrix), crop);
 
   threading::parallel_for(ctx.dst_region_y_range, 8, [&](IndexRange y_range) {
     if (filter == IMB_FILTER_NEAREST) {
@@ -401,5 +399,8 @@ void IMB_transform(const ImBuf *src,
     else if (filter == IMB_FILTER_CUBIC_MITCHELL) {
       transform_scanlines_filter<IMB_FILTER_CUBIC_MITCHELL>(ctx, y_range);
     }
+    else if (filter == IMB_FILTER_BOX) {
+      transform_scanlines_filter<IMB_FILTER_BOX>(ctx, y_range);
+    }
   });
 }
diff --git a/source/blender/imbuf/intern/transform_test.cc b/source/blender/imbuf/intern/transform_test.cc
index c564d8eedc1..dde6c939228 100644
--- a/source/blender/imbuf/intern/transform_test.cc
+++ b/source/blender/imbuf/intern/transform_test.cc
@@ -37,29 +37,29 @@ static ImBuf *create_6x2_test_image()
   return img;
 }
 
-static ImBuf *transform_2x_smaller(eIMBInterpolationFilterMode filter, int subsamples)
+static ImBuf *transform_2x_smaller(eIMBInterpolationFilterMode filter)
 {
   ImBuf *src = create_6x2_test_image();
   ImBuf *dst = IMB_allocImBuf(3, 1, 32, IB_rect);
   float4x4 matrix = math::from_scale<float4x4>(float4(2.0f));
-  IMB_transform(src, dst, IMB_TRANSFORM_MODE_REGULAR, filter, subsamples, matrix.ptr(), nullptr);
+  IMB_transform(src, dst, IMB_TRANSFORM_MODE_REGULAR, filter, matrix.ptr(), nullptr);
   IMB_freeImBuf(src);
   return dst;
 }
 
-static ImBuf *transform_fractional_larger(eIMBInterpolationFilterMode filter, int subsamples)
+static ImBuf *transform_fractional_larger(eIMBInterpolationFilterMode filter)
 {
   ImBuf *src = create_6x2_test_image();
   ImBuf *dst = IMB_allocImBuf(9, 7, 32, IB_rect);
   float4x4 matrix = math::from_scale<float4x4>(float4(6.0f / 9.0f, 2.0f / 7.0f, 1.0f, 1.0f));
-  IMB_transform(src, dst, IMB_TRANSFORM_MODE_REGULAR, filter, subsamples, matrix.ptr(), nullptr);
+  IMB_transform(src, dst, IMB_TRANSFORM_MODE_REGULAR, filter, matrix.ptr(), nullptr);
   IMB_freeImBuf(src);
   return dst;
 }
 
 TEST(imbuf_transform, nearest_2x_smaller)
 {
-  ImBuf *res = transform_2x_smaller(IMB_FILTER_NEAREST, 1);
+  ImBuf *res = transform_2x_smaller(IMB_FILTER_NEAREST);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
   EXPECT_EQ(got[0], ColorTheme4b(255, 255, 255, 255));
   EXPECT_EQ(got[1], ColorTheme4b(133, 55, 31, 19));
@@ -67,19 +67,20 @@ TEST(imbuf_transform, nearest_2x_smaller)
   IMB_freeImBuf(res);
 }
 
-TEST(imbuf_transform, nearest_subsample3_2x_smaller)
+TEST(imbuf_transform, box_2x_smaller)
 {
-  ImBuf *res = transform_2x_smaller(IMB_FILTER_NEAREST, 3);
+  ImBuf *res = transform_2x_smaller(IMB_FILTER_BOX);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
-  EXPECT_EQ(got[0], ColorTheme4b(227, 170, 113, 255));
-  EXPECT_EQ(got[1], ColorTheme4b(133, 55, 31, 17));
-  EXPECT_EQ(got[2], ColorTheme4b(56, 22, 64, 253));
+  /* At 2x reduction should be same as bilinear, save for some rounding errors. */
+  EXPECT_EQ(got[0], ColorTheme4b(191, 128, 64, 255));
+  EXPECT_EQ(got[1], ColorTheme4b(133, 55, 31, 16));
+  EXPECT_EQ(got[2], ColorTheme4b(54, 50, 48, 254));
   IMB_freeImBuf(res);
 }
 
 TEST(imbuf_transform, bilinear_2x_smaller)
 {
-  ImBuf *res = transform_2x_smaller(IMB_FILTER_BILINEAR, 1);
+  ImBuf *res = transform_2x_smaller(IMB_FILTER_BILINEAR);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
   EXPECT_EQ(got[0], ColorTheme4b(191, 128, 64, 255));
   EXPECT_EQ(got[1], ColorTheme4b(133, 55, 31, 16));
@@ -89,7 +90,7 @@ TEST(imbuf_transform, bilinear_2x_smaller)
 
 TEST(imbuf_transform, cubic_bspline_2x_smaller)
 {
-  ImBuf *res = transform_2x_smaller(IMB_FILTER_CUBIC_BSPLINE, 1);
+  ImBuf *res = transform_2x_smaller(IMB_FILTER_CUBIC_BSPLINE);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
   EXPECT_EQ(got[0], ColorTheme4b(189, 126, 62, 250));
   EXPECT_EQ(got[1], ColorTheme4b(134, 57, 33, 26));
@@ -99,7 +100,7 @@ TEST(imbuf_transform, cubic_bspline_2x_smaller)
 
 TEST(imbuf_transform, cubic_mitchell_2x_smaller)
 {
-  ImBuf *res = transform_2x_smaller(IMB_FILTER_CUBIC_MITCHELL, 1);
+  ImBuf *res = transform_2x_smaller(IMB_FILTER_CUBIC_MITCHELL);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
   EXPECT_EQ(got[0], ColorTheme4b(195, 130, 67, 255));
   EXPECT_EQ(got[1], ColorTheme4b(132, 51, 28, 0));
@@ -109,7 +110,7 @@ TEST(imbuf_transform, cubic_mitchell_2x_smaller)
 
 TEST(imbuf_transform, cubic_mitchell_fractional_larger)
 {
-  ImBuf *res = transform_fractional_larger(IMB_FILTER_CUBIC_MITCHELL, 1);
+  ImBuf *res = transform_fractional_larger(IMB_FILTER_CUBIC_MITCHELL);
   const ColorTheme4b *got = reinterpret_cast<ColorTheme4b *>(res->byte_buffer.data);
   EXPECT_EQ(got[0 + 0 * res->x], ColorTheme4b(0, 0, 0, 255));
   EXPECT_EQ(got[1 + 0 * res->x], ColorTheme4b(127, 0, 0, 255));
@@ -138,8 +139,7 @@ TEST(imbuf_transform, nearest_very_large_scale)
   ImBuf *res = IMB_allocImBuf(3841, 1, 32, IB_rect);
   float4x4 matrix = math::from_loc_rot_scale<float4x4>(
       float3(254, 0, 0), math::Quaternion::identity(), float3(3.0f / 3840.0f, 1, 1));
-  IMB_transform(
-      src, res, IMB_TRANSFORM_MODE_REGULAR, IMB_FILTER_NEAREST, 1, matrix.ptr(), nullptr);
+  IMB_transform(src, res, IMB_TRANSFORM_MODE_REGULAR, IMB_FILTER_NEAREST, matrix.ptr(), nullptr);
 
   /* Check result: leftmost red, middle green, two rightmost pixels blue and black.
    * If the transform code internally does not have enough precision while stepping
diff --git a/source/blender/makesdna/DNA_sequence_types.h b/source/blender/makesdna/DNA_sequence_types.h
index 598d7ffcdee..66d69d003ac 100644
--- a/source/blender/makesdna/DNA_sequence_types.h
+++ b/source/blender/makesdna/DNA_sequence_types.h
@@ -838,7 +838,7 @@ typedef enum SequenceColorTag {
 enum {
   SEQ_TRANSFORM_FILTER_NEAREST = 0,
   SEQ_TRANSFORM_FILTER_BILINEAR = 1,
-  SEQ_TRANSFORM_FILTER_NEAREST_3x3 = 2,
+  SEQ_TRANSFORM_FILTER_BOX = 2,
   SEQ_TRANSFORM_FILTER_CUBIC_BSPLINE = 3,
   SEQ_TRANSFORM_FILTER_CUBIC_MITCHELL = 4,
 };
diff --git a/source/blender/makesrna/intern/rna_sequencer.cc b/source/blender/makesrna/intern/rna_sequencer.cc
index 1e2c6464dc4..200a8210b5a 100644
--- a/source/blender/makesrna/intern/rna_sequencer.cc
+++ b/source/blender/makesrna/intern/rna_sequencer.cc
@@ -1726,11 +1726,11 @@ static const EnumPropertyItem transform_filter_items[] = {
      "Cubic B-Spline",
      "Cubic B-Spline filter (blurry but no ringing) on 4" BLI_STR_UTF8_MULTIPLICATION_SIGN
      "4 samples"},
-    {SEQ_TRANSFORM_FILTER_NEAREST_3x3,
-     "SUBSAMPLING_3x3",
+    {SEQ_TRANSFORM_FILTER_BOX,
+     "BOX",
      0,
-     "Subsampling (3" BLI_STR_UTF8_MULTIPLICATION_SIGN "3)",
-     "Use nearest with 3" BLI_STR_UTF8_MULTIPLICATION_SIGN "3 subsamples"},
+     "Box",
+     "Averages source image samples that fall under destination pixel"},
     {0, nullptr, 0, nullptr, nullptr},
 };
 
diff --git a/source/blender/sequencer/intern/render.cc b/source/blender/sequencer/intern/render.cc
index 51d39a75c46..1dced0ed19b 100644
--- a/source/blender/sequencer/intern/render.cc
+++ b/source/blender/sequencer/intern/render.cc
@@ -465,14 +465,8 @@ static void sequencer_thumbnail_transform(ImBuf *in, ImBuf *out)
                        blender::float3{scale_x, scale_y, 1.0f});
   transform_pivot_set_m4(transform_matrix, pivot);
   invert_m4(transform_matrix);
-  const int num_subsamples = 1;
-  IMB_transform(in,
-                out,
-                IMB_TRANSFORM_MODE_REGULAR,
-                IMB_FILTER_NEAREST,
-                num_subsamples,
-                transform_matrix,
-                nullptr);
+  IMB_transform(
+      in, out, IMB_TRANSFORM_MODE_REGULAR, IMB_FILTER_NEAREST, transform_matrix, nullptr);
 }
 
 /* Check whether transform introduces transparent ares in the result (happens when the transformed
@@ -537,7 +531,6 @@ static void sequencer_preprocess_transform_crop(
 
   const StripTransform *transform = seq->strip->transform;
   eIMBInterpolationFilterMode filter = IMB_FILTER_NEAREST;
-  int num_subsamples = 1;
   switch (transform->filter) {
     case SEQ_TRANSFORM_FILTER_NEAREST:
       filter = IMB_FILTER_NEAREST;
@@ -551,19 +544,12 @@ static void sequencer_preprocess_transform_crop(
     case SEQ_TRANSFORM_FILTER_CUBIC_MITCHELL:
       filter = IMB_FILTER_CUBIC_MITCHELL;
       break;
-    case SEQ_TRANSFORM_FILTER_NEAREST_3x3:
-      filter = IMB_FILTER_NEAREST;
-      num_subsamples = 3;
+    case SEQ_TRANSFORM_FILTER_BOX:
+      filter = IMB_FILTER_BOX;
       break;
   }
 
-  IMB_transform(in,
-                out,
-                IMB_TRANSFORM_MODE_CROP_SRC,
-                filter,
-                num_subsamples,
-                transform_matrix,
-                &source_crop);
+  IMB_transform(in, out, IMB_TRANSFORM_MODE_CROP_SRC, filter, transform_matrix, &source_crop);
 
   if (!seq_image_transform_transparency_gained(context, seq)) {
     out->planes = in->planes;
-- 
2.30.2