2024-01-16 20:39:19 +01:00 · 2024-01-16 19:23:42 +01:00
1 changed files with 282 additions and 555 deletions
--- a/source/blender/imbuf/intern/transform.cc
+++ b/source/blender/imbuf/intern/transform.cc
@ -6,7 +6,6 @@
 * \ingroup imbuf
 */
 #include <array>
 #include <type_traits>
 #include "BLI_math_color_blend.h"
@ -22,73 +21,39 @@
 namespace blender::imbuf::transform {
-struct TransformUserData {
+struct TransformContext {
  /** \brief Source image buffer to read from. */
  const ImBuf *src;
  /** \brief Destination image buffer to write to. */
  ImBuf *dst;
-  /** \brief UV coordinates at the origin (0,0) in source image space. */
+  eIMBTransformMode mode;
  /* UV coordinates at the destination origin (0,0) in source image space. */
  float2 start_uv;
-  /**
+  /* Source UV step delta, when moving along one destination pixel in X axis. */
   * \brief delta UV coordinates along the source image buffer, when moving a single pixel in the X
   * axis of the dst image buffer.
   */
  float2 add_x;
-  /**
+  /* Source UV step delta, when moving along one destination pixel in Y axis. */
   * \brief delta UV coordinate along the source image buffer, when moving a single pixel in the Y
   * axes of the dst image buffer.
   */
  float2 add_y;
-  struct {
+  /* Per-subsample source image delta UVs. */
-    /**
+  Vector<float2, 9> subsampling_deltas;
     * Contains per sub-sample a delta to be added to the uv of the source image buffer.
     */
    Vector<float2, 9> delta_uvs;
  } subsampling;
-  struct {
+  IndexRange dst_region_x_range;
-    IndexRange x_range;
+  IndexRange dst_region_y_range;
    IndexRange y_range;
  } destination_region;
-  /**
+  /* Cropping region in source image pixel space. */
   * \brief Cropping region in source image pixel space.
   */
  rctf src_crop;
-  /**
+  void init(const float4x4 &transform_matrix, const int num_subsamples, const bool has_source_crop)
   * \brief Initialize the start_uv, add_x and add_y fields based on the given transform matrix.
   */
  void init(const float4x4 &transform_matrix,
            const int num_subsamples,
            const bool do_crop_destination_region)
  {
-    init_start_uv(transform_matrix);
+    start_uv = transform_matrix.location().xy();
-    init_add_x(transform_matrix);
+    add_x = transform_matrix.x_axis().xy();
-    init_add_y(transform_matrix);
+    add_y = transform_matrix.y_axis().xy();
    init_subsampling(num_subsamples);
-    init_destination_region(transform_matrix, do_crop_destination_region);
+    init_destination_region(transform_matrix, has_source_crop);
  }
 private:
  void init_start_uv(const float4x4 &transform_matrix)
  {
    start_uv = transform_matrix.location().xy();
  }
  void init_add_x(const float4x4 &transform_matrix)
  {
    add_x = transform_matrix.x_axis().xy();
  }
  void init_add_y(const float4x4 &transform_matrix)
  {
    add_y = transform_matrix.y_axis().xy();
  }
  void init_subsampling(const int num_subsamples)
  {
    float2 subsample_add_x = add_x / num_subsamples;
@ -101,17 +66,16 @@ struct TransformUserData {
        float2 delta_uv = offset_x + offset_y;
        delta_uv += x * subsample_add_x;
        delta_uv += y * subsample_add_y;
-        subsampling.delta_uvs.append(delta_uv);
+        subsampling_deltas.append(delta_uv);
      }
    }
  }
-  void init_destination_region(const float4x4 &transform_matrix,
+  void init_destination_region(const float4x4 &transform_matrix, const bool has_source_crop)
                               const bool do_crop_destination_region)
  {
-    if (!do_crop_destination_region) {
+    if (!has_source_crop) {
-      destination_region.x_range = IndexRange(dst->x);
+      dst_region_x_range = IndexRange(dst->x);
-      destination_region.y_range = IndexRange(dst->y);
+      dst_region_y_range = IndexRange(dst->y);
      return;
    }
@ -136,99 +100,28 @@ struct TransformUserData {
    rcti dest_rect;
    BLI_rcti_init(&dest_rect, 0, dst->x, 0, dst->y);
    BLI_rcti_isect(&rect, &dest_rect, &rect);
-    destination_region.x_range = IndexRange(rect.xmin, BLI_rcti_size_x(&rect));
+    dst_region_x_range = IndexRange(rect.xmin, BLI_rcti_size_x(&rect));
-    destination_region.y_range = IndexRange(rect.ymin, BLI_rcti_size_y(&rect));
+    dst_region_y_range = IndexRange(rect.ymin, BLI_rcti_size_y(&rect));
  }
 };
-/**
+/* Crop uv-coordinates that are outside the user data src_crop rect. */
- * \brief Crop uv-coordinates that are outside the user data src_crop rect.
+static bool should_discard(const TransformContext &ctx, const float2 &uv)
- */
+{
-struct CropSource {
+  return uv.x < ctx.src_crop.xmin || uv.x >= ctx.src_crop.xmax || uv.y < ctx.src_crop.ymin ||
-  /**
+         uv.y >= ctx.src_crop.ymax;
-   * \brief Should the source pixel at the given uv coordinate be discarded.
+}
   *
   * Uses user_data.src_crop to determine if the uv coordinate should be skipped.
   */
  static bool should_discard(const TransformUserData &user_data, const float2 &uv)
  {
    return uv.x < user_data.src_crop.xmin || uv.x >= user_data.src_crop.xmax ||
           uv.y < user_data.src_crop.ymin || uv.y >= user_data.src_crop.ymax;
  }
 };
-/**
+template<typename T> static T *init_pixel_pointer(const ImBuf *image, int x, int y);
- * \brief Discard that does not discard anything.
+template<> uchar *init_pixel_pointer(const ImBuf *image, int x, int y)
- */
+{
-struct NoDiscard {
+  return image->byte_buffer.data + (size_t(y) * image->x + x) * image->channels;
-  /**
+}
-   * \brief Should the source pixel at the given uv coordinate be discarded.
+template<> float *init_pixel_pointer(const ImBuf *image, int x, int y)
-   *
+{
-   * Will never discard any pixels.
+  return image->float_buffer.data + (size_t(y) * image->x + x) * image->channels;
-   */
+}
  static bool should_discard(const TransformUserData & /*user_data*/, const float2 & /*uv*/)
  {
    return false;
  }
 };
 /**
 * \brief Pointer to a pixel to write to in serial.
 */
 template<
    /**
     * \brief Kind of buffer.
     * Possible options: float, uchar.
     */
    typename StorageType = float,
    /**
     * \brief Number of channels of a single pixel.
     */
    int NumChannels = 4>
 class PixelPointer {
 public:
  static const int ChannelLen = NumChannels;
 private:
  StorageType *pointer;
 public:
  void init_pixel_pointer(const ImBuf *image_buffer, int2 start_coordinate)
  {
    const size_t offset = (start_coordinate.y * size_t(image_buffer->x) + start_coordinate.x) *
                          NumChannels;
    if constexpr (std::is_same_v<StorageType, float>) {
      pointer = image_buffer->float_buffer.data + offset;
    }
    else if constexpr (std::is_same_v<StorageType, uchar>) {
      pointer = const_cast<uchar *>(
          static_cast<const uchar *>(static_cast<const void *>(image_buffer->byte_buffer.data)) +
          offset);
    }
    else {
      pointer = nullptr;
    }
  }
  /**
   * \brief Get pointer to the current pixel to write to.
   */
  StorageType *get_pointer()
  {
    return pointer;
  }
  void increase_pixel_pointer()
  {
    pointer += NumChannels;
  }
 };
 /**
 * \brief Repeats UV coordinate.
 */
 static float wrap_uv(float value, int size)
 {
  int x = int(floorf(value));
@ -241,416 +134,246 @@ static float wrap_uv(float value, int size)
  return x;
 }
-/* TODO: should we use math_vectors for this. */
+template<typename T, int NumChannels>
-template<typename StorageType, int NumChannels>
+static void add_subsample(const T *src, T *dst, int sample_number)
-class Pixel : public std::array<StorageType, NumChannels> {
+{
- public:
+  BLI_STATIC_ASSERT((is_same_any_v<T, uchar, float>), "Only uchar and float channels supported.");
-  void clear()
+
-  {
+  float factor = 1.0 / (sample_number + 1);
-    for (int channel_index : IndexRange(NumChannels)) {
+  if constexpr (std::is_same_v<T, uchar>) {
-      (*this)[channel_index] = 0;
+    BLI_STATIC_ASSERT(NumChannels == 4, "Pixels using uchar requires to have 4 channels.");
    blend_color_interpolate_byte(dst, dst, src, factor);
  }
  else if constexpr (std::is_same_v<T, float> && NumChannels == 4) {
    blend_color_interpolate_float(dst, dst, src, factor);
  }
  else if constexpr (std::is_same_v<T, float>) {
    for (int i : IndexRange(NumChannels)) {
      dst[i] = dst[i] * (1.0f - factor) + src[i] * factor;
    }
  }
 }
-  void add_subsample(const Pixel<StorageType, NumChannels> other, int sample_number)
+template<int NumChannels>
-  {
+static void sample_nearest_float(const ImBuf *source, float u, float v, float *r_sample)
-    BLI_STATIC_ASSERT((std::is_same_v<StorageType, uchar>) || (std::is_same_v<StorageType, float>),
+{
-                      "Only uchar and float channels supported.");
+  int x1 = int(u);
  int y1 = int(v);
-    float factor = 1.0 / (sample_number + 1);
+  /* Break when sample outside image is requested. */
-    if constexpr (std::is_same_v<StorageType, uchar>) {
+  if (x1 < 0 || x1 >= source->x || y1 < 0 || y1 >= source->y) {
      BLI_STATIC_ASSERT(NumChannels == 4, "Pixels using uchar requires to have 4 channels.");
      blend_color_interpolate_byte(this->data(), this->data(), other.data(), factor);
    }
    else if constexpr (std::is_same_v<StorageType, float> && NumChannels == 4) {
      blend_color_interpolate_float(this->data(), this->data(), other.data(), factor);
    }
    else if constexpr (std::is_same_v<StorageType, float>) {
      for (int channel_index : IndexRange(NumChannels)) {
        (*this)[channel_index] = (*this)[channel_index] * (1.0 - factor) +
                                 other[channel_index] * factor;
      }
    }
  }
 };
 /**
 * \brief Read a sample from an image buffer.
 *
 * A sampler can read from an image buffer.
 */
 template<
    /** \brief Interpolation mode to use when sampling. */
    eIMBInterpolationFilterMode Filter,
    /** \brief storage type of a single pixel channel (uchar or float). */
    typename StorageType,
    /**
     * \brief number of channels if the image to read.
     *
     * Must match the actual channels of the image buffer that is sampled.
     */
    int NumChannels,
    /**
     * \brief Should UVs wrap
     */
    bool UVWrapping>
 class Sampler {
 public:
  using ChannelType = StorageType;
  static const int ChannelLen = NumChannels;
  using SampleType = Pixel<StorageType, NumChannels>;
  void sample(const ImBuf *source, const float2 &uv, SampleType &r_sample)
  {
    float u = uv.x;
    float v = uv.y;
    if constexpr (UVWrapping) {
      u = wrap_uv(u, source->x);
      v = wrap_uv(v, source->y);
    }
    /* BLI_bilinear_interpolation functions use `floor(uv)` and `floor(uv)+1`
     * texels. For proper mapping between pixel and texel spaces, need to
     * subtract 0.5. Same for bicubic. */
    if constexpr (Filter == IMB_FILTER_BILINEAR || Filter == IMB_FILTER_BICUBIC) {
      u -= 0.5f;
      v -= 0.5f;
    }
    if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, float> &&
                  NumChannels == 4)
    {
      bilinear_interpolation_color_fl(source, r_sample.data(), u, v);
    }
    else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<StorageType, uchar> &&
                       NumChannels == 4)
    {
      nearest_interpolation_color_char(source, r_sample.data(), nullptr, u, v);
    }
    else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, uchar> &&
                       NumChannels == 4)
    {
      bilinear_interpolation_color_char(source, r_sample.data(), u, v);
    }
    else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, float>) {
      if constexpr (UVWrapping) {
        BLI_bilinear_interpolation_wrap_fl(source->float_buffer.data,
                                           r_sample.data(),
                                           source->x,
                                           source->y,
                                           NumChannels,
                                           UNPACK2(uv),
                                           true,
                                           true);
      }
      else {
        BLI_bilinear_interpolation_fl(
            source->float_buffer.data, r_sample.data(), source->x, source->y, NumChannels, u, v);
      }
    }
    else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<StorageType, float>) {
      sample_nearest_float(source, u, v, r_sample);
    }
    else if constexpr (Filter == IMB_FILTER_BICUBIC && std::is_same_v<StorageType, float>) {
      BLI_bicubic_interpolation_fl(
          source->float_buffer.data, r_sample.data(), source->x, source->y, NumChannels, u, v);
    }
    else if constexpr (Filter == IMB_FILTER_BICUBIC && std::is_same_v<StorageType, uchar> &&
                       NumChannels == 4)
    {
      BLI_bicubic_interpolation_char(
          source->byte_buffer.data, r_sample.data(), source->x, source->y, u, v);
    }
    else {
      /* Unsupported sampler. */
      BLI_assert_unreachable();
    }
  }
 private:
  void sample_nearest_float(const ImBuf *source,
                            const float u,
                            const float v,
                            SampleType &r_sample)
  {
    BLI_STATIC_ASSERT(std::is_same_v<StorageType, float>);
    /* ImBuf in must have a valid rect or rect_float, assume this is already checked */
    int x1 = int(u);
    int y1 = int(v);
    /* Break when sample outside image is requested. */
    if (x1 < 0 || x1 >= source->x || y1 < 0 || y1 >= source->y) {
      for (int i = 0; i < NumChannels; i++) {
        r_sample[i] = 0.0f;
      }
      return;
    }
    const size_t offset = (size_t(source->x) * y1 + x1) * NumChannels;
    const float *dataF = source->float_buffer.data + offset;
    for (int i = 0; i < NumChannels; i++) {
-      r_sample[i] = dataF[i];
+      r_sample[i] = 0.0f;
    }
  }
 };
 /**
 * \brief Change the number of channels and store it.
 *
 * Template class to convert and store a sample in a PixelPointer.
 * It supports:
 * - 4 channel uchar -> 4 channel uchar.
 * - 4 channel float -> 4 channel float.
 * - 3 channel float -> 4 channel float.
 * - 2 channel float -> 4 channel float.
 * - 1 channel float -> 4 channel float.
 */
 template<typename StorageType, int SourceNumChannels, int DestinationNumChannels>
 class ChannelConverter {
 public:
  using SampleType = Pixel<StorageType, SourceNumChannels>;
  using PixelType = PixelPointer<StorageType, DestinationNumChannels>;
  /**
   * \brief Convert the number of channels of the given sample to match the pixel pointer and
   * store it at the location the pixel_pointer points at.
   */
  void convert_and_store(const SampleType &sample, PixelType &pixel_pointer)
  {
    if constexpr (std::is_same_v<StorageType, uchar>) {
      BLI_STATIC_ASSERT(SourceNumChannels == 4, "Unsigned chars always have 4 channels.");
      BLI_STATIC_ASSERT(DestinationNumChannels == 4, "Unsigned chars always have 4 channels.");
      copy_v4_v4_uchar(pixel_pointer.get_pointer(), sample.data());
    }
    else if constexpr (std::is_same_v<StorageType, float> && SourceNumChannels == 4 &&
                       DestinationNumChannels == 4)
    {
      copy_v4_v4(pixel_pointer.get_pointer(), sample.data());
    }
    else if constexpr (std::is_same_v<StorageType, float> && SourceNumChannels == 3 &&
                       DestinationNumChannels == 4)
    {
      copy_v4_fl4(pixel_pointer.get_pointer(), sample[0], sample[1], sample[2], 1.0f);
    }
    else if constexpr (std::is_same_v<StorageType, float> && SourceNumChannels == 2 &&
                       DestinationNumChannels == 4)
    {
      copy_v4_fl4(pixel_pointer.get_pointer(), sample[0], sample[1], 0.0f, 1.0f);
    }
    else if constexpr (std::is_same_v<StorageType, float> && SourceNumChannels == 1 &&
                       DestinationNumChannels == 4)
    {
      copy_v4_fl4(pixel_pointer.get_pointer(), sample[0], sample[0], sample[0], 1.0f);
    }
    else {
      BLI_assert_unreachable();
    }
    return;
  }
-  void mix_and_store(const SampleType &sample, PixelType &pixel_pointer, const float mix_factor)
+  size_t offset = (size_t(source->x) * y1 + x1) * NumChannels;
-  {
+  const float *dataF = source->float_buffer.data + offset;
-    if constexpr (std::is_same_v<StorageType, uchar>) {
+  for (int i = 0; i < NumChannels; i++) {
-      BLI_STATIC_ASSERT(SourceNumChannels == 4, "Unsigned chars always have 4 channels.");
+    r_sample[i] = dataF[i];
      BLI_STATIC_ASSERT(DestinationNumChannels == 4, "Unsigned chars always have 4 channels.");
      blend_color_interpolate_byte(
          pixel_pointer.get_pointer(), pixel_pointer.get_pointer(), sample.data(), mix_factor);
    }
    else if constexpr (std::is_same_v<StorageType, float> && SourceNumChannels == 4 &&
                       DestinationNumChannels == 4)
    {
      blend_color_interpolate_float(
          pixel_pointer.get_pointer(), pixel_pointer.get_pointer(), sample.data(), mix_factor);
    }
    else {
      BLI_assert_unreachable();
    }
  }
 };
 /**
 * \brief Processor for a scanline.
 */
 template<
    /**
     * \brief Discard functor that implements `should_discard`.
     */
    typename Discard,
    /**
     * \brief Color interpolation function to read from the source buffer.
     */
    typename Sampler,
    /**
     * \brief Kernel to store to the destination buffer.
     * Should be an PixelPointer
     */
    typename OutputPixelPointer>
 class ScanlineProcessor {
  Discard discarder;
  OutputPixelPointer output;
  Sampler sampler;
  /**
   * \brief Channels sizzling logic to convert between the input image buffer and the output
   * image buffer.
   */
  ChannelConverter<typename Sampler::ChannelType,
                   Sampler::ChannelLen,
                   OutputPixelPointer::ChannelLen>
      channel_converter;
 public:
  /**
   * \brief Inner loop of the transformations, processing a full scanline.
   */
  void process(const TransformUserData *user_data, int scanline)
  {
    if (user_data->subsampling.delta_uvs.size() > 1) {
      process_with_subsampling(user_data, scanline);
    }
    else {
      process_one_sample_per_pixel(user_data, scanline);
    }
  }
 private:
  void process_one_sample_per_pixel(const TransformUserData *user_data, int scanline)
  {
    /* Note: sample at pixel center for proper filtering. */
    float pixel_x = 0.5f;
    float pixel_y = scanline + 0.5f;
    float2 uv0 = user_data->start_uv + user_data->add_x * pixel_x + user_data->add_y * pixel_y;
    output.init_pixel_pointer(user_data->dst,
                              int2(user_data->destination_region.x_range.first(), scanline));
    for (int xi : user_data->destination_region.x_range) {
      float2 uv = uv0 + xi * user_data->add_x;
      if (!discarder.should_discard(*user_data, uv)) {
        typename Sampler::SampleType sample;
        sampler.sample(user_data->src, uv, sample);
        channel_converter.convert_and_store(sample, output);
      }
      output.increase_pixel_pointer();
    }
  }
  void process_with_subsampling(const TransformUserData *user_data, int scanline)
  {
    /* Note: sample at pixel center for proper filtering. */
    float pixel_x = 0.5f;
    float pixel_y = scanline + 0.5f;
    float2 uv0 = user_data->start_uv + user_data->add_x * pixel_x + user_data->add_y * pixel_y;
    output.init_pixel_pointer(user_data->dst,
                              int2(user_data->destination_region.x_range.first(), scanline));
    for (int xi : user_data->destination_region.x_range) {
      float2 uv = uv0 + xi * user_data->add_x;
      typename Sampler::SampleType sample;
      sample.clear();
      int num_subsamples_added = 0;
      for (const float2 &delta_uv : user_data->subsampling.delta_uvs) {
        const float2 subsample_uv = uv + delta_uv;
        if (!discarder.should_discard(*user_data, subsample_uv)) {
          typename Sampler::SampleType sub_sample;
          sampler.sample(user_data->src, subsample_uv, sub_sample);
          sample.add_subsample(sub_sample, num_subsamples_added);
          num_subsamples_added += 1;
        }
      }
      if (num_subsamples_added != 0) {
        const float mix_weight = float(num_subsamples_added) /
                                 user_data->subsampling.delta_uvs.size();
        channel_converter.mix_and_store(sample, output, mix_weight);
      }
      output.increase_pixel_pointer();
    }
  }
 };
 /**
 * \brief callback function for threaded transformation.
 */
 template<typename Processor> void transform_scanline_function(void *custom_data, int scanline)
 {
  const TransformUserData *user_data = static_cast<const TransformUserData *>(custom_data);
  Processor processor;
  processor.process(user_data, scanline);
 }
 /* Read a pixel from an image buffer, with filtering/wrapping parameters. */
 template<eIMBInterpolationFilterMode Filter, typename T, int NumChannels, bool WrapUV>
 static void sample_image(const ImBuf *source, float u, float v, T *r_sample)
 {
  if constexpr (WrapUV) {
    u = wrap_uv(u, source->x);
    v = wrap_uv(v, source->y);
  }
  /* BLI_bilinear_interpolation functions use `floor(uv)` and `floor(uv)+1`
   * texels. For proper mapping between pixel and texel spaces, need to
   * subtract 0.5. Same for bicubic. */
  if constexpr (Filter == IMB_FILTER_BILINEAR || Filter == IMB_FILTER_BICUBIC) {
    u -= 0.5f;
    v -= 0.5f;
  }
  if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float> && NumChannels == 4) {
    bilinear_interpolation_color_fl(source, r_sample, u, v);
  }
  else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<T, uchar> && NumChannels == 4)
  {
    nearest_interpolation_color_char(source, r_sample, nullptr, u, v);
  }
  else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, uchar> && NumChannels == 4)
  {
    bilinear_interpolation_color_char(source, r_sample, u, v);
  }
  else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float>) {
    if constexpr (WrapUV) {
      BLI_bilinear_interpolation_wrap_fl(source->float_buffer.data,
                                         r_sample,
                                         source->x,
                                         source->y,
                                         NumChannels,
                                         u,
                                         v,
                                         true,
                                         true);
    }
    else {
      BLI_bilinear_interpolation_fl(
          source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
    }
  }
  else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<T, float>) {
    sample_nearest_float<NumChannels>(source, u, v, r_sample);
  }
  else if constexpr (Filter == IMB_FILTER_BICUBIC && std::is_same_v<T, float>) {
    BLI_bicubic_interpolation_fl(
        source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
  }
  else if constexpr (Filter == IMB_FILTER_BICUBIC && std::is_same_v<T, uchar> && NumChannels == 4)
  {
    BLI_bicubic_interpolation_char(source->byte_buffer.data, r_sample, source->x, source->y, u, v);
  }
  else {
    /* Unsupported sampler. */
    BLI_assert_unreachable();
  }
 }
 template<typename T, int SrcChannels> static void store_sample(const T *sample, T *dst)
 {
  if constexpr (std::is_same_v<T, uchar>) {
    BLI_STATIC_ASSERT(SrcChannels == 4, "Unsigned chars always have 4 channels.");
    copy_v4_v4_uchar(dst, sample);
  }
  else if constexpr (std::is_same_v<T, float> && SrcChannels == 4) {
    copy_v4_v4(dst, sample);
  }
  else if constexpr (std::is_same_v<T, float> && SrcChannels == 3) {
    copy_v4_fl4(dst, sample[0], sample[1], sample[2], 1.0f);
  }
  else if constexpr (std::is_same_v<T, float> && SrcChannels == 2) {
    copy_v4_fl4(dst, sample[0], sample[1], 0.0f, 1.0f);
  }
  else if constexpr (std::is_same_v<T, float> && SrcChannels == 1) {
    /* Note: single channel sample is stored as grayscale. */
    copy_v4_fl4(dst, sample[0], sample[0], sample[0], 1.0f);
  }
  else {
    BLI_assert_unreachable();
  }
 }
 template<typename T, int SrcChannels>
 static void mix_and_store_sample(const T *sample, T *dst, const float mix_factor)
 {
  if constexpr (std::is_same_v<T, uchar>) {
    BLI_STATIC_ASSERT(SrcChannels == 4, "Unsigned chars always have 4 channels.");
    blend_color_interpolate_byte(dst, dst, sample, mix_factor);
  }
  else if constexpr (std::is_same_v<T, float> && SrcChannels == 4) {
    blend_color_interpolate_float(dst, dst, sample, mix_factor);
  }
  else {
    BLI_assert_unreachable();
  }
 }
 /* Process a block of destination image scanlines. */
 template<eIMBInterpolationFilterMode Filter,
-         typename StorageType,
+         typename T,
-         int SourceNumChannels,
+         int SrcChannels,
-         int DestinationNumChannels>
+         bool CropSource,
-ScanlineThreadFunc get_scanline_function(const eIMBTransformMode mode)
+         bool WrapUV>
-
+static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
 {
-  switch (mode) {
+  /* Note: sample at pixel center for proper filtering. */
-    case IMB_TRANSFORM_MODE_REGULAR:
+  float2 uv_start = ctx.start_uv + ctx.add_x * 0.5f + ctx.add_y * 0.5f;
      return transform_scanline_function<
          ScanlineProcessor<NoDiscard,
                            Sampler<Filter, StorageType, SourceNumChannels, false>,
                            PixelPointer<StorageType, DestinationNumChannels>>>;
    case IMB_TRANSFORM_MODE_CROP_SRC:
      return transform_scanline_function<
          ScanlineProcessor<CropSource,
                            Sampler<Filter, StorageType, SourceNumChannels, false>,
                            PixelPointer<StorageType, DestinationNumChannels>>>;
    case IMB_TRANSFORM_MODE_WRAP_REPEAT:
      return transform_scanline_function<
          ScanlineProcessor<NoDiscard,
                            Sampler<Filter, StorageType, SourceNumChannels, true>,
                            PixelPointer<StorageType, DestinationNumChannels>>>;
  }
-  BLI_assert_unreachable();
+  if (ctx.subsampling_deltas.size() > 1) {
-  return nullptr;
+    /* Multiple samples per pixel. */
-}
+    for (int yi : y_range) {
      T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
      float2 uv_row = uv_start + yi * ctx.add_y;
      for (int xi : ctx.dst_region_x_range) {
        float2 uv = uv_row + xi * ctx.add_x;
        T sample[4] = {};
        int num_subsamples_added = 0;
-template<eIMBInterpolationFilterMode Filter>
+        for (const float2 &delta_uv : ctx.subsampling_deltas) {
-ScanlineThreadFunc get_scanline_function(const TransformUserData *user_data,
+          const float2 sub_uv = uv + delta_uv;
-                                         const eIMBTransformMode mode)
+          if (!CropSource || !should_discard(ctx, sub_uv)) {
-{
+            T sub_sample[4];
-  const ImBuf *src = user_data->src;
+            sample_image<Filter, T, SrcChannels, WrapUV>(ctx.src, sub_uv.x, sub_uv.y, sub_sample);
-  const ImBuf *dst = user_data->dst;
+            add_subsample<T, SrcChannels>(sub_sample, sample, num_subsamples_added);
            num_subsamples_added += 1;
          }
        }
-  if (src->channels == 4 && dst->channels == 4) {
+        if (num_subsamples_added != 0) {
-    return get_scanline_function<Filter, float, 4, 4>(mode);
+          const float mix_weight = float(num_subsamples_added) / ctx.subsampling_deltas.size();
-  }
+          mix_and_store_sample<T, SrcChannels>(sample, output, mix_weight);
-  if (src->channels == 3 && dst->channels == 4) {
+        }
-    return get_scanline_function<Filter, float, 3, 4>(mode);
+        output += 4;
  }
  if (src->channels == 2 && dst->channels == 4) {
    return get_scanline_function<Filter, float, 2, 4>(mode);
  }
  if (src->channels == 1 && dst->channels == 4) {
    return get_scanline_function<Filter, float, 1, 4>(mode);
  }
  return nullptr;
 }
 template<eIMBInterpolationFilterMode Filter>
 static void transform_threaded(TransformUserData *user_data, const eIMBTransformMode mode)
 {
  ScanlineThreadFunc scanline_func = nullptr;
  if (user_data->dst->float_buffer.data && user_data->src->float_buffer.data) {
    scanline_func = get_scanline_function<Filter>(user_data, mode);
  }
  else if (user_data->dst->byte_buffer.data && user_data->src->byte_buffer.data) {
    /* Number of channels is always 4 when using uchar buffers (sRGB + straight alpha). */
    scanline_func = get_scanline_function<Filter, uchar, 4, 4>(mode);
  }
  if (scanline_func != nullptr) {
    threading::parallel_for(user_data->destination_region.y_range, 8, [&](IndexRange range) {
      for (int scanline : range) {
        scanline_func(user_data, scanline);
      }
-    });
+    }
  }
  else {
    /* One sample per pixel. */
    for (int yi : y_range) {
      T *output = init_pixel_pointer<T>(ctx.dst, ctx.dst_region_x_range.first(), yi);
      float2 uv_row = uv_start + yi * ctx.add_y;
      for (int xi : ctx.dst_region_x_range) {
        float2 uv = uv_row + xi * ctx.add_x;
        if (!CropSource || !should_discard(ctx, uv)) {
          T sample[4];
          sample_image<Filter, T, SrcChannels, WrapUV>(ctx.src, uv.x, uv.y, sample);
          store_sample<T, SrcChannels>(sample, output);
        }
        output += 4;
      }
    }
  }
 }
 template<eIMBInterpolationFilterMode Filter, typename T, int SrcChannels>
 static void transform_scanlines(const TransformContext &ctx, IndexRange y_range)
 {
  switch (ctx.mode) {
    case IMB_TRANSFORM_MODE_REGULAR:
      process_scanlines<Filter, T, SrcChannels, false, false>(ctx, y_range);
      break;
    case IMB_TRANSFORM_MODE_CROP_SRC:
      process_scanlines<Filter, T, SrcChannels, true, false>(ctx, y_range);
      break;
    case IMB_TRANSFORM_MODE_WRAP_REPEAT:
      process_scanlines<Filter, T, SrcChannels, false, true>(ctx, y_range);
      break;
    default:
      BLI_assert_unreachable();
      break;
  }
 }
 template<eIMBInterpolationFilterMode Filter>
 static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range)
 {
  int channels = ctx.src->channels;
  if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) {
    /* Float images. */
    if (channels == 4) {
      transform_scanlines<Filter, float, 4>(ctx, y_range);
    }
    else if (channels == 3) {
      transform_scanlines<Filter, float, 3>(ctx, y_range);
    }
    else if (channels == 2) {
      transform_scanlines<Filter, float, 2>(ctx, y_range);
    }
    else if (channels == 1) {
      transform_scanlines<Filter, float, 1>(ctx, y_range);
    }
  }
  else if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
    /* Byte images. */
    if (channels == 4) {
      transform_scanlines<Filter, uchar, 4>(ctx, y_range);
    }
  }
 }
@ -659,6 +382,7 @@ static void transform_threaded(TransformUserData *user_data, const eIMBTransform
 extern "C" {
 using namespace blender::imbuf::transform;
 using namespace blender;
 void IMB_transform(const ImBuf *src,
                   ImBuf *dst,
@ -671,25 +395,28 @@ void IMB_transform(const ImBuf *src,
  BLI_assert_msg(mode != IMB_TRANSFORM_MODE_CROP_SRC || src_crop != nullptr,
                 "No source crop rect given, but crop source is requested. Or source crop rect "
                 "was given, but crop source was not requested.");
  BLI_assert_msg(dst->channels == 4, "Destination image must have 4 channels.");
-  TransformUserData user_data;
+  TransformContext ctx;
-  user_data.src = src;
+  ctx.src = src;
-  user_data.dst = dst;
+  ctx.dst = dst;
-  if (mode == IMB_TRANSFORM_MODE_CROP_SRC) {
+  ctx.mode = mode;
-    user_data.src_crop = *src_crop;
+  bool crop = mode == IMB_TRANSFORM_MODE_CROP_SRC;
  if (crop) {
    ctx.src_crop = *src_crop;
  }
-  user_data.init(blender::float4x4(transform_matrix),
+  ctx.init(blender::float4x4(transform_matrix), num_subsamples, crop);
                 num_subsamples,
                 ELEM(mode, IMB_TRANSFORM_MODE_CROP_SRC));
-  if (filter == IMB_FILTER_NEAREST) {
+  threading::parallel_for(ctx.dst_region_y_range, 8, [&](IndexRange y_range) {
-    transform_threaded<IMB_FILTER_NEAREST>(&user_data, mode);
+    if (filter == IMB_FILTER_NEAREST) {
-  }
+      transform_scanlines_filter<IMB_FILTER_NEAREST>(ctx, y_range);
-  else if (filter == IMB_FILTER_BILINEAR) {
+    }
-    transform_threaded<IMB_FILTER_BILINEAR>(&user_data, mode);
+    else if (filter == IMB_FILTER_BILINEAR) {
-  }
+      transform_scanlines_filter<IMB_FILTER_BILINEAR>(ctx, y_range);
-  else if (filter == IMB_FILTER_BICUBIC) {
+    }
-    transform_threaded<IMB_FILTER_BICUBIC>(&user_data, mode);
+    else if (filter == IMB_FILTER_BICUBIC) {
-  }
+      transform_scanlines_filter<IMB_FILTER_BICUBIC>(ctx, y_range);
    }
  });
 }
 }