2023-11-01 10:49:18 +01:00 · 2023-09-25 12:34:08 +02:00 · 2023-10-27 15:49:28 +02:00 · 2023-10-29 11:07:37 +01:00 · 2023-09-25 12:28:43 +02:00 · 2023-10-27 15:53:28 +02:00
7 changed files with 575 additions and 20 deletions
--- a/source/blender/compositor/CMakeLists.txt
+++ b/source/blender/compositor/CMakeLists.txt
@ -345,6 +345,8 @@ if(WITH_COMPOSITOR_CPU)
    operations/COM_GaussianXBlurOperation.h
    operations/COM_GaussianYBlurOperation.cc
    operations/COM_GaussianYBlurOperation.h
+    operations/COM_SummedAreaTableOperation.h
+    operations/COM_SummedAreaTableOperation.cc
    operations/COM_KuwaharaAnisotropicOperation.cc
    operations/COM_KuwaharaAnisotropicOperation.h
    operations/COM_KuwaharaAnisotropicStructureTensorOperation.cc
@ -665,6 +667,7 @@ if(WITH_COMPOSITOR_CPU)
      tests/COM_BufferRange_test.cc
      tests/COM_BuffersIterator_test.cc
      tests/COM_NodeOperation_test.cc
+      tests/COM_ComputeSummedAreaTableOperation_test.cc
    )
    set(TEST_INC
    )
--- a/source/blender/compositor/nodes/COM_KuwaharaNode.cc
+++ b/source/blender/compositor/nodes/COM_KuwaharaNode.cc
@ -12,6 +12,7 @@
 #include "COM_KuwaharaAnisotropicOperation.h"
 #include "COM_KuwaharaAnisotropicStructureTensorOperation.h"
 #include "COM_KuwaharaClassicOperation.h"
+#include "COM_SummedAreaTableOperation.h"

 namespace blender::compositor {

@ -23,12 +24,24 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,

  switch (data->variation) {
    case CMP_NODE_KUWAHARA_CLASSIC: {
-      KuwaharaClassicOperation *operation = new KuwaharaClassicOperation();
+      KuwaharaClassicOperation *kuwahara_classic = new KuwaharaClassicOperation();
+      converter.add_operation(kuwahara_classic);
+      converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
+      converter.map_input_socket(get_input_socket(1), kuwahara_classic->get_input_socket(1));

-      converter.add_operation(operation);
-      converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
-      converter.map_input_socket(get_input_socket(1), operation->get_input_socket(1));
-      converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
+      SummedAreaTableOperation *sat = new SummedAreaTableOperation();
+      sat->set_mode(SummedAreaTableOperation::eMode::Identity);
+      converter.add_operation(sat);
+      converter.map_input_socket(get_input_socket(0), sat->get_input_socket(0));
+      converter.add_link(sat->get_output_socket(0), kuwahara_classic->get_input_socket(2));
+
+      SummedAreaTableOperation *sat_squared = new SummedAreaTableOperation();
+      sat_squared->set_mode(SummedAreaTableOperation::eMode::Squared);
+      converter.add_operation(sat_squared);
+      converter.map_input_socket(get_input_socket(0), sat_squared->get_input_socket(0));
+      converter.add_link(sat_squared->get_output_socket(0), kuwahara_classic->get_input_socket(3));
+
+      converter.map_output_socket(get_output_socket(0), kuwahara_classic->get_output_socket(0));
      break;
    }

--- a/source/blender/compositor/operations/COM_KuwaharaClassicOperation.cc
+++ b/source/blender/compositor/operations/COM_KuwaharaClassicOperation.cc
@ -16,6 +16,8 @@ KuwaharaClassicOperation::KuwaharaClassicOperation()
 {
  this->add_input_socket(DataType::Color);
  this->add_input_socket(DataType::Value);
+  this->add_input_socket(DataType::Color);
+  this->add_input_socket(DataType::Color);
  this->add_output_socket(DataType::Color);

  this->flags_.is_fullframe_operation = true;
@ -25,12 +27,16 @@ void KuwaharaClassicOperation::init_execution()
 {
  image_reader_ = this->get_input_socket_reader(0);
  size_reader_ = this->get_input_socket_reader(1);
+  sat_reader_ = this->get_input_socket_reader(2);
+  sat_squared_reader_ = this->get_input_socket_reader(3);
 }

 void KuwaharaClassicOperation::deinit_execution()
 {
  image_reader_ = nullptr;
  size_reader_ = nullptr;
+  sat_reader_ = nullptr;
+  sat_squared_reader_ = nullptr;
 }

 void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
@ -46,13 +52,44 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
  size_reader_->read_sampled(size, x, y, sampler);
  const int kernel_size = int(math::max(0.0f, size[0]));

-  /* Split surroundings of pixel into 4 overlapping regions. */
-  for (int dy = -kernel_size; dy <= kernel_size; dy++) {
-    for (int dx = -kernel_size; dx <= kernel_size; dx++) {
+  /* Naive implementation is more accurate for small kernel sizes. */
+  if (kernel_size >= 4) {
+    for (int q = 0; q < 4; q++) {
+      /* A fancy expression to compute the sign of the quadrant q. */
+      int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));

-      int xx = x + dx;
-      int yy = y + dy;
-      if (xx >= 0 && yy >= 0 && xx < this->get_width() && yy < this->get_height()) {
+      int2 lower_bound = int2(x, y) -
+                         int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
+      int2 upper_bound = int2(x, y) +
+                         int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
+
+      /* Limit the quadrants to the image bounds. */
+      int2 image_bound = int2(this->get_width(), this->get_height()) - int2(1);
+      int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
+      int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
+      int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
+      quadrant_pixel_count[q] = region_size.x * region_size.y;
+
+      rcti kernel_area;
+      kernel_area.xmin = corrected_lower_bound[0];
+      kernel_area.ymin = corrected_lower_bound[1];
+      kernel_area.xmax = corrected_upper_bound[0];
+      kernel_area.ymax = corrected_upper_bound[1];
+
+      mean_of_color[q] = summed_area_table_sum_tiled(sat_reader_, kernel_area);
+      mean_of_squared_color[q] = summed_area_table_sum_tiled(sat_squared_reader_, kernel_area);
+    }
+  }
+  else {
+    /* Split surroundings of pixel into 4 overlapping regions. */
+    for (int dy = -kernel_size; dy <= kernel_size; dy++) {
+      for (int dx = -kernel_size; dx <= kernel_size; dx++) {
+
+        int xx = x + dx;
+        int yy = y + dy;
+        if (xx < 0 || yy < 0 || xx >= this->get_width() || yy >= this->get_height()) {
+          continue;
+        }

        float4 color;
        image_reader_->read_sampled(color, xx, yy, sampler);
@ -115,24 +152,60 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
 {
  MemoryBuffer *image = inputs[0];
  MemoryBuffer *size_image = inputs[1];
+  MemoryBuffer *sat = inputs[2];
+  MemoryBuffer *sat_squared = inputs[3];
+
+  int width = image->get_width();
+  int height = image->get_height();

  for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
    const int x = it.x;
    const int y = it.y;

-    float4 mean_of_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
-    float4 mean_of_squared_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
-    int quadrant_pixel_count[] = {0, 0, 0, 0};
+    float4 mean_of_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
+    float4 mean_of_squared_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
+    int quadrant_pixel_count[4] = {0, 0, 0, 0};

    const int kernel_size = int(math::max(0.0f, *size_image->get_elem(x, y)));

-    /* Split surroundings of pixel into 4 overlapping regions. */
-    for (int dy = -kernel_size; dy <= kernel_size; dy++) {
-      for (int dx = -kernel_size; dx <= kernel_size; dx++) {
+    /* Naive implementation is more accurate for small kernel sizes. */
+    if (kernel_size >= 4) {
+      for (int q = 0; q < 4; q++) {
+        /* A fancy expression to compute the sign of the quadrant q. */
+        int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));

-        int xx = x + dx;
-        int yy = y + dy;
-        if (xx >= 0 && yy >= 0 && xx < image->get_width() && yy < image->get_height()) {
+        int2 lower_bound = int2(x, y) -
+                           int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
+        int2 upper_bound = int2(x, y) +
+                           int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
+
+        /* Limit the quadrants to the image bounds. */
+        int2 image_bound = int2(width, height) - int2(1);
+        int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
+        int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
+        int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
+        quadrant_pixel_count[q] = region_size.x * region_size.y;
+
+        rcti kernel_area;
+        kernel_area.xmin = corrected_lower_bound[0];
+        kernel_area.ymin = corrected_lower_bound[1];
+        kernel_area.xmax = corrected_upper_bound[0];
+        kernel_area.ymax = corrected_upper_bound[1];
+
+        mean_of_color[q] = summed_area_table_sum(sat, kernel_area);
+        mean_of_squared_color[q] = summed_area_table_sum(sat_squared, kernel_area);
+      }
+    }
+    else {
+      /* Split surroundings of pixel into 4 overlapping regions. */
+      for (int dy = -kernel_size; dy <= kernel_size; dy++) {
+        for (int dx = -kernel_size; dx <= kernel_size; dx++) {
+
+          int xx = x + dx;
+          int yy = y + dy;
+          if (xx < 0 || yy < 0 || xx >= image->get_width() || yy >= image->get_height()) {
+            continue;
+          }

          float4 color;
          image->read_elem(xx, yy, &color.x);
--- a/source/blender/compositor/operations/COM_KuwaharaClassicOperation.h
+++ b/source/blender/compositor/operations/COM_KuwaharaClassicOperation.h
@ -11,6 +11,8 @@ namespace blender::compositor {
 class KuwaharaClassicOperation : public MultiThreadedOperation {
  SocketReader *image_reader_;
  SocketReader *size_reader_;
+  SocketReader *sat_reader_;
+  SocketReader *sat_squared_reader_;

 public:
  KuwaharaClassicOperation();
--- a/source/blender/compositor/operations/COM_SummedAreaTableOperation.cc
+++ b/source/blender/compositor/operations/COM_SummedAreaTableOperation.cc
@ -0,0 +1,217 @@
+/* SPDX-FileCopyrightText: 2023 Blender Foundation
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_math_vector.hh"
+#include "BLI_math_vector_types.hh"
+#include "BLI_task.hh"
+
+#include "COM_SummedAreaTableOperation.h"
+
+namespace blender::compositor {
+
+/* Declares one color input and one color output, marks the operation as full-frame capable
+ * and defaults to accumulating the plain (non-squared) input values. */
+SummedAreaTableOperation::SummedAreaTableOperation() : mode_(eMode::Identity)
+{
+  add_input_socket(DataType::Color);
+  add_output_socket(DataType::Color);
+
+  flags_.is_fullframe_operation = true;
+}
+
+/* Runs the base-class initialization first, then caches the reader of input socket 0. */
+void SummedAreaTableOperation::init_execution()
+{
+  SingleThreadedOperation::init_execution();
+  this->image_reader_ = get_input_socket_reader(0);
+}
+
+/* Forgets the cached input reader, then runs the base-class de-initialization. */
+void SummedAreaTableOperation::deinit_execution()
+{
+  this->image_reader_ = nullptr;
+  SingleThreadedOperation::deinit_execution();
+}
+
+/* Always asks input operation 0 for its complete extent: the requested rectangle is ignored
+ * and replaced by (0, 0) - (width, height) of that input before the query is forwarded.
+ * Returns whatever the input operation answers. */
+bool SummedAreaTableOperation::determine_depending_area_of_interest(
+    rcti * /*input*/, ReadBufferOperation *read_operation, rcti *output)
+{
+  NodeOperation *source = get_input_operation(0);
+
+  rcti full_input;
+  full_input.xmin = 0;
+  full_input.ymin = 0;
+  full_input.xmax = source->get_width();
+  full_input.ymax = source->get_height();
+
+  return source->determine_depending_area_of_interest(&full_input, read_operation, output);
+}
+
+/* Reports the whole canvas of the queried input as required, independent of which output
+ * area was requested. */
+void SummedAreaTableOperation::get_area_of_interest(int input_idx,
+                                                    const rcti & /*output_area*/,
+                                                    rcti &r_input_area)
+{
+  NodeOperation *source = get_input_operation(input_idx);
+  r_input_area = source->get_canvas();
+}
+
+/* Fills `output` over `area` with the summed area table of `inputs[0]`.
+ *
+ * The table is built in two passes: a running sum along every row (each input pixel is squared
+ * first when the mode is Squared), followed by a running sum along every column of that
+ * intermediate result. Accumulation starts at area.xmin / area.ymin. */
+void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
+                                                    const rcti &area,
+                                                    Span<MemoryBuffer *> inputs)
+{
+  /* Note: although this is a single threaded call, multithreading is used. */
+  MemoryBuffer *image = inputs[0];
+
+  /* IndexRange is constructed from (start, size), not (begin, end). Passing the max
+   * coordinate as the size would run past the area whenever its min coordinate is not 0. */
+  const IndexRange rows(area.ymin, area.ymax - area.ymin);
+  const IndexRange columns(area.xmin, area.xmax - area.xmin);
+
+  /* First pass: copy input to output and sum horizontally. Every row has its own accumulator,
+   * so rows can be processed independently. */
+  threading::parallel_for(rows, 1, [&](const IndexRange range_y) {
+    for (const int y : range_y) {
+      float4 accumulated_color = float4(0.0f);
+      for (const int x : columns) {
+        const float4 color = float4(image->get_elem(x, y));
+        accumulated_color += mode_ == eMode::Squared ? color * color : color;
+        copy_v4_v4(output->get_elem(x, y), accumulated_color);
+      }
+    }
+  });
+
+  /* Second pass: vertical sum over the row sums written by the first pass. */
+  threading::parallel_for(columns, 1, [&](const IndexRange range_x) {
+    for (const int x : range_x) {
+      float4 accumulated_color = float4(0.0f);
+      for (const int y : rows) {
+        const float4 color = float4(output->get_elem(x, y));
+        accumulated_color += color;
+        copy_v4_v4(output->get_elem(x, y), accumulated_color);
+      }
+    }
+  });
+}
+
+/* Allocates a new color buffer covering `area` and fills it with the summed area table of the
+ * image read through the input reader cached by init_execution().
+ *
+ * Uses the same two passes as update_memory_buffer(): running sums along the rows (squaring
+ * each pixel first when the mode is Squared), then running sums along the columns.
+ * The buffer is allocated with `new` and returned to the caller.
+ * NOTE(review): presumably the SingleThreadedOperation base takes ownership — confirm. */
+MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
+{
+  /* Note: although this is a single threaded call, multithreading is used. */
+  MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
+
+  /* IndexRange is constructed from (start, size), not (begin, end). Passing the max
+   * coordinate as the size would run past the area whenever its min coordinate is not 0. */
+  const IndexRange rows(area->ymin, area->ymax - area->ymin);
+  const IndexRange columns(area->xmin, area->xmax - area->xmin);
+
+  /* First pass: copy input to output and sum horizontally. */
+  threading::parallel_for(rows, 1, [&](const IndexRange range_y) {
+    for (const int y : range_y) {
+      float4 accumulated_color = float4(0.0f);
+      for (const int x : columns) {
+        float4 color;
+        image_reader_->read(&color.x, x, y, nullptr);
+        accumulated_color += mode_ == eMode::Squared ? color * color : color;
+        copy_v4_v4(output->get_elem(x, y), accumulated_color);
+      }
+    }
+  });
+
+  /* Second pass: vertical sum over the row sums written by the first pass. */
+  threading::parallel_for(columns, 1, [&](const IndexRange range_x) {
+    for (const int x : range_x) {
+      float4 accumulated_color = float4(0.0f);
+      for (const int y : rows) {
+        accumulated_color += float4(output->get_elem(x, y));
+        copy_v4_v4(output->get_elem(x, y), accumulated_color);
+      }
+    }
+  });
+
+  return output;
+}
+
+/* Selects whether input values are accumulated as they are (Identity) or squared first
+ * (Squared). */
+void SummedAreaTableOperation::set_mode(eMode mode)
+{
+  this->mode_ = mode;
+}
+
+/* Returns the accumulation mode currently stored in the operation. */
+SummedAreaTableOperation::eMode SummedAreaTableOperation::get_mode()
+{
+  return this->mode_;
+}
+
+/* Returns the sum of all pixels inside `area` (both bounds inclusive), looked up from a summed
+ * area table that is sampled through a socket reader.
+ *
+ * `area` must satisfy xmin <= xmax and ymin <= ymax (asserted). The upper bound is clamped to
+ * the last column/row of the table. The lower bound is not clamped, so for areas touching the
+ * left or bottom border the reader is sampled at coordinate -1.
+ * NOTE(review): this relies on the reader yielding zero outside of the image — confirm. */
+float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
+{
+  /*
+   * a, b, c and d are the bounding box of the given area. They are defined as follows:
+   *
+   *     y
+   *     ▲
+   *     │
+   *     ├──────x───────x
+   *     │      │c     d│
+   *     ├──────x───────x
+   *     │      │a     b│
+   *     └──────┴───────┴──────► x
+   *
+   * d is the table entry of the upper right pixel of the area; a, b and c are the entries one
+   * pixel before the area (left of it and/or below it). The sum is `d + a - b - c`.
+   *
+   * Note: this is the same definition as in https://en.wikipedia.org/wiki/Summed-area_table
+   * but using the blender convention with the origin being at the lower left.
+   */
+
+  BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);
+
+  const int2 lower_bound(area.xmin, area.ymin);
+  const int2 upper_bound(area.xmax, area.ymax);
+
+  /* Table entries include their own pixel, hence the entries to subtract lie one pixel before
+   * the lower bound. */
+  const int2 corrected_lower_bound = lower_bound - int2(1, 1);
+
+  const int last_column = static_cast<int>(buffer->get_width()) - 1;
+  const int last_row = static_cast<int>(buffer->get_height()) - 1;
+  int2 corrected_upper_bound;
+  corrected_upper_bound[0] = math::min(last_column, upper_bound[0]);
+  corrected_upper_bound[1] = math::min(last_row, upper_bound[1]);
+
+  float4 a, b, c, d;
+  buffer->read_sampled(
+      &d.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
+  buffer->read_sampled(
+      &a.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
+  const float4 addend = a + d;
+
+  buffer->read_sampled(
+      &c.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
+  buffer->read_sampled(
+      &b.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
+  const float4 subtrahend = b + c;
+
+  return addend - subtrahend;
+}
+
+/* Returns the sum of all pixels inside `area` (both bounds inclusive), looked up from a summed
+ * area table stored in a memory buffer.
+ *
+ * `area` must satisfy xmin <= xmax and ymin <= ymax (asserted). The upper bound is clamped to
+ * the last column/row of the buffer. The lower bound is not clamped, so for areas touching the
+ * left or bottom border the buffer is read at coordinate -1 through read_elem_checked().
+ * NOTE(review): this relies on read_elem_checked() yielding zero outside of the buffer —
+ * confirm. */
+float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area)
+{
+  /*
+   * a, b, c and d are the bounding box of the given area. They are defined as follows:
+   *
+   *     y
+   *     ▲
+   *     │
+   *     ├──────x───────x
+   *     │      │c     d│
+   *     ├──────x───────x
+   *     │      │a     b│
+   *     └──────┴───────┴──────► x
+   *
+   * d is the table entry of the upper right pixel of the area; a, b and c are the entries one
+   * pixel before the area (left of it and/or below it). The sum is `d + a - b - c`.
+   *
+   * Note: this is the same definition as in https://en.wikipedia.org/wiki/Summed-area_table
+   * but using the blender convention with the origin being at the lower left.
+   */
+
+  BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);
+
+  const int2 lower_bound(area.xmin, area.ymin);
+  const int2 upper_bound(area.xmax, area.ymax);
+
+  /* Table entries include their own pixel, hence the entries to subtract lie one pixel before
+   * the lower bound. */
+  const int2 corrected_lower_bound = lower_bound - int2(1, 1);
+  int2 corrected_upper_bound;
+  corrected_upper_bound[0] = math::min(buffer->get_width() - 1, upper_bound[0]);
+  corrected_upper_bound[1] = math::min(buffer->get_height() - 1, upper_bound[1]);
+
+  float4 a, b, c, d;
+  buffer->read_elem_checked(corrected_upper_bound[0], corrected_upper_bound[1], d);
+  buffer->read_elem_checked(corrected_lower_bound[0], corrected_lower_bound[1], a);
+  const float4 addend = a + d;
+
+  buffer->read_elem_checked(corrected_lower_bound[0], corrected_upper_bound[1], c);
+  buffer->read_elem_checked(corrected_upper_bound[0], corrected_lower_bound[1], b);
+  const float4 subtrahend = b + c;
+
+  return addend - subtrahend;
+}
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/operations/COM_SummedAreaTableOperation.h
+++ b/source/blender/compositor/operations/COM_SummedAreaTableOperation.h
@ -0,0 +1,56 @@
+/* SPDX-FileCopyrightText: 2023 Blender Foundation
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "COM_SingleThreadedOperation.h"
+
+namespace blender::compositor {
+
+/**
+ * \brief SummedAreaTableOperation class computes the summed area table.
+ *
+ * Takes one color input and produces one color output of the same area. Depending on the
+ * mode, either the input values or their component-wise squares are accumulated.
+ */
+class SummedAreaTableOperation : public SingleThreadedOperation {
+
+ public:
+  SummedAreaTableOperation();
+
+  /** What gets accumulated per pixel: the input value itself or its square. */
+  enum eMode { Identity = 1, Squared };
+
+  void set_mode(const eMode mode);
+  eMode get_mode();
+
+  /**
+   * Initialize the execution
+   */
+  void init_execution() override;
+
+  /**
+   * Deinitialize the execution
+   */
+  void deinit_execution() override;
+
+  /** Requests the full extent of the input, regardless of the area asked for. */
+  bool determine_depending_area_of_interest(rcti *input,
+                                            ReadBufferOperation *read_operation,
+                                            rcti *output) override;
+
+  /** Reports the whole canvas of the given input as area of interest. */
+  void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
+
+  /** Builds the table into a newly allocated buffer, reading the input through its reader. */
+  MemoryBuffer *create_memory_buffer(rcti *rect) override;
+
+  /** Builds the table of `inputs[0]` into `output` over the given area. */
+  void update_memory_buffer(MemoryBuffer *output,
+                            const rcti &area,
+                            Span<MemoryBuffer *> inputs) override;
+
+ private:
+  /* Reader of the image input. Set by init_execution() and reset by deinit_execution();
+   * initialized here so it never holds an indeterminate value in between. */
+  SocketReader *image_reader_ = nullptr;
+  eMode mode_ = eMode::Identity;
+};
+
+/* Computes the sum of the rectangular region defined by the given area from the
+ * given summed area table. All coordinates within the area are included, i.e. both
+ * the min and the max bounds of the area are part of the summed region. The max
+ * bounds are limited to the last column and row of the table.
+ *
+ * #summed_area_table_sum reads the table from a memory buffer, while
+ * #summed_area_table_sum_tiled samples it through a socket reader. */
+float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area);
+float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area);
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/tests/COM_ComputeSummedAreaTableOperation_test.cc
+++ b/source/blender/compositor/tests/COM_ComputeSummedAreaTableOperation_test.cc
@ -0,0 +1,191 @@
+/* SPDX-FileCopyrightText: 2023 Blender Foundation
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "testing/testing.h"
+
+#include "COM_SummedAreaTableOperation.h"
+
+namespace blender::compositor::tests {
+
+struct SatParams {
+  /* Input parameters: the accumulation mode and execution model set on the operation,
+   * the area shared by the input and output buffers, and the constant color the whole
+   * input buffer is filled with. */
+  SummedAreaTableOperation::eMode mode;
+  eExecutionModel execution_model;
+  rcti area;
+  float4 fill_value;
+
+  /* Expected output values, indexed as values[row][column]. Each entry is compared
+   * against one single channel of the output pixel at that position. */
+  std::vector<std::vector<float>> values;
+};
+
+/* Value-parameterized fixture: every instantiation supplies one SatParams case. */
+class SummedAreaTableTestP : public ::testing::TestWithParam<SatParams> {};
+
+/* Builds the table of a constant-color input and spot-checks the first three pixels of the
+ * first two rows. Every check reads a different channel (0, 1, 2, then 3, 0, 1), so each
+ * expected value has to be derived from the matching component of the fill color. */
+TEST_P(SummedAreaTableTestP, Values)
+{
+  const SatParams params = GetParam();
+  const rcti area = params.area;
+
+  SummedAreaTableOperation sat;
+  sat.set_execution_model(params.execution_model);
+  sat.set_mode(params.mode);
+
+  MemoryBuffer input(DataType::Color, area);
+  input.fill(area, &params.fill_value.x);
+  MemoryBuffer output(DataType::Color, area);
+
+  sat.update_memory_buffer(&output, area, Span<MemoryBuffer *>{&input});
+
+  /* Compares one channel of output pixel (x, y) with the expected value of that position. */
+  const auto expect_channel = [&](const int x, const int y, const int channel) {
+    EXPECT_FLOAT_EQ(output.get_elem(x, y)[channel], params.values[y][x]);
+  };
+
+  /* First row. */
+  expect_channel(0, 0, 0);
+  expect_channel(1, 0, 1);
+  expect_channel(2, 0, 2);
+
+  /* Second row. */
+  expect_channel(0, 1, 3);
+  expect_channel(1, 1, 0);
+  expect_channel(2, 1, 1);
+}
+
+INSTANTIATE_TEST_SUITE_P(FullFrame5x2_IdentityOnes,
+                         SummedAreaTableTestP,
+                         testing::Values(SatParams{
+                             SummedAreaTableOperation::eMode::Identity,
+                             eExecutionModel::FullFrame,
+                             rcti{0, 5, 0, 2},         /* Area. */
+                             {1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
+
+                             /* Expected output. With every input value being 1, the table
+                              * entry at column x and row y equals (x + 1) * (y + 1). */
+                             {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
+
+                         }));
+
+INSTANTIATE_TEST_SUITE_P(
+    FullFrame5x2_SquaredOnes,
+    SummedAreaTableTestP,
+    testing::Values(SatParams{
+        SummedAreaTableOperation::eMode::Squared,
+        eExecutionModel::FullFrame,
+        rcti{0, 5, 0, 2},         /* Area. */
+        {1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
+
+        /* Expect identical to when using Identity SAT, since all inputs are 1
+         * and squaring 1 leaves it unchanged. */
+        {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
+
+    }));
+
+INSTANTIATE_TEST_SUITE_P(FullFrame3x2_Squared,
+                         SummedAreaTableTestP,
+                         testing::Values(SatParams{SummedAreaTableOperation::eMode::Squared,
+                                                   eExecutionModel::FullFrame,
+                                                   rcti{0, 3, 0, 2},        /* Area. */
+                                                   {2.0f, 2.0f, 1.5f, .1f}, /* Fill value. */
+
+                                                   /* Expected output. The squared fill value is
+                                                    * (4, 4, 2.25, 0.01). The test reads channels
+                                                    * 0, 1, 2 along the first row and channels
+                                                    * 3, 0, 1 along the second row, so for example
+                                                    * 6.75 = 3 * 2.25 and 0.02 = 2 * 0.01. */
+                                                   {
+                                                       {4.0f, 8.0f, 6.75f},
+                                                       {0.02f, 16.0f, 24.0f},
+                                                   }}));
+
+/* Fixture providing `sat_`, a 5x4 summed area table of squared values built from an input
+ * that is filled with the constant color (1, 2, 1.5, 0.1). Since 1 squared is 1, channel 0 of
+ * any region sum equals the number of pixels in that region. */
+class SummedAreaTableSumTest : public ::testing::Test {
+ public:
+  SummedAreaTableSumTest()
+  {
+    operation_ = std::make_shared<SummedAreaTableOperation>();
+  }
+
+ protected:
+  void SetUp() override
+  {
+    operation_->set_execution_model(eExecutionModel::FullFrame);
+    operation_->set_mode(SummedAreaTableOperation::eMode::Squared);
+
+    area_ = rcti{0, 5, 0, 4};
+    sat_ = std::make_shared<MemoryBuffer>(DataType::Color, area_);
+
+    /* The input only has to live until the table is built. */
+    const float val[4] = {1.0f, 2.0f, 1.5f, 0.1f};
+    MemoryBuffer input(DataType::Color, area_);
+    input.fill(area_, val);
+
+    /* The operation declares a single input socket and reads only that buffer. */
+    operation_->update_memory_buffer(sat_.get(), area_, Span<MemoryBuffer *>{&input});
+  }
+
+  std::shared_ptr<SummedAreaTableOperation> operation_;
+  std::shared_ptr<MemoryBuffer> sat_;
+  rcti area_;
+};
+
+/* A 3x3 region that does not touch any border of the 5x4 table. */
+TEST_F(SummedAreaTableSumTest, FullyInside)
+{
+  const rcti area = {/*xmin*/ 1, /*xmax*/ 3, /*ymin*/ 1, /*ymax*/ 3};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 9);
+}
+
+/* A 3x3 region starting at x = 0 and y = 0, so the lookups one pixel before the lower bound
+ * fall outside of the table. */
+TEST_F(SummedAreaTableSumTest, LeftEdge)
+{
+  const rcti area = {/*xmin*/ 0, /*xmax*/ 2, /*ymin*/ 0, /*ymax*/ 2};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 9);
+}
+
+/* The requested xmax lies one past the last column and gets limited to it, which leaves
+ * 2 columns by 3 rows. */
+TEST_F(SummedAreaTableSumTest, RightEdge)
+{
+  const rcti area = {/*xmin*/ area_.xmax - 2, /*xmax*/ area_.xmax, /*ymin*/ 0, /*ymax*/ 2};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 6);
+}
+
+/* Both max bounds lie one past the table and get limited to the last column and row, which
+ * leaves exactly one pixel. */
+TEST_F(SummedAreaTableSumTest, LowerRightCorner)
+{
+  const rcti area = {/*xmin*/ area_.xmax - 1,
+                     /*xmax*/ area_.xmax,
+                     /*ymin*/ area_.ymax - 1,
+                     /*ymax*/ area_.ymax};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 1);
+}
+
+/* A single-row region at y = 0 spanning the first two columns. */
+TEST_F(SummedAreaTableSumTest, TopLine)
+{
+  const rcti area = {/*xmin*/ 0, /*xmax*/ 1, /*ymin*/ 0, /*ymax*/ 0};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 2);
+}
+
+/* A single-row region at y = 3, the last row of the table, spanning all five columns. */
+TEST_F(SummedAreaTableSumTest, ButtomLine)
+{
+  const rcti area = {/*xmin*/ 0, /*xmax*/ 4, /*ymin*/ 3, /*ymax*/ 3};
+  const float4 sum = summed_area_table_sum(sat_.get(), area);
+  EXPECT_EQ(sum[0], 5);
+}
+
+}  // namespace blender::compositor::tests