Speedup classic Kuwahara filter by summed area table #111150

Merged
Habib Gahbiche merged 30 commits from zazizizou/blender:com-kuwahara-sat into main 2023-11-01 10:49:18 +01:00
7 changed files with 575 additions and 20 deletions

View File

@ -345,6 +345,8 @@ if(WITH_COMPOSITOR_CPU)
operations/COM_GaussianXBlurOperation.h
operations/COM_GaussianYBlurOperation.cc
operations/COM_GaussianYBlurOperation.h
operations/COM_SummedAreaTableOperation.h
operations/COM_SummedAreaTableOperation.cc
operations/COM_KuwaharaAnisotropicOperation.cc
operations/COM_KuwaharaAnisotropicOperation.h
operations/COM_KuwaharaAnisotropicStructureTensorOperation.cc
@ -665,6 +667,7 @@ if(WITH_COMPOSITOR_CPU)
tests/COM_BufferRange_test.cc
tests/COM_BuffersIterator_test.cc
tests/COM_NodeOperation_test.cc
tests/COM_ComputeSummedAreaTableOperation_test.cc
)
set(TEST_INC
)

View File

@ -12,6 +12,7 @@
#include "COM_KuwaharaAnisotropicOperation.h"
#include "COM_KuwaharaAnisotropicStructureTensorOperation.h"
#include "COM_KuwaharaClassicOperation.h"
#include "COM_SummedAreaTableOperation.h"
namespace blender::compositor {
@ -23,12 +24,24 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
switch (data->variation) {
case CMP_NODE_KUWAHARA_CLASSIC: {
KuwaharaClassicOperation *operation = new KuwaharaClassicOperation();
KuwaharaClassicOperation *kuwahara_classic = new KuwaharaClassicOperation();
converter.add_operation(kuwahara_classic);
converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
converter.map_input_socket(get_input_socket(1), kuwahara_classic->get_input_socket(1));
converter.add_operation(operation);
converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
converter.map_input_socket(get_input_socket(1), operation->get_input_socket(1));
converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
SummedAreaTableOperation *sat = new SummedAreaTableOperation();
sat->set_mode(SummedAreaTableOperation::eMode::Identity);
converter.add_operation(sat);
converter.map_input_socket(get_input_socket(0), sat->get_input_socket(0));
converter.add_link(sat->get_output_socket(0), kuwahara_classic->get_input_socket(2));
SummedAreaTableOperation *sat_squared = new SummedAreaTableOperation();
zazizizou marked this conversation as resolved Outdated

Add a kuwahara_classic->set_use_sat(true); just for clarity.

Add a `kuwahara_classic->set_use_sat(true);` just for clarity.
sat_squared->set_mode(SummedAreaTableOperation::eMode::Squared);
converter.add_operation(sat_squared);
converter.map_input_socket(get_input_socket(0), sat_squared->get_input_socket(0));
converter.add_link(sat_squared->get_output_socket(0), kuwahara_classic->get_input_socket(3));
converter.map_output_socket(get_output_socket(0), kuwahara_classic->get_output_socket(0));
break;
}

View File

@ -16,6 +16,8 @@ KuwaharaClassicOperation::KuwaharaClassicOperation()
{
this->add_input_socket(DataType::Color);
this->add_input_socket(DataType::Value);
this->add_input_socket(DataType::Color);
this->add_input_socket(DataType::Color);
this->add_output_socket(DataType::Color);
this->flags_.is_fullframe_operation = true;
@ -25,12 +27,16 @@ void KuwaharaClassicOperation::init_execution()
{
image_reader_ = this->get_input_socket_reader(0);
size_reader_ = this->get_input_socket_reader(1);
sat_reader_ = this->get_input_socket_reader(2);
sat_squared_reader_ = this->get_input_socket_reader(3);
}
void KuwaharaClassicOperation::deinit_execution()
{
image_reader_ = nullptr;
size_reader_ = nullptr;
sat_reader_ = nullptr;
sat_squared_reader_ = nullptr;
}
void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
@ -46,13 +52,44 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
size_reader_->read_sampled(size, x, y, sampler);
const int kernel_size = int(math::max(0.0f, size[0]));
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
/* Naive implementation is more accurate for small kernel sizes. */
if (kernel_size >= 4) {
for (int q = 0; q < 4; q++) {
/* A fancy expression to compute the sign of the quadrant q. */
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
int xx = x + dx;
int yy = y + dy;
if (xx >= 0 && yy >= 0 && xx < this->get_width() && yy < this->get_height()) {
int2 lower_bound = int2(x, y) -
int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
int2 upper_bound = int2(x, y) +
int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
/* Limit the quadrants to the image bounds. */
int2 image_bound = int2(this->get_width(), this->get_height()) - int2(1);
int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
quadrant_pixel_count[q] = region_size.x * region_size.y;
rcti kernel_area;
kernel_area.xmin = corrected_lower_bound[0];
kernel_area.ymin = corrected_lower_bound[1];
kernel_area.xmax = corrected_upper_bound[0];
kernel_area.ymax = corrected_upper_bound[1];
mean_of_color[q] = summed_area_table_sum_tiled(sat_reader_, kernel_area);
mean_of_squared_color[q] = summed_area_table_sum_tiled(sat_squared_reader_, kernel_area);
}
}
else {
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
int xx = x + dx;
int yy = y + dy;
if (xx < 0 || yy < 0 || xx >= this->get_width() || yy >= this->get_height()) {
continue;
}
float4 color;
image_reader_->read_sampled(color, xx, yy, sampler);
@ -115,24 +152,60 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
{
MemoryBuffer *image = inputs[0];
MemoryBuffer *size_image = inputs[1];
MemoryBuffer *sat = inputs[2];
MemoryBuffer *sat_squared = inputs[3];
int width = image->get_width();
int height = image->get_height();
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
const int x = it.x;
const int y = it.y;
float4 mean_of_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
float4 mean_of_squared_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
int quadrant_pixel_count[] = {0, 0, 0, 0};
float4 mean_of_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
float4 mean_of_squared_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
int quadrant_pixel_count[4] = {0, 0, 0, 0};
const int kernel_size = int(math::max(0.0f, *size_image->get_elem(x, y)));
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
/* Naive implementation is more accurate for small kernel sizes. */
if (kernel_size >= 4) {
for (int q = 0; q < 4; q++) {
/* A fancy expression to compute the sign of the quadrant q. */
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
int xx = x + dx;
int yy = y + dy;
if (xx >= 0 && yy >= 0 && xx < image->get_width() && yy < image->get_height()) {
int2 lower_bound = int2(x, y) -
int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
int2 upper_bound = int2(x, y) +
int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
/* Limit the quadrants to the image bounds. */
int2 image_bound = int2(width, height) - int2(1);
int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
quadrant_pixel_count[q] = region_size.x * region_size.y;
rcti kernel_area;
kernel_area.xmin = corrected_lower_bound[0];
kernel_area.ymin = corrected_lower_bound[1];
kernel_area.xmax = corrected_upper_bound[0];
kernel_area.ymax = corrected_upper_bound[1];
mean_of_color[q] = summed_area_table_sum(sat, kernel_area);
mean_of_squared_color[q] = summed_area_table_sum(sat_squared, kernel_area);
}
}
else {
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
int xx = x + dx;
int yy = y + dy;
if (xx < 0 || yy < 0 || xx >= image->get_width() || yy >= image->get_height()) {
continue;
}
float4 color;
image->read_elem(xx, yy, &color.x);

View File

@ -11,6 +11,8 @@ namespace blender::compositor {
class KuwaharaClassicOperation : public MultiThreadedOperation {
SocketReader *image_reader_;
SocketReader *size_reader_;
SocketReader *sat_reader_;
SocketReader *sat_squared_reader_;
public:
KuwaharaClassicOperation();

View File

@ -0,0 +1,217 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_task.hh"
#include "COM_SummedAreaTableOperation.h"
namespace blender::compositor {
/* Sets up one color input and one color output, defaults the mode to `Identity` and flags the
 * operation as a full-frame operation. */
SummedAreaTableOperation::SummedAreaTableOperation()
{
/* Input 0: the image to accumulate. Output 0: the resulting summed area table. */
this->add_input_socket(DataType::Color);
this->add_output_socket(DataType::Color);
OmarEmaraDev marked this conversation as resolved Outdated

What is this Value input?

What is this Value input?

This was needed to subtract the mean from image. Not needed anymore, will remove.

This was needed to subtract the mean from image. Not needed anymore, will remove.
/* Callers switch to `Squared` through #set_mode when the table of squared colors is wanted. */
mode_ = eMode::Identity;
this->flags_.is_fullframe_operation = true;
}
void SummedAreaTableOperation::init_execution()
{
SingleThreadedOperation::init_execution();
image_reader_ = this->get_input_socket_reader(0);
}
void SummedAreaTableOperation::deinit_execution()
{
image_reader_ = nullptr;
SingleThreadedOperation::deinit_execution();
}
/* The requested area is ignored: the query forwarded to the input operation always spans from
 * (0, 0) to the input's full width and height. Returns whatever the input operation reports. */
bool SummedAreaTableOperation::determine_depending_area_of_interest(
    rcti * /*input*/, ReadBufferOperation *read_operation, rcti *output)
{
  NodeOperation *input_operation = get_input_operation(0);

  rcti full_input;
  full_input.xmin = 0;
  full_input.ymin = 0;
  full_input.xmax = input_operation->get_width();
  full_input.ymax = input_operation->get_height();

  return input_operation->determine_depending_area_of_interest(
      &full_input, read_operation, output);
}
void SummedAreaTableOperation::get_area_of_interest(int input_idx,
const rcti & /*output_area*/,
rcti &r_input_area)
{
r_input_area = get_input_operation(input_idx)->get_canvas();
}
/**
 * Full-frame execution: writes the summed area table of `inputs[0]` over `area` into `output`.
 *
 * The table is built as two prefix sums: first along every row (applying the square when the
 * mode is `Squared`), then along every column of the intermediate result. Rows of the first pass
 * and columns of the second pass are independent of each other, so each pass runs in parallel.
 */
void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
                                                    const rcti &area,
                                                    Span<MemoryBuffer *> inputs)
{
  /* Note: although this is a single threaded call, multithreading is used. */
  MemoryBuffer *image = inputs[0];
  /* #IndexRange is constructed from (start, size), not (begin, end): pass the extents so the
   * loops stay inside `area` even when its minimum is not zero. */
  const IndexRange rows(area.ymin, area.ymax - area.ymin);
  const IndexRange columns(area.xmin, area.xmax - area.xmin);
  /* First pass: copy input to output and sum horizontally. */
  threading::parallel_for(rows, 1, [&](const IndexRange range_y) {
    for (const int y : range_y) {
zazizizou marked this conversation as resolved Outdated

It is sufficient to have a single parallel loop over rows and a serial loop over columns, too much parallelism will hurt performance.

This copy loop can be fused with the horizontal pass.

It is sufficient to have a single parallel loop over rows and a serial loop over columns, too much parallelism will hurt performance. This copy loop can be fused with the horizontal pass.
      float4 accumulated_color = float4(0.0f);
      for (const int x : columns) {
        const float4 color = float4(image->get_elem(x, y));
        accumulated_color += mode_ == eMode::Squared ? color * color : color;
        copy_v4_v4(output->get_elem(x, y), accumulated_color);
      }
    }
  });
  /* Second pass: vertical sum. */
  threading::parallel_for(columns, 1, [&](const IndexRange range_x) {
    for (const int x : range_x) {
      float4 accumulated_color = float4(0.0f);
      for (const int y : rows) {
        const float4 color = float4(output->get_elem(x, y));
zazizizou marked this conversation as resolved Outdated

Use a temporary accumulated_color variable and avoid reading the buffer again just like the above loop. Then, use get_elem instead of read_elem_checked.

Use a temporary `accumulated_color` variable and avoid reading the buffer again just like the above loop. Then, use `get_elem` instead of `read_elem_checked`.
        accumulated_color += color;
        copy_v4_v4(output->get_elem(x, y), accumulated_color);
      }
    }
  });
}
/**
 * Tiled execution: allocates a color buffer covering `area` and fills it with the summed area
 * table of the image read through `image_reader_`.
 *
 * Uses the same two prefix-sum passes as #update_memory_buffer: along rows first (applying the
 * square when the mode is `Squared`), then along columns. The buffer is allocated with `new`
 * and returned as a raw pointer.
 */
MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
{
  /* Note: although this is a single threaded call, multithreading is used. */
  MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
  /* #IndexRange is constructed from (start, size), not (begin, end): pass the extents so the
   * loops stay inside `area` even when its minimum is not zero. */
  const IndexRange rows(area->ymin, area->ymax - area->ymin);
  const IndexRange columns(area->xmin, area->xmax - area->xmin);
  /* First pass: copy input to output and sum horizontally. */
  threading::parallel_for(rows, 1, [&](const IndexRange range_y) {
    for (const int y : range_y) {
      float4 accumulated_color = float4(0.0f);
      for (const int x : columns) {
        float4 color;
        image_reader_->read(&color.x, x, y, nullptr);
        accumulated_color += mode_ == eMode::Squared ? color * color : color;
        copy_v4_v4(output->get_elem(x, y), accumulated_color);
      }
zazizizou marked this conversation as resolved Outdated

Use read instead of read_sampled. Same applies for all read_sampled calls below.

Use `read` instead of `read_sampled`. Same applies for all `read_sampled` calls below.
    }
zazizizou marked this conversation as resolved Outdated

This can be more compact.

  • Use a for each loop on ranges. for (const int y : sub_y_range) {
  • Accumulate a color instead of reading the previous output.
  • Use the get_elem function.
  • Use a copy function.
  threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange sub_y_range) {
    for (const int y : sub_y_range) {
      float4 accumulated_color = float4(0.0f);
      for (const int x : IndexRange(area.xmin, area.xmax)) {
        const float4 color = float4(image->get_elem(x, y));
        accumulated_color += color * color;
        copy_v4_v4(output->get_elem(x, y), accumulated_color);
      }
    }
  });
This can be more compact. - Use a for each loop on ranges. `for (const int y : sub_y_range) {` - Accumulate a color instead of reading the previous output. - Use the `get_elem` function. - Use a copy function. ```cpp threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange sub_y_range) { for (const int y : sub_y_range) { float4 accumulated_color = float4(0.0f); for (const int x : IndexRange(area.xmin, area.xmax)) { const float4 color = float4(image->get_elem(x, y)); accumulated_color += color * color; copy_v4_v4(output->get_elem(x, y), accumulated_color); } } }); ```

Will do, thanks for the tip :)

Will do, thanks for the tip :)
  });
  /* Second pass: vertical sum. */
  threading::parallel_for(columns, 1, [&](const IndexRange range_x) {
    for (const int x : range_x) {
      float4 accumulated_color = float4(0.0f);
      for (const int y : rows) {
        accumulated_color += float4(output->get_elem(x, y));
        copy_v4_v4(output->get_elem(x, y), accumulated_color);
      }
zazizizou marked this conversation as resolved
Review

Same as above.

Same as above.
    }
  });
  return output;
}
/* Selects whether colors (`Identity`) or squared colors (`Squared`) are accumulated. */
void SummedAreaTableOperation::set_mode(eMode mode)
{
  this->mode_ = mode;
}
zazizizou marked this conversation as resolved Outdated

I think we should attempt to multithread the SAT computation. Not sure if there is anything stopping us from doing that, but a two pass prefix sum should be easy to implement and efficient to parallelize on the CPU.

| Image | -> Prefix sum from left to right -> | Horizontal Pass Result | -> Prefix sum from bottom to top -> | Desired SAT |

Each of the prefix sums can simply be a parallel loop over rows/columns.

I think we should attempt to multithread the SAT computation. Not sure if there is anything stopping us from doing that, but a two pass prefix sum should be easy to implement and efficient to parallelize on the CPU. ``` | Image | -> Prefix sum from left to right -> | Horizontal Pass Result | -> Prefix sum from bottom to top -> | Desired SAT | ``` Each of the prefix sums can simply be a parallel loop over rows/columns.

As discussed in the meeting, my concern was using SingleThreadedOperation for a multi-threaded execution. I will upload a patch using TBB.

As discussed in the meeting, my concern was using `SingleThreadedOperation` for a multi-threaded execution. I will upload a patch using TBB.
/* Returns the accumulation mode currently in use. */
SummedAreaTableOperation::eMode SummedAreaTableOperation::get_mode()
{
  return this->mode_;
}
/* Variant of #summed_area_table_sum that samples the table through a #SocketReader (nearest
 * sampling). Returns the sum over `area`, whose bounds are treated as inclusive. */
float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
{
/*
 * The sum over the rectangle is obtained from four table lookups, named as follows in the
 * code below (blender convention, origin at the lower left):
 *
 *     y
 *     ^
 *     |
 *     |     b-----------a
 *     |     |   area    |
 *     |     d-----------c
 *     |
 *     +---------------------> x
 *
 *   a: (xmax, ymax), with xmax/ymax clamped to the last column/row of the table
 *   b: (xmin - 1, ymax)
 *   c: (xmax, ymin - 1)
 *   d: (xmin - 1, ymin - 1)
 *
 *   sum = (a + d) - (b + c)
 *
 * Note: this is the scheme described in https://en.wikipedia.org/wiki/Summed-area_table
 * but using the blender convention with the origin being at the lower left.
 */
BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);
int2 lower_bound(area.xmin, area.ymin);
int2 upper_bound(area.xmax, area.ymax);
/* Step one pixel outside the lower/left edge so that the row and column at the lower bound are
 * part of the sum.
 * NOTE(review): for xmin == 0 or ymin == 0 this samples coordinate -1; presumably the reader
 * is expected to return zero there - confirm. */
int2 corrected_lower_bound = lower_bound - int2(1, 1);
/* Only the upper bound is clamped to the table size. */
int2 corrected_upper_bound;
corrected_upper_bound[0] = math::min((int)buffer->get_width() - 1, upper_bound[0]);
corrected_upper_bound[1] = math::min((int)buffer->get_height() - 1, upper_bound[1]);
float4 a, b, c, d, addend, substrahend;
buffer->read_sampled(&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
addend = a + d;
buffer->read_sampled(&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
zazizizou marked this conversation as resolved Outdated

Use UNPACK2.

Use `UNPACK2`.
buffer->read_sampled(&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
substrahend = b + c;
float4 sum = addend - substrahend;
return sum;
}
/**
 * Sum of the pixels inside `area` (bounds inclusive), looked up from the summed area table
 * stored in `buffer`.
 *
 * Four table entries are combined (blender convention, origin at the lower left):
 *
 *     y
 *     ^
 *     |
 *     |     upper_left-------upper_right
 *     |         |      area       |
 *     |     lower_left-------lower_right
 *     |
 *     +-----------------------------------> x
 *
 *   upper_right: (xmax, ymax), clamped to the last column/row of the table
 *   upper_left:  (xmin - 1, ymax)
 *   lower_right: (xmax, ymin - 1)
 *   lower_left:  (xmin - 1, ymin - 1)
 *
 *   sum = (upper_right + lower_left) - (upper_left + lower_right)
 *
 * See https://en.wikipedia.org/wiki/Summed-area_table for the underlying scheme.
 */
float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area)
{
  BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);

  /* One pixel outside the lower/left edge, so the row and column at the lower bound are summed.
   * NOTE(review): this is -1 for areas touching the lower/left image border; presumably
   * #read_elem_checked yields zero for such coordinates - confirm. */
  const int2 outer_min = int2(area.xmin, area.ymin) - int2(1, 1);

  /* Inclusive upper corner, limited to the last column/row of the table. */
  int2 inner_max;
  inner_max[0] = math::min(buffer->get_width() - 1, int2(area.xmax, area.ymax)[0]);
  inner_max[1] = math::min(buffer->get_height() - 1, int2(area.xmax, area.ymax)[1]);

  float4 upper_right, lower_left, upper_left, lower_right;
  buffer->read_elem_checked(inner_max[0], inner_max[1], upper_right);
  buffer->read_elem_checked(outer_min[0], outer_min[1], lower_left);
  const float4 positive_part = upper_right + lower_left;

  buffer->read_elem_checked(outer_min[0], inner_max[1], upper_left);
  buffer->read_elem_checked(inner_max[0], outer_min[1], lower_right);
  const float4 negative_part = upper_left + lower_right;

  return positive_part - negative_part;
}
} // namespace blender::compositor

View File

@ -0,0 +1,56 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_SingleThreadedOperation.h"
namespace blender::compositor {
/**
* \brief SummedAreaTableOperation class computes the summed area table.
zazizizou marked this conversation as resolved Outdated

Update comment.

Update comment.
*/
class SummedAreaTableOperation : public SingleThreadedOperation {
public:
/* Creates the operation with one color input and one color output; the mode defaults to
 * `Identity`. */
SummedAreaTableOperation();
/* What gets accumulated into the table: the input colors as they are (`Identity`) or their
 * component-wise squares (`Squared`). */
enum eMode { Identity = 1, Squared };
void set_mode(const eMode mode);
eMode get_mode();
/**
 * Initialize the execution: runs the base class initialization and caches the reader of the
 * image input.
 */
void init_execution() override;
/**
 * Deinitialize the execution: clears the cached image reader and runs the base class
 * deinitialization.
 */
void deinit_execution() override;
/* Forwards a query covering the full width and height of the input, ignoring `input`. */
bool determine_depending_area_of_interest(rcti *input,
ReadBufferOperation *read_operation,
rcti *output) override;
/* Always reports the whole canvas of the input as area of interest. */
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
/* Allocates a new buffer for `rect` and fills it with the table of the image read through the
 * cached image reader. */
MemoryBuffer *create_memory_buffer(rcti *rect) override;
/* Writes the table of `inputs[0]` over `area` into `output`. */
void update_memory_buffer(MemoryBuffer *output,
const rcti &area,
Span<MemoryBuffer *> inputs) override;
private:
/* Reader of input socket 0; only set between #init_execution and #deinit_execution. */
SocketReader *image_reader_;
/* Accumulation mode, see #eMode. */
eMode mode_;
};
/* Computes the sum of the rectangular region defined by the given area from the
 * given summed area table. All coordinates within the area are included. */
float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area);
/* Same computation as #summed_area_table_sum, but the table is sampled through a
 * #SocketReader instead of being read from a #MemoryBuffer. */
float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area);
} // namespace blender::compositor

View File

@ -0,0 +1,191 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "testing/testing.h"
#include "COM_SummedAreaTableOperation.h"
namespace blender::compositor::tests {
/* One parameter set of the value-parameterized summed area table test. The member order matters:
 * the instantiations below initialize this struct positionally. */
struct SatParams {
/* Input parameters. */
/* Accumulation mode set on the operation under test. */
SummedAreaTableOperation::eMode mode;
eExecutionModel execution_model;
/* Area of both the input and the output buffer. */
rcti area;
/* Color every input pixel is filled with. */
float4 fill_value;
/* Expected output values. */
/* Indexed as values[y][x]; the test reads rows 0..1 and columns 0..2 only. */
std::vector<std::vector<float>> values;
};
/* Value-parameterized fixture; each instantiation supplies one #SatParams set. */
class SummedAreaTableTestP : public testing::TestWithParam<SatParams> {
};
/* Builds the table of a uniformly filled image and spot-checks six entries (three per row),
 * sampling a different color channel at each position. */
TEST_P(SummedAreaTableTestP, Values)
{
  const SatParams &params = GetParam();
  const rcti &area = params.area;

  SummedAreaTableOperation sat;
  sat.set_execution_model(params.execution_model);
  sat.set_mode(params.mode);

  MemoryBuffer output(DataType::Color, area);
  auto input = std::make_shared<MemoryBuffer>(DataType::Color, area);
  input->fill(area, &params.fill_value.x);

  sat.update_memory_buffer(&output, area, Span<MemoryBuffer *>{input.get()});

  /* First row (y = 0): channels 0, 1, 2. */
  EXPECT_FLOAT_EQ(output.get_elem(0, 0)[0], params.values[0][0]);
  EXPECT_FLOAT_EQ(output.get_elem(1, 0)[1], params.values[0][1]);
  EXPECT_FLOAT_EQ(output.get_elem(2, 0)[2], params.values[0][2]);

  /* Second row (y = 1): channels 3, 0, 1. */
  EXPECT_FLOAT_EQ(output.get_elem(0, 1)[3], params.values[1][0]);
  EXPECT_FLOAT_EQ(output.get_elem(1, 1)[0], params.values[1][1]);
  EXPECT_FLOAT_EQ(output.get_elem(2, 1)[1], params.values[1][2]);
}
/* Identity mode on an image filled with ones: each expected entry equals the number of pixels
 * in the rectangle spanning from (0, 0) to that entry, i.e. (x + 1) * (y + 1). */
INSTANTIATE_TEST_SUITE_P(FullFrame5x2_IdentityOnes,
SummedAreaTableTestP,
testing::Values(SatParams{
SummedAreaTableOperation::eMode::Identity,
eExecutionModel::FullFrame,
rcti{0, 5, 0, 2}, /* Area. */
{1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
/* Expected output. */
{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
}));
/* Squared mode on an image filled with ones: squaring leaves the input unchanged, so the
 * expected table matches the Identity case. */
INSTANTIATE_TEST_SUITE_P(
FullFrame5x2_SquaredOnes,
SummedAreaTableTestP,
testing::Values(SatParams{
SummedAreaTableOperation::eMode::Squared,
eExecutionModel::FullFrame,
rcti{0, 5, 0, 2}, /* Area. */
{1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
/* Expect identical to when using Identity SAT, since all inputs are 1. */
{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
}));
/* Squared mode with a non-uniform color: the squared fill value is {4, 4, 2.25, 0.01}. Because
 * the test samples a different channel at each position, every expected entry is
 * (x + 1) * (y + 1) times the squared value of the channel sampled there:
 * row 0 -> 1 * 4, 2 * 4, 3 * 2.25; row 1 -> 2 * 0.01, 4 * 4, 6 * 4. */
INSTANTIATE_TEST_SUITE_P(FullFrame3x2_Squared,
SummedAreaTableTestP,
testing::Values(SatParams{SummedAreaTableOperation::eMode::Squared,
eExecutionModel::FullFrame,
rcti{0, 3, 0, 2}, /* Area. */
{2.0f, 2.0f, 1.5f, .1f}, /* Fill value. */
/* Expected output. */
{
{4.0f, 8.0f, 6.75f},
{0.02f, 16.0f, 24.0f},
}}));
/**
 * Fixture for the #summed_area_table_sum tests.
 *
 * #SetUp builds a summed area table in `Squared` mode over the area `rcti{0, 5, 0, 4}` from an
 * image uniformly filled with {1.0, 2.0, 1.5, 0.1}. The first channel is 1.0 and stays 1.0 when
 * squared, so the first channel of any region sum equals the number of pixels in that region.
 */
class SummedAreaTableSumTest : public ::testing::Test {
 public:
  SummedAreaTableSumTest()
  {
    operation_ = std::make_shared<SummedAreaTableOperation>();
  }

 protected:
  void SetUp() override
  {
    operation_->set_execution_model(eExecutionModel::FullFrame);
    operation_->set_mode(SummedAreaTableOperation::eMode::Squared);

    area_ = rcti{0, 5, 0, 4};
    sat_ = std::make_shared<MemoryBuffer>(DataType::Color, area_);

    const float val[4] = {1.0f, 2.0f, 1.5f, 0.1f};
    std::shared_ptr<MemoryBuffer> input = std::make_shared<MemoryBuffer>(DataType::Color, area_);
    input->fill(area_, val);

    /* The operation has a single color input and only reads `inputs[0]`, so the image is the
     * only buffer that needs to be passed. */
    operation_->update_memory_buffer(sat_.get(), area_, Span<MemoryBuffer *>{input.get()});
  }

  /* Operation used to compute the table. */
  std::shared_ptr<SummedAreaTableOperation> operation_;
  /* The computed summed area table queried by the tests. */
  std::shared_ptr<MemoryBuffer> sat_;
  /* Area covered by both the input image and the table. */
  rcti area_;
};
/* A region that touches no image border: x and y both range over 1..3 (bounds inclusive), i.e.
 * 3 x 3 = 9 pixels, each contributing 1 to the first channel. */
TEST_F(SummedAreaTableSumTest, FullyInside)
{
rcti area;
area.xmin = 1;
area.xmax = 3;
area.ymin = 1;
area.ymax = 3;
float4 sum = summed_area_table_sum(sat_.get(), area);
EXPECT_EQ(sum[0], 9);
zazizizou marked this conversation as resolved Outdated

Any specific reason to use ASSERT_EQ instead of EXPECT_EQ ? The ASSERT will stop the test. It is typically used for cases when the rest of the test will be impossible. For example, when you expect function to give you a pointer to an object and you check for it be non-nullptr before looking into its properties.

Any specific reason to use `ASSERT_EQ ` instead of `EXPECT_EQ `? The ASSERT will stop the test. It is typically used for cases when the rest of the test will be impossible. For example, when you expect function to give you a pointer to an object and you check for it be non-nullptr before looking into its properties.

No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity

No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity
}
/* Region starting at the image origin: x and y both range over 0..2 (inclusive), 9 pixels.
 * The lookups one pixel outside the lower/left border must not contribute to the sum. */
TEST_F(SummedAreaTableSumTest, LeftEdge)
{
  rcti region;
  region.xmin = 0;
  region.ymin = 0;
  region.xmax = 2;
  region.ymax = 2;

  const float4 total = summed_area_table_sum(sat_.get(), region);
  EXPECT_EQ(total[0], 9);
}
/* Region whose upper x bound (`area_.xmax`) lies one past the last column: the sum function
 * limits it to the last column, leaving 2 columns x 3 rows = 6 pixels. */
TEST_F(SummedAreaTableSumTest, RightEdge)
{
  rcti region;
  region.ymin = 0;
  region.ymax = 2;
  region.xmin = area_.xmax - 2;
  region.xmax = area_.xmax;

  const float4 total = summed_area_table_sum(sat_.get(), region);
  EXPECT_EQ(total[0], 6);
}
/* Region at the maximum x/y corner with both upper bounds one past the last column/row: after
 * the sum function limits them, a single pixel remains.
 * NOTE(review): with the origin at the lower left this is the upper right corner of the image;
 * the test name presumably assumes y pointing down - confirm. */
TEST_F(SummedAreaTableSumTest, LowerRightCorner)
{
  rcti region;
  region.xmax = area_.xmax;
  region.ymax = area_.ymax;
  region.xmin = area_.xmax - 1;
  region.ymin = area_.ymax - 1;

  const float4 total = summed_area_table_sum(sat_.get(), region);
  EXPECT_EQ(total[0], 1);
}
/* One-row region at y = 0 covering columns 0..1: 2 pixels.
 * NOTE(review): with the origin at the lower left, y = 0 is the bottom row; the test name
 * presumably assumes y pointing down - confirm. */
TEST_F(SummedAreaTableSumTest, TopLine)
{
  rcti region;
  region.ymin = 0;
  region.ymax = 0;
  region.xmin = 0;
  region.xmax = 1;

  const float4 total = summed_area_table_sum(sat_.get(), region);
  EXPECT_EQ(total[0], 2);
}
/* One-row region at y = 3 (the last row of the table) covering columns 0..4: 5 pixels.
 * NOTE(review): "Buttom" in the test name looks like a typo for "Bottom"; also, with the
 * origin at the lower left, y = 3 is the top row - confirm before renaming. */
TEST_F(SummedAreaTableSumTest, ButtomLine)
{
  rcti region;
  region.ymin = 3;
  region.ymax = 3;
  region.xmin = 0;
  region.xmax = 4;

  const float4 total = summed_area_table_sum(sat_.get(), region);
  EXPECT_EQ(total[0], 5);
}
} // namespace blender::compositor::tests