Speedup classic Kuwahara filter by summed area table #111150

Merged
Habib Gahbiche merged 30 commits from zazizizou/blender:com-kuwahara-sat into main 2023-11-01 10:49:18 +01:00
4 changed files with 24 additions and 33 deletions
Showing only changes of commit 4cf2afcbb3 - Show all commits

View File

@ -7,7 +7,6 @@
#include "COM_KuwaharaNode.h"
#include "COM_CalculateMeanOperation.h"
#include "COM_GaussianXBlurOperation.h"
zazizizou marked this conversation as resolved Outdated

Unnecessary include.

Unnecessary include.
#include "COM_GaussianYBlurOperation.h"
#include "COM_KuwaharaAnisotropicOperation.h"

View File

@ -13,7 +13,6 @@ namespace blender::compositor {
SummedAreaTableOperation::SummedAreaTableOperation()
{
this->add_input_socket(DataType::Color);
this->add_input_socket(DataType::Value);
this->add_output_socket(DataType::Color);
OmarEmaraDev marked this conversation as resolved Outdated

What is this Value input?

What is this Value input?

This was needed to subtract the mean from the image. It is not needed anymore, so I will remove it.

This was needed to subtract the mean from the image. It is not needed anymore, so I will remove it.
mode_ = eMode::Identity;
@ -78,11 +77,11 @@ void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
/* Second pass: vertical sum. */
threading::parallel_for(IndexRange(area.xmin, area.xmax), 1, [&](const IndexRange range_x) {
for (const int x : range_x) {
float4 accumulated_color = float4(0.0f);
for (const int y : IndexRange(area.ymin, area.ymax)) {
float4 color;
output->read_elem_checked(x, y - 1, &color.x);
float *out = output->get_elem(x, y);
copy_v4_v4(out, float4(out) + color);
const float4 color = float4(output->get_elem(x, y));
zazizizou marked this conversation as resolved Outdated

Use a temporary accumulated_color variable and avoid reading the buffer again, just like in the loop above. Then use get_elem instead of read_elem_checked.

Use a temporary `accumulated_color` variable and avoid reading the buffer again, just like in the loop above. Then use `get_elem` instead of `read_elem_checked`.
accumulated_color += color;
copy_v4_v4(output->get_elem(x, y), accumulated_color);
}
}
});
@ -92,16 +91,14 @@ MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
{
/* Note: although this is a single threaded call, multithreading is used. */
MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
PixelSampler sampler = PixelSampler::Nearest;
/* First pass: copy input to output and sum horizontally. */
threading::parallel_for(IndexRange(area->ymin, area->ymax), 1, [&](const IndexRange range_y) {
for (const int y : range_y) {
float4 accumulated_color = float4(0.0f);
for (const int x : IndexRange(area->xmin, area->xmax)) {
float4 color;
image_reader_->read_sampled(&color.x, x, y, sampler);
image_reader_->read(&color.x, x, y, nullptr);
accumulated_color += mode_ == eMode::Squared ? color * color : color;
copy_v4_v4(output->get_elem(x, y), accumulated_color);
}
zazizizou marked this conversation as resolved Outdated

Use read instead of read_sampled. The same applies to all read_sampled calls below.

Use `read` instead of `read_sampled`. The same applies to all `read_sampled` calls below.
@ -111,11 +108,11 @@ MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
/* Second pass: vertical sum. */
threading::parallel_for(IndexRange(area->xmin, area->xmax), 1, [&](const IndexRange range_x) {
for (const int x : range_x) {
float4 accumulated_color = float4(0.0f);
for (const int y : IndexRange(area->ymin, area->ymax)) {
float4 color;
output->read_elem_checked(x, y - 1, &color.x);
float *out = output->get_elem(x, y);
copy_v4_v4(out, float4(out) + color);
accumulated_color += float4(output->get_elem(x, y));
copy_v4_v4(output->get_elem(x, y), accumulated_color);
}
zazizizou marked this conversation as resolved
Review

Same as above.

Same as above.
}
});
@ -162,16 +159,12 @@ float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
corrected_upper_bound[1] = math::min((int)buffer->get_height() - 1, upper_bound[1]);
float4 a, b, c, d, addend, substrahend;
buffer->read_sampled(
&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
buffer->read_sampled(&a.x, UNPACK2(corrected_upper_bound), PixelSampler::Nearest);
buffer->read_sampled(&d.x, UNPACK2(corrected_lower_bound), PixelSampler::Nearest);
addend = a + d;
buffer->read_sampled(
&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
buffer->read_sampled(&b.x, UNPACK2(corrected_lower_bound), PixelSampler::Nearest);
zazizizou marked this conversation as resolved Outdated

Use UNPACK2.

Use `UNPACK2`.
buffer->read_sampled(&c.x, UNPACK2(corrected_upper_bound), PixelSampler::Nearest);
substrahend = b + c;
float4 sum = addend - substrahend;
@ -208,12 +201,12 @@ float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area)
corrected_upper_bound[1] = math::min(buffer->get_height() - 1, upper_bound[1]);
float4 a, b, c, d, addend, substrahend;
buffer->read_elem_checked(corrected_upper_bound[0], corrected_upper_bound[1], a);
buffer->read_elem_checked(corrected_lower_bound[0], corrected_lower_bound[1], d);
buffer->read_elem_checked(UNPACK2(corrected_upper_bound), a);
buffer->read_elem_checked(UNPACK2(corrected_lower_bound), d);
addend = a + d;
buffer->read_elem_checked(corrected_lower_bound[0], corrected_upper_bound[1], b);
buffer->read_elem_checked(corrected_upper_bound[0], corrected_lower_bound[1], c);
buffer->read_elem_checked(UNPACK2(corrected_lower_bound), b);
buffer->read_elem_checked(UNPACK2(corrected_upper_bound), c);
substrahend = b + c;
float4 sum = addend - substrahend;

View File

@ -9,8 +9,7 @@
namespace blender::compositor {
/**
* \brief base class of CalculateMean, implementing the simple CalculateMean
* \ingroup operation
* \brief SummedAreaTableOperation class computes the summed area table.
zazizizou marked this conversation as resolved Outdated

Update comment.

Update comment.
*/
class SummedAreaTableOperation : public SingleThreadedOperation {

View File

@ -130,7 +130,7 @@ TEST_F(SummedAreaTableSumTest, FullyInside)
area.ymin = 1;
area.ymax = 3;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 9);
EXPECT_EQ(sum[0], 9);
zazizizou marked this conversation as resolved Outdated

Is there any specific reason to use ASSERT_EQ instead of EXPECT_EQ? ASSERT stops the test at the first failure. It is typically used for cases where the rest of the test would be impossible to run. For example, when you expect a function to give you a pointer to an object, you check that it is not nullptr before looking into its properties.

Is there any specific reason to use `ASSERT_EQ` instead of `EXPECT_EQ`? `ASSERT` stops the test at the first failure. It is typically used for cases where the rest of the test would be impossible to run. For example, when you expect a function to give you a pointer to an object, you check that it is not `nullptr` before looking into its properties.

No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity.

No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity.
}
TEST_F(SummedAreaTableSumTest, LeftEdge)
@ -141,7 +141,7 @@ TEST_F(SummedAreaTableSumTest, LeftEdge)
area.ymin = 0;
area.ymax = 2;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 9);
EXPECT_EQ(sum[0], 9);
}
TEST_F(SummedAreaTableSumTest, RightEdge)
@ -152,7 +152,7 @@ TEST_F(SummedAreaTableSumTest, RightEdge)
area.ymin = 0;
area.ymax = 2;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 6);
EXPECT_EQ(sum[0], 6);
}
TEST_F(SummedAreaTableSumTest, LowerRightCorner)
@ -163,7 +163,7 @@ TEST_F(SummedAreaTableSumTest, LowerRightCorner)
area.ymin = area_.ymax - 1;
area.ymax = area_.ymax;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 1);
EXPECT_EQ(sum[0], 1);
}
TEST_F(SummedAreaTableSumTest, TopLine)
@ -174,7 +174,7 @@ TEST_F(SummedAreaTableSumTest, TopLine)
area.ymin = 0;
area.ymax = 0;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 2);
EXPECT_EQ(sum[0], 2);
}
TEST_F(SummedAreaTableSumTest, ButtomLine)
@ -185,7 +185,7 @@ TEST_F(SummedAreaTableSumTest, ButtomLine)
area.ymin = 3;
area.ymax = 3;
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 5);
EXPECT_EQ(sum[0], 5);
}
} // namespace blender::compositor::tests