Speedup classic Kuwahara filter by summed area table #111150
|
@ -7,7 +7,6 @@
|
|||
|
||||
#include "COM_KuwaharaNode.h"
|
||||
|
||||
#include "COM_CalculateMeanOperation.h"
|
||||
#include "COM_GaussianXBlurOperation.h"
|
||||
zazizizou marked this conversation as resolved
Outdated
|
||||
#include "COM_GaussianYBlurOperation.h"
|
||||
#include "COM_KuwaharaAnisotropicOperation.h"
|
||||
|
|
|
@ -13,7 +13,6 @@ namespace blender::compositor {
|
|||
SummedAreaTableOperation::SummedAreaTableOperation()
|
||||
{
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_input_socket(DataType::Value);
|
||||
this->add_output_socket(DataType::Color);
|
||||
OmarEmaraDev marked this conversation as resolved
Outdated
Omar Emara
commented
What is this Value input?
Habib Gahbiche
commented
This was needed to subtract the mean from the image. It is not needed anymore; I will remove it.
|
||||
|
||||
mode_ = eMode::Identity;
|
||||
|
@ -78,11 +77,11 @@ void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
|
|||
/* Second pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area.xmin, area.xmax), 1, [&](const IndexRange range_x) {
|
||||
for (const int x : range_x) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int y : IndexRange(area.ymin, area.ymax)) {
|
||||
float4 color;
|
||||
output->read_elem_checked(x, y - 1, &color.x);
|
||||
float *out = output->get_elem(x, y);
|
||||
copy_v4_v4(out, float4(out) + color);
|
||||
const float4 color = float4(output->get_elem(x, y));
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use a temporary `accumulated_color` variable and avoid reading the buffer again, just like the above loop. Then, use `get_elem` instead of `read_elem_checked`.
|
||||
accumulated_color += color;
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -92,16 +91,14 @@ MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
|
|||
{
|
||||
/* Note: although this is a single threaded call, multithreading is used. */
|
||||
MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
|
||||
PixelSampler sampler = PixelSampler::Nearest;
|
||||
|
||||
/* First pass: copy input to output and sum horizontally. */
|
||||
threading::parallel_for(IndexRange(area->ymin, area->ymax), 1, [&](const IndexRange range_y) {
|
||||
for (const int y : range_y) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int x : IndexRange(area->xmin, area->xmax)) {
|
||||
|
||||
float4 color;
|
||||
image_reader_->read_sampled(&color.x, x, y, sampler);
|
||||
image_reader_->read(&color.x, x, y, nullptr);
|
||||
accumulated_color += mode_ == eMode::Squared ? color * color : color;
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `read` instead of `read_sampled`. The same applies to all `read_sampled` calls below.
|
||||
|
@ -111,11 +108,11 @@ MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
|
|||
/* Second pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area->xmin, area->xmax), 1, [&](const IndexRange range_x) {
|
||||
for (const int x : range_x) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int y : IndexRange(area->ymin, area->ymax)) {
|
||||
float4 color;
|
||||
output->read_elem_checked(x, y - 1, &color.x);
|
||||
float *out = output->get_elem(x, y);
|
||||
copy_v4_v4(out, float4(out) + color);
|
||||
|
||||
accumulated_color += float4(output->get_elem(x, y));
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Omar Emara
commented
Same as above.
|
||||
}
|
||||
});
|
||||
|
@ -162,16 +159,12 @@ float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
|
|||
corrected_upper_bound[1] = math::min((int)buffer->get_height() - 1, upper_bound[1]);
|
||||
|
||||
float4 a, b, c, d, addend, substrahend;
|
||||
buffer->read_sampled(
|
||||
&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
|
||||
buffer->read_sampled(
|
||||
&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
|
||||
buffer->read_sampled(&a.x, UNPACK2(corrected_upper_bound), PixelSampler::Nearest);
|
||||
buffer->read_sampled(&d.x, UNPACK2(corrected_lower_bound), PixelSampler::Nearest);
|
||||
addend = a + d;
|
||||
|
||||
buffer->read_sampled(
|
||||
&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
|
||||
buffer->read_sampled(
|
||||
&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
|
||||
buffer->read_sampled(&b.x, UNPACK2(corrected_lower_bound), PixelSampler::Nearest);
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `UNPACK2`.
|
||||
buffer->read_sampled(&c.x, UNPACK2(corrected_upper_bound), PixelSampler::Nearest);
|
||||
substrahend = b + c;
|
||||
|
||||
float4 sum = addend - substrahend;
|
||||
|
@ -208,12 +201,12 @@ float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area)
|
|||
corrected_upper_bound[1] = math::min(buffer->get_height() - 1, upper_bound[1]);
|
||||
|
||||
float4 a, b, c, d, addend, substrahend;
|
||||
buffer->read_elem_checked(corrected_upper_bound[0], corrected_upper_bound[1], a);
|
||||
buffer->read_elem_checked(corrected_lower_bound[0], corrected_lower_bound[1], d);
|
||||
buffer->read_elem_checked(UNPACK2(corrected_upper_bound), a);
|
||||
buffer->read_elem_checked(UNPACK2(corrected_lower_bound), d);
|
||||
addend = a + d;
|
||||
|
||||
buffer->read_elem_checked(corrected_lower_bound[0], corrected_upper_bound[1], b);
|
||||
buffer->read_elem_checked(corrected_upper_bound[0], corrected_lower_bound[1], c);
|
||||
buffer->read_elem_checked(UNPACK2(corrected_lower_bound), b);
|
||||
buffer->read_elem_checked(UNPACK2(corrected_upper_bound), c);
|
||||
substrahend = b + c;
|
||||
|
||||
float4 sum = addend - substrahend;
|
||||
|
|
|
@ -9,8 +9,7 @@
|
|||
namespace blender::compositor {
|
||||
|
||||
/**
|
||||
* \brief base class of CalculateMean, implementing the simple CalculateMean
|
||||
* \ingroup operation
|
||||
* \brief SummedAreaTableOperation class computes the summed area table.
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Update comment.
|
||||
*/
|
||||
class SummedAreaTableOperation : public SingleThreadedOperation {
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ TEST_F(SummedAreaTableSumTest, FullyInside)
|
|||
area.ymin = 1;
|
||||
area.ymax = 3;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 9);
|
||||
EXPECT_EQ(sum[0], 9);
|
||||
zazizizou marked this conversation as resolved
Outdated
Sergey Sharybin
commented
Any specific reason to use `ASSERT_EQ` instead of `EXPECT_EQ`? The ASSERT will stop the test. It is typically used for cases when the rest of the test would be impossible. For example, when you expect a function to give you a pointer to an object and you check that it is non-nullptr before looking into its properties.
Habib Gahbiche
commented
No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity.
|
||||
}
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, LeftEdge)
|
||||
|
@ -141,7 +141,7 @@ TEST_F(SummedAreaTableSumTest, LeftEdge)
|
|||
area.ymin = 0;
|
||||
area.ymax = 2;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 9);
|
||||
EXPECT_EQ(sum[0], 9);
|
||||
}
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, RightEdge)
|
||||
|
@ -152,7 +152,7 @@ TEST_F(SummedAreaTableSumTest, RightEdge)
|
|||
area.ymin = 0;
|
||||
area.ymax = 2;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 6);
|
||||
EXPECT_EQ(sum[0], 6);
|
||||
}
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, LowerRightCorner)
|
||||
|
@ -163,7 +163,7 @@ TEST_F(SummedAreaTableSumTest, LowerRightCorner)
|
|||
area.ymin = area_.ymax - 1;
|
||||
area.ymax = area_.ymax;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 1);
|
||||
EXPECT_EQ(sum[0], 1);
|
||||
}
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, TopLine)
|
||||
|
@ -174,7 +174,7 @@ TEST_F(SummedAreaTableSumTest, TopLine)
|
|||
area.ymin = 0;
|
||||
area.ymax = 0;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 2);
|
||||
EXPECT_EQ(sum[0], 2);
|
||||
}
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, ButtomLine)
|
||||
|
@ -185,7 +185,7 @@ TEST_F(SummedAreaTableSumTest, ButtomLine)
|
|||
area.ymin = 3;
|
||||
area.ymax = 3;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
ASSERT_EQ(sum[0], 5);
|
||||
EXPECT_EQ(sum[0], 5);
|
||||
}
|
||||
|
||||
} // namespace blender::compositor::tests
|
||||
|
|
Unnecessary include.