Speed up the classic Kuwahara filter using a summed-area table #111150

Merged
Habib Gahbiche merged 30 commits from zazizizou/blender:com-kuwahara-sat into main 2023-11-01 10:49:18 +01:00
8 changed files with 56 additions and 62 deletions
Showing only changes of commit de0f57b22a - Show all commits

View File

@ -30,7 +30,7 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
converter.add_operation(kuwahara_classic);
converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
if(data->fast) {
if (data->fast) {
SummedAreaTableOperation *sat = new SummedAreaTableOperation();
sat->set_mode(SummedAreaTableOperation::eMode::Identity);
converter.add_operation(sat);
@ -41,7 +41,8 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
sat_squared->set_mode(SummedAreaTableOperation::eMode::Squared);
converter.add_operation(sat_squared);
converter.map_input_socket(get_input_socket(0), sat_squared->get_input_socket(0));
converter.add_link(sat_squared->get_output_socket(0), kuwahara_classic->get_input_socket(2));
converter.add_link(sat_squared->get_output_socket(0),
kuwahara_classic->get_input_socket(2));
// todo: remove
// debug

View File

@ -46,13 +46,15 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
float3 mean_of_squared_color[] = {float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)};
int quadrant_pixel_count[] = {0, 0, 0, 0};
if(use_sat_) {
if (use_sat_) {
for (int q = 0; q < 4; q++) {
/* A fancy expression to compute the sign of the quadrant q. */
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
int2 lower_bound = int2(x, y) - int2(sign.x > 0 ? 0 : kernel_size_, sign.y > 0 ? 0 : kernel_size_);
int2 upper_bound = int2(x, y) + int2(sign.x < 0 ? 0 : kernel_size_, sign.y < 0 ? 0 : kernel_size_);
int2 lower_bound = int2(x, y) -
int2(sign.x > 0 ? 0 : kernel_size_, sign.y > 0 ? 0 : kernel_size_);
int2 upper_bound = int2(x, y) +
int2(sign.x < 0 ? 0 : kernel_size_, sign.y < 0 ? 0 : kernel_size_);
/* Limit the quadrants to the image bounds. */
int2 image_bound = int2(this->get_width(), this->get_height()) - int2(1);
@ -68,10 +70,11 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
kernel_area.ymax = corrected_upper_bound[1];
mean_of_color[q] = summed_area_table_sum_tiled(sat_reader_, kernel_area).xyz();
mean_of_squared_color[q] = summed_area_table_sum_tiled(sat_squared_reader_, kernel_area).xyz();
mean_of_squared_color[q] =
summed_area_table_sum_tiled(sat_squared_reader_, kernel_area).xyz();
}
} else {
}
else {
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size_; dy <= kernel_size_; dy++) {
@ -181,13 +184,15 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
float3 mean_of_squared_color[4] = {float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)};
int quadrant_pixel_count[4] = {0, 0, 0, 0};
if(use_sat_) {
if (use_sat_) {
for (int q = 0; q < 4; q++) {
/* A fancy expression to compute the sign of the quadrant q. */
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
int2 lower_bound = int2(x, y) - int2(sign.x > 0 ? 0 : kernel_size_, sign.y > 0 ? 0 : kernel_size_);
int2 upper_bound = int2(x, y) + int2(sign.x < 0 ? 0 : kernel_size_, sign.y < 0 ? 0 : kernel_size_);
int2 lower_bound = int2(x, y) -
int2(sign.x > 0 ? 0 : kernel_size_, sign.y > 0 ? 0 : kernel_size_);
int2 upper_bound = int2(x, y) +
int2(sign.x < 0 ? 0 : kernel_size_, sign.y < 0 ? 0 : kernel_size_);
/* Limit the quadrants to the image bounds. */
int2 image_bound = int2(width, height) - int2(1);
@ -205,8 +210,8 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
mean_of_color[q] = summed_area_table_sum(sat, kernel_area).xyz();
mean_of_squared_color[q] = summed_area_table_sum(sat_squared, kernel_area).xyz();
}
} else {
}
else {
/* Split surroundings of pixel into 4 overlapping regions. */
for (int dy = -kernel_size_; dy <= kernel_size_; dy++) {
for (int dx = -kernel_size_; dx <= kernel_size_; dx++) {

View File

@ -30,7 +30,7 @@ class KuwaharaClassicOperation : public MultiThreadedOperation {
int get_kernel_size();
void set_use_sat(bool use_sat);
bool get_use_sat();
void update_memory_buffer_partial(MemoryBuffer *output,
const rcti &area,
Span<MemoryBuffer *> inputs) override;

View File

@ -1,4 +1,4 @@
/* SPDX-FileCopyrightText: 2011 Blender Foundation
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
@ -157,16 +157,20 @@ float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
int2 corrected_lower_bound = lower_bound - int2(1, 1);
int2 corrected_upper_bound;
corrected_upper_bound[0] = math::min((int) buffer->get_width() - 1, upper_bound[0]);
corrected_upper_bound[1] = math::min((int) buffer->get_height() - 1, upper_bound[1]);
corrected_upper_bound[0] = math::min((int)buffer->get_width() - 1, upper_bound[0]);
corrected_upper_bound[1] = math::min((int)buffer->get_height() - 1, upper_bound[1]);
float4 a, b, c, d, addend, substrahend;
buffer->read_sampled(&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
zazizizou marked this conversation as resolved Outdated

Use `UNPACK2`.
&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
addend = a + d;
buffer->read_sampled(&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
buffer->read_sampled(
&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
substrahend = b + c;
float4 sum = addend - substrahend;

View File

@ -1,4 +1,4 @@
/* SPDX-FileCopyrightText: 2011 Blender Foundation
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
@ -6,7 +6,6 @@
#include "COM_SingleThreadedOperation.h"
namespace blender::compositor {
/**
@ -18,11 +17,7 @@ class SummedAreaTableOperation : public SingleThreadedOperation {
public:
SummedAreaTableOperation();
enum eMode
{
Identity = 1,
Squared
};
enum eMode { Identity = 1, Squared };
void set_mode(const eMode mode);
eMode get_mode();
@ -41,20 +36,17 @@ class SummedAreaTableOperation : public SingleThreadedOperation {
ReadBufferOperation *read_operation,
rcti *output) override;
void get_area_of_interest(int input_idx,
const rcti &output_area,
rcti &r_input_area) override;
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
MemoryBuffer *create_memory_buffer(rcti *rect) override;
void update_memory_buffer(MemoryBuffer *output,
const rcti & area,
const rcti &area,
Span<MemoryBuffer *> inputs) override;
private:
private:
SocketReader *image_reader_;
eMode mode_;
eMode mode_;
};
float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area);

View File

@ -1,4 +1,4 @@
/* SPDX-FileCopyrightText: 2021 Blender Foundation
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
@ -8,8 +8,7 @@
namespace blender::compositor::tests {
// todo: remove
void print_area(MemoryBuffer *input, const rcti &area, std::string description = "")
static void print_area(MemoryBuffer *input, const rcti &area, std::string description = "")
{
std::cout << description << ":" << std::endl;
for (BuffersIterator<float> it = input->iterate_with({}, area); !it.is_end(); ++it) {
@ -27,7 +26,8 @@ TEST(SummedTableArea, FullFrame_5x2)
SummedAreaTableOperation sat;
sat.set_execution_model(eExecutionModel::FullFrame);
// sat.set_mode(SummedAreaTableOperation::eMode::Identity);
sat.set_mode(SummedAreaTableOperation::eMode::Identity);
const rcti area{0, 5, 0, 2};
MemoryBuffer output(DataType::Color, area);
@ -38,11 +38,10 @@ TEST(SummedTableArea, FullFrame_5x2)
print_area(&input, area, "input");
/* Doesn't work because of a dependency of Operations on nodetree. */
// sat.render(&output, Span<rcti>{area}, inputs);
sat.update_memory_buffer(&output, area, inputs);
/* sat.render() doesn't work because of a dependency of Operations on nodetree,
* so call sat.update_memory_buffer() directly instead. */
print_area(&output, area, "output");
sat.update_memory_buffer(&output, area, inputs);
/* First row. */
EXPECT_FLOAT_EQ(output.get_elem(0, 0)[0], 1);
@ -75,8 +74,6 @@ TEST(SummedTableArea, FullFrame_3x2_squared)
print_area(&input, area, "input");
/* Doesn't work because of a dependency of Operations on nodetree. */
// sat.render(&output, Span<rcti>{area}, inputs);
sat.update_memory_buffer(&output, area, inputs);
print_area(&output, area, "output");
@ -158,15 +155,9 @@ class SummedTableAreaSumTest : public ::testing::Test {
input.fill(area_, val);
Span<MemoryBuffer *> inputs{&input};
/* Doesn't work because of a dependency of Operations on nodetree. */
// sat.render(&output, Span<rcti>{area}, inputs);
operation_->update_memory_buffer(sat_.get(), area_, inputs);
// print_area(sat_.get(), area_);
}
// void TearDown() override {}
std::shared_ptr<SummedAreaTableOperation> operation_;
std::shared_ptr<MemoryBuffer> sat_;
rcti area_;
@ -190,28 +181,29 @@ TEST_F(SummedTableAreaSumTest, RightEdge)
{
rcti area{.xmin = area_.xmax - 2, .xmax = area_.xmax, .ymin = 0, .ymax = 2};
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 9);
ASSERT_EQ(sum[0], 6);
}
TEST_F(SummedTableAreaSumTest, LowerRightCorner)
{
rcti area{.xmin = area_.xmax, .xmax = area_.xmax, .ymin = 0, .ymax = 0};
rcti area{
.xmin = area_.xmax - 1, .xmax = area_.xmax, .ymin = area_.ymax - 1, .ymax = area_.ymax};
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 1);
}
TEST_F(SummedTableAreaSumTest, TopLine)
{
rcti area{.xmin = 0, .xmax = 2, .ymin = 0, .ymax = 0};
rcti area{.xmin = 0, .xmax = 1, .ymin = 0, .ymax = 0};
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 4);
ASSERT_EQ(sum[0], 2);
}
TEST_F(SummedTableAreaSumTest, RightLine)
TEST_F(SummedTableAreaSumTest, ButtomLine)
{
rcti area{.xmin = 0, .xmax = 0, .ymin = 0, .ymax = 3};
rcti area{.xmin = 0, .xmax = 4, .ymin = 3, .ymax = 3};
float4 sum = summed_area_table_sum(sat_.get(), area);
ASSERT_EQ(sum[0], 3);
ASSERT_EQ(sum[0], 5);
}
} // namespace blender::compositor::tests

View File

@ -8884,9 +8884,8 @@ static void def_cmp_kuwahara(StructRNA *srna)
prop = RNA_def_property(srna, "fast", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, nullptr, "fast", 1);
RNA_def_property_ui_text(prop,
"Fast",
"Use faster computation. Might produce artefacts for large images.");
RNA_def_property_ui_text(
prop, "Fast", "Use faster computation. Might produce artefacts for large images.");
RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update");
}

View File

@ -67,7 +67,8 @@ static void node_composit_buts_kuwahara(uiLayout *layout, bContext * /*C*/, Poin
uiItemR(col, ptr, "uniformity", UI_ITEM_NONE, nullptr, ICON_NONE);
uiItemR(col, ptr, "sharpness", UI_ITEM_NONE, nullptr, ICON_NONE);
uiItemR(col, ptr, "eccentricity", UI_ITEM_NONE, nullptr, ICON_NONE);
} else if (variation == CMP_NODE_KUWAHARA_CLASSIC) {
}
else if (variation == CMP_NODE_KUWAHARA_CLASSIC) {
uiItemR(col, ptr, "fast", UI_ITEM_NONE, nullptr, ICON_NONE);
}
}