Speedup classic Kuwahara filter by summed area table #111150
|
@ -27,11 +27,12 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
|
|||
case CMP_NODE_KUWAHARA_CLASSIC: {
|
||||
KuwaharaClassicOperation *kuwahara_classic = new KuwaharaClassicOperation();
|
||||
kuwahara_classic->set_kernel_size(data->size);
|
||||
kuwahara_classic->set_use_sat(data->fast);
|
||||
converter.add_operation(kuwahara_classic);
|
||||
converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
|
||||
if(kuwahara_classic->get_kernel_size() >= 4) {
|
||||
/* Naive computation is faster for small kernel sizes. */
|
||||
kuwahara_classic->set_use_sat(true);
|
||||
converter.add_operation(kuwahara_classic);
|
||||
converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
|
||||
|
||||
if (data->fast) {
|
||||
SummedAreaTableOperation *sat = new SummedAreaTableOperation();
|
||||
sat->set_mode(SummedAreaTableOperation::eMode::Identity);
|
||||
converter.add_operation(sat);
|
||||
zazizizou marked this conversation as resolved
Outdated
|
||||
|
@ -44,29 +45,8 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
|
|||
converter.map_input_socket(get_input_socket(0), sat_squared->get_input_socket(0));
|
||||
converter.add_link(sat_squared->get_output_socket(0),
|
||||
kuwahara_classic->get_input_socket(2));
|
||||
|
||||
/* Using offset to improve precision.
|
||||
*
|
||||
* Summed area table can produce very large numbers, e.g. for 4k images with channel
|
||||
* values between 0 and 1, sum values can reach 10^7, but sum values are still around 10
|
||||
* This causes precision issues for single precision floating point values.
|
||||
* In order to improve precision, we subtract the mean value from the image as suggested in
|
||||
* the paper
|
||||
*
|
||||
* G. Facciolo et al. "Integral Images for Block Matching" 2014.
|
||||
*
|
||||
* Note: best results are achieved using this optimization as well as the running error
|
||||
* compensation in SummedAreaTableOperation.
|
||||
*/
|
||||
CalculateMeanOperation *mean = new CalculateMeanOperation();
|
||||
/* Compute the mean from the green channel. */
|
||||
mean->set_setting(3);
|
||||
converter.add_operation(mean);
|
||||
converter.map_input_socket(get_input_socket(0), mean->get_input_socket(0));
|
||||
converter.add_link(mean->get_output_socket(0), kuwahara_classic->get_input_socket(3));
|
||||
converter.add_link(mean->get_output_socket(0), sat->get_input_socket(1));
|
||||
converter.add_link(mean->get_output_socket(0), sat_squared->get_input_socket(1));
|
||||
}
|
||||
|
||||
converter.map_output_socket(get_output_socket(0), kuwahara_classic->get_output_socket(0));
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ void KuwaharaClassicOperation::init_execution()
|
|||
image_reader_ = this->get_input_socket_reader(0);
|
||||
sat_reader_ = this->get_input_socket_reader(1);
|
||||
sat_squared_reader_ = this->get_input_socket_reader(2);
|
||||
offset_reader_ = this->get_input_socket_reader(3);
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::deinit_execution()
|
||||
|
@ -77,7 +76,6 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
|||
}
|
||||
}
|
||||
else {
|
||||
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size_; dy <= kernel_size_; dy++) {
|
||||
for (int dx = -kernel_size_; dx <= kernel_size_; dx++) {
|
||||
|
@ -182,9 +180,6 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
|
|||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
|
||||
BLI_assert(it.get_num_inputs() == 4);
|
||||
const float offset = *it.in(3);
|
||||
|
||||
float3 mean_of_color[4] = {float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)};
|
||||
float3 mean_of_squared_color[4] = {float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)};
|
||||
int quadrant_pixel_count[4] = {0, 0, 0, 0};
|
||||
|
@ -276,9 +271,9 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
|
|||
}
|
||||
}
|
||||
|
||||
it.out[0] = mean_of_color[min_index].x + offset;
|
||||
it.out[1] = mean_of_color[min_index].y + offset;
|
||||
it.out[2] = mean_of_color[min_index].z + offset;
|
||||
it.out[0] = mean_of_color[min_index].x;
|
||||
it.out[1] = mean_of_color[min_index].y;
|
||||
it.out[2] = mean_of_color[min_index].z;
|
||||
|
||||
/* No changes for alpha channel. */
|
||||
it.out[3] = image->get_value(x, y, 3);
|
||||
|
|
|
@ -12,7 +12,6 @@ class KuwaharaClassicOperation : public MultiThreadedOperation {
|
|||
SocketReader *image_reader_;
|
||||
SocketReader *sat_reader_;
|
||||
SocketReader *sat_squared_reader_;
|
||||
SocketReader *offset_reader_;
|
||||
|
||||
int kernel_size_;
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
||||
#include "COM_SummedAreaTableOperation.h"
|
||||
|
||||
|
@ -59,119 +60,219 @@ void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
|
|||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
/* Note: although this is a single threaded call, multithreading is used. */
|
||||
MemoryBuffer *image = inputs[0];
|
||||
|
||||
/* Track floating point error. See below. */
|
||||
float4 running_compensation = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
/* First pass: copy values from input to output and square values if necessary. */
|
||||
threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange range_y) {
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
It is sufficient to have a single parallel loop over rows and a serial loop over columns; too much parallelism will hurt performance.
This copy loop can be fused with the horizontal pass.
|
||||
threading::parallel_for(IndexRange(area.xmin, area.xmax), 1, [&](const IndexRange range_x) {
|
||||
for (int64_t y = *range_y.begin(); y < *range_y.end(); y++) {
|
||||
for (int64_t x = *range_x.begin(); x < *range_x.end(); x++) {
|
||||
|
||||
for (BuffersIterator<float> it = output->iterate_with({inputs}, area); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
float4 color;
|
||||
image->read_elem(x, y, &color.x);
|
||||
|
||||
BLI_assert(it.get_num_inputs() == 2);
|
||||
const float offset = *it.in(1);
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
float4 color, upper, left, upper_left;
|
||||
image->read_elem(x, y, &color.x);
|
||||
color -= offset;
|
||||
switch (mode_) {
|
||||
case eMode::Squared: {
|
||||
color *= color;
|
||||
break;
|
||||
}
|
||||
case eMode::Identity: {
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use a temporary `accumulated_color` variable and avoid reading the buffer again, just like the above loop. Then, use `get_elem` instead of `read_elem_checked`.
|
||||
/* Pass. */
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
BLI_assert_msg(0, "Mode not implemented");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
output->read_elem_checked(x, y - 1, &upper.x);
|
||||
output->read_elem_checked(x - 1, y, &left.x);
|
||||
output->read_elem_checked(x - 1, y - 1, &upper_left.x);
|
||||
|
||||
float4 sum = upper + left - upper_left;
|
||||
|
||||
float4 v;
|
||||
switch (mode_) {
|
||||
case eMode::Squared: {
|
||||
v = color * color;
|
||||
break;
|
||||
out[0] = color.x;
|
||||
out[1] = color.y;
|
||||
out[2] = color.z;
|
||||
out[3] = color.w;
|
||||
}
|
||||
}
|
||||
case eMode::Identity: {
|
||||
v = color;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
BLI_assert_msg(0, "Mode not implemented");
|
||||
break;
|
||||
});
|
||||
});
|
||||
|
||||
/* Second pass. */
|
||||
threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange range_y) {
|
||||
for (int64_t y = *range_y.begin(); y < *range_y.end(); y++) {
|
||||
/* Track floating point error. See below. */
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `read` instead of `read_sampled`. The same applies to all `read_sampled` calls below.
|
||||
float4 running_compensation = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
This can be more compact.
- Use a for each loop on ranges. `for (const int y : sub_y_range) {`
- Accumulate a color instead of reading the previous output.
- Use the `get_elem` function.
- Use a copy function.
```cpp
threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange sub_y_range) {
for (const int y : sub_y_range) {
float4 accumulated_color = float4(0.0f);
for (const int x : IndexRange(area.xmin, area.xmax)) {
const float4 color = float4(image->get_elem(x, y));
accumulated_color += color * color;
copy_v4_v4(output->get_elem(x, y), accumulated_color);
}
}
});
```
Habib Gahbiche
commented
Will do, thanks for the tip :)
|
||||
for (int x = area.xmin; x < area.xmax; x++) {
|
||||
|
||||
float4 color;
|
||||
output->read_elem_checked(x - 1, y, &color.x);
|
||||
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
out[0] += color.x;
|
||||
out[1] += color.y;
|
||||
out[2] += color.z;
|
||||
out[3] += color.w;
|
||||
zazizizou marked this conversation as resolved
Omar Emara
commented
Same as above.
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* Using Kahan Summation algorithm to compensate for floating point inaccuracies caused by
|
||||
* summing up large number of values.
|
||||
* The idea is to introduce a variable to keep track of the error (here called `running_compensation`)
|
||||
* and then correct the error in the next iteration. */
|
||||
float4 difference = v - running_compensation;
|
||||
float4 temp = sum + difference;
|
||||
/* `(temp - sum)` cancels the high-order part of `difference`. Subtracting `difference` again
|
||||
* recovers `difference` for the next iteration. */
|
||||
running_compensation = (temp - sum) - difference;
|
||||
sum = temp;
|
||||
/* Third pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area.xmin, area.xmax), 1, [&](const IndexRange range_x) {
|
||||
for (int64_t x = *range_x.begin(); x < *range_x.end(); x++) {
|
||||
for (int y = area.ymin; y < area.ymax; y++) {
|
||||
float4 color;
|
||||
output->read_elem_checked(x, y - 1, &color.x);
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
I think we should attempt to multithread the SAT computation. Not sure if there is anything stopping us from doing that, but a two pass prefix sum should be easy to implement and efficient to parallelize on the CPU.
```
| Image | -> Prefix sum from left to right -> | Horizontal Pass Result | -> Prefix sum from bottom to top -> | Desired SAT |
```
Each of the prefix sums can simply be a parallel loop over rows/columns.
Habib Gahbiche
commented
As discussed in the meeting, my concern was using `SingleThreadedOperation` for a multi-threaded execution. I will upload a patch using TBB.
|
||||
|
||||
it.out[0] = sum.x;
|
||||
it.out[1] = sum.y;
|
||||
it.out[2] = sum.z;
|
||||
it.out[3] = sum.w;
|
||||
}
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
out[0] += color.x;
|
||||
out[1] += color.y;
|
||||
out[2] += color.z;
|
||||
out[3] += color.w;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *rect)
|
||||
MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
|
||||
{
|
||||
MemoryBuffer *result = new MemoryBuffer(DataType::Color, *rect);
|
||||
MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
|
||||
PixelSampler sampler = PixelSampler::Nearest;
|
||||
|
||||
/* Track floating point error. See below. */
|
||||
float4 running_compensation = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
/* First pass: copy values from input to output and square values if necessary. */
|
||||
threading::parallel_for(IndexRange(area->ymin, area->ymax), 1, [&](const IndexRange range_y) {
|
||||
threading::parallel_for(IndexRange(area->xmin, area->xmax), 1, [&](const IndexRange range_x) {
|
||||
for (float y = float(*range_y.begin()); y < float(*range_y.end()); y++) {
|
||||
for (float x = float(*range_x.begin()); x < float(*range_x.end()); x++) {
|
||||
|
||||
for (BuffersIterator<float> it = result->iterate_with({}, *rect); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
float4 color;
|
||||
image_reader_->read_sampled(&color.x, x, y, sampler);
|
||||
|
||||
float4 color, upper, left, upper_left;
|
||||
image_reader_->read_sampled(color, x, y, sampler);
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
result->read_elem_checked(x, y - 1, &upper.x);
|
||||
result->read_elem_checked(x - 1, y, &left.x);
|
||||
result->read_elem_checked(x - 1, y - 1, &upper_left.x);
|
||||
switch (mode_) {
|
||||
case eMode::Squared: {
|
||||
color *= color;
|
||||
break;
|
||||
}
|
||||
case eMode::Identity: {
|
||||
/* Pass. */
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
BLI_assert_msg(0, "Mode not implemented");
|
||||
break;
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `UNPACK2`.
|
||||
}
|
||||
}
|
||||
|
||||
float4 sum = upper + left - upper_left;
|
||||
|
||||
float4 v;
|
||||
switch (mode_) {
|
||||
case eMode::Squared: {
|
||||
v = color * color;
|
||||
break;
|
||||
out[0] = color.x;
|
||||
out[1] = color.y;
|
||||
out[2] = color.z;
|
||||
out[3] = color.w;
|
||||
}
|
||||
}
|
||||
case eMode::Identity: {
|
||||
v = color;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
BLI_assert_msg(0, "Mode not implemented");
|
||||
break;
|
||||
});
|
||||
});
|
||||
|
||||
/* Second pass. */
|
||||
threading::parallel_for(IndexRange(area->ymin, area->ymax), 1, [&](const IndexRange range_y) {
|
||||
for (int64_t y = *range_y.begin(); y < *range_y.end(); y++) {
|
||||
/* Track floating point error. See below. */
|
||||
float4 running_compensation = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
for (int x = area->xmin; x < area->xmax; x++) {
|
||||
|
||||
float4 color;
|
||||
output->read_elem_checked(x - 1, y, &color.x);
|
||||
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
out[0] += color.x;
|
||||
out[1] += color.y;
|
||||
out[2] += color.z;
|
||||
out[3] += color.w;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* Using Kahan Summation algorithm to compensate for floating point inaccuracies caused by
|
||||
* summing up large number of values.
|
||||
* The idea is to introduce a variable to keep track of the error (here called `running_compensation`)
|
||||
* and then correct the error in the next iteration. */
|
||||
float4 difference = v - running_compensation;
|
||||
float4 temp = sum + difference;
|
||||
/* `(temp - sum)` cancels the high-order part of `difference`. Subtracting `difference` again
|
||||
* recovers `difference` for the next iteration. */
|
||||
running_compensation = (temp - sum) - difference;
|
||||
sum = temp;
|
||||
/* Third pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area->xmin, area->xmax), 1, [&](const IndexRange range_x) {
|
||||
for (int64_t x = *range_x.begin(); x < *range_x.end(); x++) {
|
||||
for (int y = area->ymin; y < area->ymax; y++) {
|
||||
float4 color;
|
||||
output->read_elem_checked(x, y - 1, &color.x);
|
||||
|
||||
it.out[0] = sum.x;
|
||||
it.out[1] = sum.y;
|
||||
it.out[2] = sum.z;
|
||||
it.out[3] = sum.w;
|
||||
}
|
||||
float *out = output->get_elem(x, y);
|
||||
|
||||
return result;
|
||||
out[0] += color.x;
|
||||
out[1] += color.y;
|
||||
out[2] += color.z;
|
||||
out[3] += color.w;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
//MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *rect)
|
||||
//{
|
||||
// MemoryBuffer *result = new MemoryBuffer(DataType::Color, *rect);
|
||||
// PixelSampler sampler = PixelSampler::Nearest;
|
||||
//
|
||||
// /* Track floating point error. See below. */
|
||||
// float4 running_compensation = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
//
|
||||
// for (BuffersIterator<float> it = result->iterate_with({}, *rect); !it.is_end(); ++it) {
|
||||
// const int x = it.x;
|
||||
// const int y = it.y;
|
||||
//
|
||||
// float4 color, upper, left, upper_left;
|
||||
// image_reader_->read_sampled(color, x, y, sampler);
|
||||
//
|
||||
// result->read_elem_checked(x, y - 1, &upper.x);
|
||||
// result->read_elem_checked(x - 1, y, &left.x);
|
||||
// result->read_elem_checked(x - 1, y - 1, &upper_left.x);
|
||||
//
|
||||
// float4 sum = upper + left - upper_left;
|
||||
//
|
||||
// float4 v;
|
||||
// switch (mode_) {
|
||||
// case eMode::Squared: {
|
||||
// v = color * color;
|
||||
// break;
|
||||
// }
|
||||
// case eMode::Identity: {
|
||||
// v = color;
|
||||
// break;
|
||||
// }
|
||||
// default: {
|
||||
// BLI_assert_msg(0, "Mode not implemented");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /* Using Kahan Summation algorithm to compensate for floating point inaccuracies caused by
|
||||
// * summing up large number of values.
|
||||
// * The idea is to introduce a variable to keep track of the error (here called `running_error`)
|
||||
// * and then correct the error in the next iteration. */
|
||||
// float4 difference = v - running_compensation;
|
||||
// float4 temp = sum + difference;
|
||||
// /* `(temp - sum)` cancels the high-order part of `difference`. Subtracting `difference` again
|
||||
// * recovers `difference` for the next iteration. */
|
||||
// running_compensation = (temp - sum) - difference;
|
||||
// sum = temp;
|
||||
//
|
||||
// it.out[0] = sum.x;
|
||||
// it.out[1] = sum.y;
|
||||
// it.out[2] = sum.z;
|
||||
// it.out[3] = sum.w;
|
||||
// }
|
||||
//
|
||||
// return result;
|
||||
//}
|
||||
|
||||
void SummedAreaTableOperation::set_mode(eMode mode)
|
||||
{
|
||||
mode_ = mode;
|
||||
|
|
|
@ -12,7 +12,6 @@ struct SatParams {
|
|||
/* Input parameters. */
|
||||
SummedAreaTableOperation::eMode mode;
|
||||
eExecutionModel execution_model;
|
||||
float offset;
|
||||
rcti area;
|
||||
float4 fill_value;
|
||||
|
||||
|
@ -36,11 +35,8 @@ TEST_P(SummedAreaTableTestP, Values)
|
|||
|
||||
std::shared_ptr<MemoryBuffer> input = std::make_shared<MemoryBuffer>(DataType::Color, area);
|
||||
input->fill(area, ¶ms.fill_value.x);
|
||||
std::shared_ptr<MemoryBuffer> offset = std::make_shared<MemoryBuffer>(
|
||||
DataType::Value, area, true);
|
||||
offset->fill(area, ¶ms.offset);
|
||||
|
||||
sat.update_memory_buffer(&output, area, Span<MemoryBuffer *>{input.get(), offset.get()});
|
||||
sat.update_memory_buffer(&output, area, Span<MemoryBuffer *>{input.get()});
|
||||
|
||||
/* First row. */
|
||||
EXPECT_FLOAT_EQ(output.get_elem(0, 0)[0], params.values[0][0]);
|
||||
|
@ -57,54 +53,45 @@ INSTANTIATE_TEST_SUITE_P(FullFrame5x2_IdentityOnes,
|
|||
SummedAreaTableTestP,
|
||||
testing::Values(SatParams{SummedAreaTableOperation::eMode::Identity,
|
||||
eExecutionModel::FullFrame,
|
||||
0.0f, /* Offset. */
|
||||
rcti{0, 5, 0, 2}, /* Area. */
|
||||
{1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
|
||||
|
||||
/* Expected output. */
|
||||
{{1, 2, 3, 4, 5}, {2, 4, 6, 8, 10}}}));
|
||||
{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f},
|
||||
{2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
|
||||
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(FullFrame5x2_SquaredOnes,
|
||||
SummedAreaTableTestP,
|
||||
testing::Values(SatParams{SummedAreaTableOperation::eMode::Squared,
|
||||
eExecutionModel::FullFrame,
|
||||
rcti{0, 5, 0, 2}, /* Area. */
|
||||
{1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */
|
||||
|
||||
/* Expect identical to when using Identity SAT, since all inputs are 1. */
|
||||
{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f},
|
||||
{2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}
|
||||
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(FullFrame3x2_Squared,
|
||||
SummedAreaTableTestP,
|
||||
testing::Values(SatParams{SummedAreaTableOperation::eMode::Identity,
|
||||
testing::Values(SatParams{SummedAreaTableOperation::eMode::Squared,
|
||||
eExecutionModel::FullFrame,
|
||||
0.0f, /* Offset. */
|
||||
rcti{0, 3, 0, 2}, /* Area. */
|
||||
{2.0f, 2.0f, 1.5f, .1f}, /* Fill value. */
|
||||
|
||||
/* Expected output. */
|
||||
{
|
||||
{2, 4, 4.5},
|
||||
{0.2, 8, 12},
|
||||
{4.0f, 8.0f, 6.75f},
|
||||
{0.02f, 16.0f, 24.0f},
|
||||
}}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(FullFrame3x2_IdentityPositiveOffset,
|
||||
SummedAreaTableTestP,
|
||||
testing::Values(SatParams{
|
||||
SummedAreaTableOperation::eMode::Identity,
|
||||
eExecutionModel::FullFrame,
|
||||
42.0f, /* Offset. */
|
||||
rcti{0, 3, 0, 2}, /* Area. */
|
||||
{2.0f, 1.24f, 5.0f, 1.0f}, /* Fill value. */
|
||||
|
||||
/* Expected output. */
|
||||
{{-40.0f, -81.519997f, -111.0f}, {-82.0f, -160.0f, -244.56001f}}}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(FullFrame3x2_SquaredNegativeOffset,
|
||||
SummedAreaTableTestP,
|
||||
testing::Values(SatParams{
|
||||
SummedAreaTableOperation::eMode::Identity,
|
||||
eExecutionModel::FullFrame,
|
||||
-0.5f, /* Offset. */
|
||||
rcti{0, 3, 0, 2}, /* Area. */
|
||||
{2.0f, 1.24f, 5.0f, 1.0f}, /* Fill value. */
|
||||
|
||||
/* Expected output. */
|
||||
{{2.5f, 3.48f, 16.5f}, {3.0f, 10.0f, 10.440001f}}}));
|
||||
|
||||
class SummedTableAreaSumTest : public ::testing::Test {
|
||||
class SummedAreaTableSumTest : public ::testing::Test {
|
||||
public:
|
||||
SummedTableAreaSumTest()
|
||||
SummedAreaTableSumTest()
|
||||
{
|
||||
operation_ = std::make_shared<SummedAreaTableOperation>();
|
||||
}
|
||||
|
@ -135,7 +122,7 @@ class SummedTableAreaSumTest : public ::testing::Test {
|
|||
float offset_ = 0.0f;
|
||||
};
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, FullyInside)
|
||||
TEST_F(SummedAreaTableSumTest, FullyInside)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = 1;
|
||||
|
@ -146,7 +133,7 @@ TEST_F(SummedTableAreaSumTest, FullyInside)
|
|||
ASSERT_EQ(sum[0], 9);
|
||||
zazizizou marked this conversation as resolved
Outdated
Sergey Sharybin
commented
Any specific reason to use `ASSERT_EQ` instead of `EXPECT_EQ`? The ASSERT will stop the test. It is typically used for cases when the rest of the test will be impossible. For example, when you expect a function to give you a pointer to an object and you check that it is non-nullptr before looking into its properties.
Habib Gahbiche
commented
No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity.
|
||||
}
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, LeftEdge)
|
||||
TEST_F(SummedAreaTableSumTest, LeftEdge)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = 0;
|
||||
|
@ -157,7 +144,7 @@ TEST_F(SummedTableAreaSumTest, LeftEdge)
|
|||
ASSERT_EQ(sum[0], 9);
|
||||
}
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, RightEdge)
|
||||
TEST_F(SummedAreaTableSumTest, RightEdge)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = area_.xmax - 2;
|
||||
|
@ -168,7 +155,7 @@ TEST_F(SummedTableAreaSumTest, RightEdge)
|
|||
ASSERT_EQ(sum[0], 6);
|
||||
}
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, LowerRightCorner)
|
||||
TEST_F(SummedAreaTableSumTest, LowerRightCorner)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = area_.xmax - 1;
|
||||
|
@ -179,7 +166,7 @@ TEST_F(SummedTableAreaSumTest, LowerRightCorner)
|
|||
ASSERT_EQ(sum[0], 1);
|
||||
}
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, TopLine)
|
||||
TEST_F(SummedAreaTableSumTest, TopLine)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = 0;
|
||||
|
@ -190,7 +177,7 @@ TEST_F(SummedTableAreaSumTest, TopLine)
|
|||
ASSERT_EQ(sum[0], 2);
|
||||
}
|
||||
|
||||
TEST_F(SummedTableAreaSumTest, ButtomLine)
|
||||
TEST_F(SummedAreaTableSumTest, ButtomLine)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = 0;
|
||||
|
|
|
@ -1033,8 +1033,6 @@ typedef struct NodeKuwaharaData {
|
|||
int uniformity;
|
||||
float sharpness;
|
||||
float eccentricity;
|
||||
short fast;
|
||||
char _pad[2];
|
||||
} NodeKuwaharaData;
|
||||
|
||||
typedef struct NodeAntiAliasingData {
|
||||
|
|
|
@ -8630,12 +8630,6 @@ static void def_cmp_kuwahara(StructRNA *srna)
|
|||
"Eccentricity",
|
||||
"Controls how directional the filter is. 0 means the filter is completely omnidirectional "
|
||||
"while 2 means it is maximally directed along the edges of the image");
|
||||
|
||||
prop = RNA_def_property(srna, "fast", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, nullptr, "fast", 1);
|
||||
RNA_def_property_ui_text(
|
||||
prop, "Fast", "Use faster computation. Might produce artefacts for large images.");
|
||||
|
||||
RNA_def_property_update(prop, NC_NODE | NA_EDITED, "rna_Node_update");
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
I think this should be called `High Precision`, as it conveys the meaning better to the user.
Habib Gahbiche
commented
Option will be removed.
|
||||
|
||||
|
|
|
@ -63,9 +63,6 @@ static void node_composit_buts_kuwahara(uiLayout *layout, bContext * /*C*/, Poin
|
|||
uiItemR(col, ptr, "sharpness", UI_ITEM_NONE, nullptr, ICON_NONE);
|
||||
uiItemR(col, ptr, "eccentricity", UI_ITEM_NONE, nullptr, ICON_NONE);
|
||||
}
|
||||
else if (variation == CMP_NODE_KUWAHARA_CLASSIC) {
|
||||
uiItemR(col, ptr, "fast", UI_ITEM_NONE, nullptr, ICON_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
using namespace blender::realtime_compositor;
|
||||
|
|
Add a `kuwahara_classic->set_use_sat(true);` just for clarity.