Speedup classic Kuwahara filter by summed area table #111150
|
@ -345,6 +345,8 @@ if(WITH_COMPOSITOR_CPU)
|
|||
operations/COM_GaussianXBlurOperation.h
|
||||
operations/COM_GaussianYBlurOperation.cc
|
||||
operations/COM_GaussianYBlurOperation.h
|
||||
operations/COM_SummedAreaTableOperation.h
|
||||
operations/COM_SummedAreaTableOperation.cc
|
||||
operations/COM_KuwaharaAnisotropicOperation.cc
|
||||
operations/COM_KuwaharaAnisotropicOperation.h
|
||||
operations/COM_KuwaharaAnisotropicStructureTensorOperation.cc
|
||||
|
@ -665,6 +667,7 @@ if(WITH_COMPOSITOR_CPU)
|
|||
tests/COM_BufferRange_test.cc
|
||||
tests/COM_BuffersIterator_test.cc
|
||||
tests/COM_NodeOperation_test.cc
|
||||
tests/COM_ComputeSummedAreaTableOperation_test.cc
|
||||
)
|
||||
set(TEST_INC
|
||||
)
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "COM_KuwaharaAnisotropicOperation.h"
|
||||
#include "COM_KuwaharaAnisotropicStructureTensorOperation.h"
|
||||
#include "COM_KuwaharaClassicOperation.h"
|
||||
#include "COM_SummedAreaTableOperation.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
|
@ -23,12 +24,24 @@ void KuwaharaNode::convert_to_operations(NodeConverter &converter,
|
|||
|
||||
switch (data->variation) {
|
||||
case CMP_NODE_KUWAHARA_CLASSIC: {
|
||||
KuwaharaClassicOperation *operation = new KuwaharaClassicOperation();
|
||||
KuwaharaClassicOperation *kuwahara_classic = new KuwaharaClassicOperation();
|
||||
converter.add_operation(kuwahara_classic);
|
||||
converter.map_input_socket(get_input_socket(0), kuwahara_classic->get_input_socket(0));
|
||||
converter.map_input_socket(get_input_socket(1), kuwahara_classic->get_input_socket(1));
|
||||
|
||||
converter.add_operation(operation);
|
||||
converter.map_input_socket(get_input_socket(0), operation->get_input_socket(0));
|
||||
converter.map_input_socket(get_input_socket(1), operation->get_input_socket(1));
|
||||
converter.map_output_socket(get_output_socket(0), operation->get_output_socket());
|
||||
SummedAreaTableOperation *sat = new SummedAreaTableOperation();
|
||||
sat->set_mode(SummedAreaTableOperation::eMode::Identity);
|
||||
converter.add_operation(sat);
|
||||
converter.map_input_socket(get_input_socket(0), sat->get_input_socket(0));
|
||||
converter.add_link(sat->get_output_socket(0), kuwahara_classic->get_input_socket(2));
|
||||
|
||||
SummedAreaTableOperation *sat_squared = new SummedAreaTableOperation();
|
||||
zazizizou marked this conversation as resolved
Outdated
|
||||
sat_squared->set_mode(SummedAreaTableOperation::eMode::Squared);
|
||||
converter.add_operation(sat_squared);
|
||||
converter.map_input_socket(get_input_socket(0), sat_squared->get_input_socket(0));
|
||||
converter.add_link(sat_squared->get_output_socket(0), kuwahara_classic->get_input_socket(3));
|
||||
|
||||
converter.map_output_socket(get_output_socket(0), kuwahara_classic->get_output_socket(0));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@ KuwaharaClassicOperation::KuwaharaClassicOperation()
|
|||
{
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_input_socket(DataType::Value);
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_output_socket(DataType::Color);
|
||||
|
||||
this->flags_.is_fullframe_operation = true;
|
||||
|
@ -25,12 +27,16 @@ void KuwaharaClassicOperation::init_execution()
|
|||
{
|
||||
image_reader_ = this->get_input_socket_reader(0);
|
||||
size_reader_ = this->get_input_socket_reader(1);
|
||||
sat_reader_ = this->get_input_socket_reader(2);
|
||||
sat_squared_reader_ = this->get_input_socket_reader(3);
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::deinit_execution()
|
||||
{
|
||||
image_reader_ = nullptr;
|
||||
size_reader_ = nullptr;
|
||||
sat_reader_ = nullptr;
|
||||
sat_squared_reader_ = nullptr;
|
||||
}
|
||||
|
||||
void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
||||
|
@ -46,13 +52,44 @@ void KuwaharaClassicOperation::execute_pixel_sampled(float output[4],
|
|||
size_reader_->read_sampled(size, x, y, sampler);
|
||||
const int kernel_size = int(math::max(0.0f, size[0]));
|
||||
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
|
||||
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
|
||||
/* Naive implementation is more accurate for small kernel sizes. */
|
||||
if (kernel_size >= 4) {
|
||||
for (int q = 0; q < 4; q++) {
|
||||
/* A fancy expression to compute the sign of the quadrant q. */
|
||||
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx >= 0 && yy >= 0 && xx < this->get_width() && yy < this->get_height()) {
|
||||
int2 lower_bound = int2(x, y) -
|
||||
int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
|
||||
int2 upper_bound = int2(x, y) +
|
||||
int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
|
||||
|
||||
/* Limit the quadrants to the image bounds. */
|
||||
int2 image_bound = int2(this->get_width(), this->get_height()) - int2(1);
|
||||
int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
|
||||
int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
|
||||
int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
|
||||
quadrant_pixel_count[q] = region_size.x * region_size.y;
|
||||
|
||||
rcti kernel_area;
|
||||
kernel_area.xmin = corrected_lower_bound[0];
|
||||
kernel_area.ymin = corrected_lower_bound[1];
|
||||
kernel_area.xmax = corrected_upper_bound[0];
|
||||
kernel_area.ymax = corrected_upper_bound[1];
|
||||
|
||||
mean_of_color[q] = summed_area_table_sum_tiled(sat_reader_, kernel_area);
|
||||
mean_of_squared_color[q] = summed_area_table_sum_tiled(sat_squared_reader_, kernel_area);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
|
||||
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx < 0 || yy < 0 || xx >= this->get_width() || yy >= this->get_height()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float4 color;
|
||||
image_reader_->read_sampled(color, xx, yy, sampler);
|
||||
|
@ -115,24 +152,60 @@ void KuwaharaClassicOperation::update_memory_buffer_partial(MemoryBuffer *output
|
|||
{
|
||||
MemoryBuffer *image = inputs[0];
|
||||
MemoryBuffer *size_image = inputs[1];
|
||||
MemoryBuffer *sat = inputs[2];
|
||||
MemoryBuffer *sat_squared = inputs[3];
|
||||
|
||||
int width = image->get_width();
|
||||
int height = image->get_height();
|
||||
|
||||
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
|
||||
const int x = it.x;
|
||||
const int y = it.y;
|
||||
|
||||
float4 mean_of_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
|
||||
float4 mean_of_squared_color[] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
|
||||
int quadrant_pixel_count[] = {0, 0, 0, 0};
|
||||
float4 mean_of_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
|
||||
float4 mean_of_squared_color[4] = {float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f)};
|
||||
int quadrant_pixel_count[4] = {0, 0, 0, 0};
|
||||
|
||||
const int kernel_size = int(math::max(0.0f, *size_image->get_elem(x, y)));
|
||||
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
|
||||
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
|
||||
/* Naive implementation is more accurate for small kernel sizes. */
|
||||
if (kernel_size >= 4) {
|
||||
for (int q = 0; q < 4; q++) {
|
||||
/* A fancy expression to compute the sign of the quadrant q. */
|
||||
int2 sign = int2((q % 2) * 2 - 1, ((q / 2) * 2 - 1));
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx >= 0 && yy >= 0 && xx < image->get_width() && yy < image->get_height()) {
|
||||
int2 lower_bound = int2(x, y) -
|
||||
int2(sign.x > 0 ? 0 : kernel_size, sign.y > 0 ? 0 : kernel_size);
|
||||
int2 upper_bound = int2(x, y) +
|
||||
int2(sign.x < 0 ? 0 : kernel_size, sign.y < 0 ? 0 : kernel_size);
|
||||
|
||||
/* Limit the quadrants to the image bounds. */
|
||||
int2 image_bound = int2(width, height) - int2(1);
|
||||
int2 corrected_lower_bound = math::min(image_bound, math::max(int2(0, 0), lower_bound));
|
||||
int2 corrected_upper_bound = math::min(image_bound, math::max(int2(0, 0), upper_bound));
|
||||
int2 region_size = corrected_upper_bound - corrected_lower_bound + int2(1, 1);
|
||||
quadrant_pixel_count[q] = region_size.x * region_size.y;
|
||||
|
||||
rcti kernel_area;
|
||||
kernel_area.xmin = corrected_lower_bound[0];
|
||||
kernel_area.ymin = corrected_lower_bound[1];
|
||||
kernel_area.xmax = corrected_upper_bound[0];
|
||||
kernel_area.ymax = corrected_upper_bound[1];
|
||||
|
||||
mean_of_color[q] = summed_area_table_sum(sat, kernel_area);
|
||||
mean_of_squared_color[q] = summed_area_table_sum(sat_squared, kernel_area);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Split surroundings of pixel into 4 overlapping regions. */
|
||||
for (int dy = -kernel_size; dy <= kernel_size; dy++) {
|
||||
for (int dx = -kernel_size; dx <= kernel_size; dx++) {
|
||||
|
||||
int xx = x + dx;
|
||||
int yy = y + dy;
|
||||
if (xx < 0 || yy < 0 || xx >= image->get_width() || yy >= image->get_height()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float4 color;
|
||||
image->read_elem(xx, yy, &color.x);
|
||||
|
|
|
@ -11,6 +11,8 @@ namespace blender::compositor {
|
|||
class KuwaharaClassicOperation : public MultiThreadedOperation {
|
||||
SocketReader *image_reader_;
|
||||
SocketReader *size_reader_;
|
||||
SocketReader *sat_reader_;
|
||||
SocketReader *sat_squared_reader_;
|
||||
|
||||
public:
|
||||
KuwaharaClassicOperation();
|
||||
|
|
|
@ -0,0 +1,217 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
||||
#include "COM_SummedAreaTableOperation.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
SummedAreaTableOperation::SummedAreaTableOperation()
|
||||
{
|
||||
this->add_input_socket(DataType::Color);
|
||||
this->add_output_socket(DataType::Color);
|
||||
OmarEmaraDev marked this conversation as resolved
Outdated
Omar Emara
commented
What is this Value input?
Habib Gahbiche
commented
This was needed to subtract the mean from the image. It is not needed anymore, I will remove it.
|
||||
|
||||
mode_ = eMode::Identity;
|
||||
|
||||
this->flags_.is_fullframe_operation = true;
|
||||
}
|
||||
|
||||
void SummedAreaTableOperation::init_execution()
|
||||
{
|
||||
SingleThreadedOperation::init_execution();
|
||||
image_reader_ = this->get_input_socket_reader(0);
|
||||
}
|
||||
|
||||
void SummedAreaTableOperation::deinit_execution()
|
||||
{
|
||||
image_reader_ = nullptr;
|
||||
SingleThreadedOperation::deinit_execution();
|
||||
}
|
||||
|
||||
/* Forwards the area-of-interest query to the input operation. The requested area is ignored:
 * the query is always made for the full extent of the input, from (0, 0) up to its width and
 * height. Returns whatever the input operation reports. */
bool SummedAreaTableOperation::determine_depending_area_of_interest(
    rcti * /*input*/, ReadBufferOperation *read_operation, rcti *output)
{
  NodeOperation *input_operation = get_input_operation(0);

  rcti full_input_area;
  full_input_area.xmin = 0;
  full_input_area.xmax = input_operation->get_width();
  full_input_area.ymin = 0;
  full_input_area.ymax = input_operation->get_height();

  return input_operation->determine_depending_area_of_interest(
      &full_input_area, read_operation, output);
}
|
||||
|
||||
void SummedAreaTableOperation::get_area_of_interest(int input_idx,
|
||||
const rcti & /*output_area*/,
|
||||
rcti &r_input_area)
|
||||
{
|
||||
r_input_area = get_input_operation(input_idx)->get_canvas();
|
||||
}
|
||||
|
||||
void SummedAreaTableOperation::update_memory_buffer(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs)
|
||||
{
|
||||
/* Note: although this is a single threaded call, multithreading is used. */
|
||||
MemoryBuffer *image = inputs[0];
|
||||
|
||||
/* First pass: copy input to output and sum horizontally. */
|
||||
threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange range_y) {
|
||||
for (const int y : range_y) {
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
It is sufficient to have a single parallel loop over rows and a serial loop over columns; too much parallelism will hurt performance.
This copy loop can be fused with the horizontal pass.
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int x : IndexRange(area.xmin, area.xmax)) {
|
||||
const float4 color = float4(image->get_elem(x, y));
|
||||
accumulated_color += mode_ == eMode::Squared ? color * color : color;
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* Second pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area.xmin, area.xmax), 1, [&](const IndexRange range_x) {
|
||||
for (const int x : range_x) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int y : IndexRange(area.ymin, area.ymax)) {
|
||||
const float4 color = float4(output->get_elem(x, y));
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use a temporary `accumulated_color` variable and avoid reading the buffer again, just like the above loop. Then, use `get_elem` instead of `read_elem_checked`.
|
||||
accumulated_color += color;
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
MemoryBuffer *SummedAreaTableOperation::create_memory_buffer(rcti *area)
|
||||
{
|
||||
/* Note: although this is a single threaded call, multithreading is used. */
|
||||
MemoryBuffer *output = new MemoryBuffer(DataType::Color, *area);
|
||||
|
||||
/* First pass: copy input to output and sum horizontally. */
|
||||
threading::parallel_for(IndexRange(area->ymin, area->ymax), 1, [&](const IndexRange range_y) {
|
||||
for (const int y : range_y) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int x : IndexRange(area->xmin, area->xmax)) {
|
||||
float4 color;
|
||||
image_reader_->read(&color.x, x, y, nullptr);
|
||||
accumulated_color += mode_ == eMode::Squared ? color * color : color;
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `read` instead of `read_sampled`. The same applies to all `read_sampled` calls below.
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
This can be more compact.
- Use a for each loop on ranges. `for (const int y : sub_y_range) {`
- Accumulate a color instead of reading the previous output.
- Use the `get_elem` function.
- Use a copy function.
```cpp
threading::parallel_for(IndexRange(area.ymin, area.ymax), 1, [&](const IndexRange sub_y_range) {
for (const int y : sub_y_range) {
float4 accumulated_color = float4(0.0f);
for (const int x : IndexRange(area.xmin, area.xmax)) {
const float4 color = float4(image->get_elem(x, y));
accumulated_color += color * color;
copy_v4_v4(output->get_elem(x, y), accumulated_color);
}
}
});
```
Habib Gahbiche
commented
Will do, thanks for the tip :)
|
||||
});
|
||||
|
||||
/* Second pass: vertical sum. */
|
||||
threading::parallel_for(IndexRange(area->xmin, area->xmax), 1, [&](const IndexRange range_x) {
|
||||
for (const int x : range_x) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int y : IndexRange(area->ymin, area->ymax)) {
|
||||
|
||||
accumulated_color += float4(output->get_elem(x, y));
|
||||
copy_v4_v4(output->get_elem(x, y), accumulated_color);
|
||||
}
|
||||
zazizizou marked this conversation as resolved
Omar Emara
commented
Same as above.
|
||||
}
|
||||
});
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/* Selects whether the table accumulates the input values (Identity) or their squares
 * (Squared). */
void SummedAreaTableOperation::set_mode(eMode mode)
{
  this->mode_ = mode;
}
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
I think we should attempt to multithread the SAT computation. Not sure if there is anything stopping us from doing that, but a two pass prefix sum should be easy to implement and efficient to parallelize on the CPU.
```
| Image | -> Prefix sum from left to right -> | Horizontal Pass Result | -> Prefix sum from bottom to top -> | Desired SAT |
```
Each of the prefix sums can simply be a parallel loop over rows/columns.
Habib Gahbiche
commented
As discussed in the meeting, my concern was using `SingleThreadedOperation` for a multi-threaded execution. I will upload a patch using TBB.
|
||||
|
||||
/* Returns the accumulation mode currently set on this operation. */
SummedAreaTableOperation::eMode SummedAreaTableOperation::get_mode()
{
  return this->mode_;
}
|
||||
|
||||
float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area)
|
||||
{
|
||||
/*
|
||||
* a, b, c and d are the bounding box of the given area. They are defined as follows:
|
||||
*
|
||||
* y
|
||||
* ▲
|
||||
* │
|
||||
* ├──────x───────x
|
||||
* │ │c d│
|
||||
* ├──────x───────x
|
||||
* │ │a b│
|
||||
* └──────┴───────┴──────► x
|
||||
*
|
||||
* Note: this is the same definition as in https://en.wikipedia.org/wiki/Summed-area_table
|
||||
* but using the blender convention with the origin being at the lower left.
|
||||
*/
|
||||
|
||||
BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);
|
||||
|
||||
int2 lower_bound(area.xmin, area.ymin);
|
||||
int2 upper_bound(area.xmax, area.ymax);
|
||||
|
||||
int2 corrected_lower_bound = lower_bound - int2(1, 1);
|
||||
int2 corrected_upper_bound;
|
||||
corrected_upper_bound[0] = math::min((int)buffer->get_width() - 1, upper_bound[0]);
|
||||
corrected_upper_bound[1] = math::min((int)buffer->get_height() - 1, upper_bound[1]);
|
||||
|
||||
float4 a, b, c, d, addend, substrahend;
|
||||
buffer->read_sampled(&a.x, corrected_upper_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
|
||||
buffer->read_sampled(&d.x, corrected_lower_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
|
||||
addend = a + d;
|
||||
|
||||
buffer->read_sampled(&b.x, corrected_lower_bound[0], corrected_upper_bound[1], PixelSampler::Nearest);
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Use `UNPACK2`.
|
||||
buffer->read_sampled(&c.x, corrected_upper_bound[0], corrected_lower_bound[1], PixelSampler::Nearest);
|
||||
substrahend = b + c;
|
||||
|
||||
float4 sum = addend - substrahend;
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area)
{
  /*
   * Computes the sum over the rectangle `area` (bounds inclusive) from four lookups into the
   * summed area table `buffer`. With the origin at the lower left, the lookups are located at:
   *
   *   y
   *   ^
   *   |
   *   +------x-----------x
   *   |      |UL       UR|
   *   +------x-----------x
   *   |      |LL       LR|
   *   +------+-----------+------> x
   *
   *   sum = (UR + LL) - (UL + LR)
   *
   * UR sits on the upper bound of the area, limited to the last row and column of the buffer.
   * LL sits one pixel below and to the left of the lower bound, UL and LR are the two mixed
   * corners. This is the formula from https://en.wikipedia.org/wiki/Summed-area_table, written
   * for the Blender convention of a lower-left origin.
   *
   * NOTE(review): lookups for areas touching the left or bottom edge land at coordinate -1.
   * `read_elem_checked` presumably yields zero there - verify against MemoryBuffer.
   */

  BLI_assert(area.xmin <= area.xmax && area.ymin <= area.ymax);

  /* One pixel outside the area, towards the origin. */
  const int2 outer_lower = int2(area.xmin, area.ymin) - int2(1, 1);

  /* Upper bound of the area, limited to the buffer size. */
  int2 inner_upper;
  inner_upper.x = math::min(buffer->get_width() - 1, area.xmax);
  inner_upper.y = math::min(buffer->get_height() - 1, area.ymax);

  float4 upper_right, lower_left, upper_left, lower_right;
  buffer->read_elem_checked(inner_upper.x, inner_upper.y, upper_right);
  buffer->read_elem_checked(outer_lower.x, outer_lower.y, lower_left);
  buffer->read_elem_checked(outer_lower.x, inner_upper.y, upper_left);
  buffer->read_elem_checked(inner_upper.x, outer_lower.y, lower_right);

  return (upper_right + lower_left) - (upper_left + lower_right);
}
|
||||
|
||||
} // namespace blender::compositor
|
|
@ -0,0 +1,56 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "COM_SingleThreadedOperation.h"
|
||||
|
||||
namespace blender::compositor {
|
||||
|
||||
/**
|
||||
* \brief SummedAreaTableOperation class computes the summed area table.
|
||||
zazizizou marked this conversation as resolved
Outdated
Omar Emara
commented
Update comment.
|
||||
*/
|
||||
class SummedAreaTableOperation : public SingleThreadedOperation {
|
||||
|
||||
public:
|
||||
SummedAreaTableOperation();
|
||||
|
||||
enum eMode { Identity = 1, Squared };
|
||||
|
||||
void set_mode(const eMode mode);
|
||||
eMode get_mode();
|
||||
|
||||
/**
|
||||
* Initialize the execution
|
||||
*/
|
||||
void init_execution() override;
|
||||
|
||||
/**
|
||||
* Deinitialize the execution
|
||||
*/
|
||||
void deinit_execution() override;
|
||||
|
||||
bool determine_depending_area_of_interest(rcti *input,
|
||||
ReadBufferOperation *read_operation,
|
||||
rcti *output) override;
|
||||
|
||||
void get_area_of_interest(int input_idx, const rcti &output_area, rcti &r_input_area) override;
|
||||
|
||||
MemoryBuffer *create_memory_buffer(rcti *rect) override;
|
||||
|
||||
void update_memory_buffer(MemoryBuffer *output,
|
||||
const rcti &area,
|
||||
Span<MemoryBuffer *> inputs) override;
|
||||
|
||||
private:
|
||||
SocketReader *image_reader_;
|
||||
eMode mode_;
|
||||
};
|
||||
|
||||
/* Computes the sum of the rectangular region defined by the given area from the
|
||||
* given summed area table. All coordinates within the area are included. */
|
||||
float4 summed_area_table_sum(MemoryBuffer *buffer, const rcti &area);
|
||||
float4 summed_area_table_sum_tiled(SocketReader *buffer, const rcti &area);
|
||||
|
||||
} // namespace blender::compositor
|
|
@ -0,0 +1,191 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "testing/testing.h"
|
||||
|
||||
#include "COM_SummedAreaTableOperation.h"
|
||||
|
||||
namespace blender::compositor::tests {
|
||||
|
||||
struct SatParams {
|
||||
/* Input parameters. */
|
||||
SummedAreaTableOperation::eMode mode;
|
||||
eExecutionModel execution_model;
|
||||
rcti area;
|
||||
float4 fill_value;
|
||||
|
||||
/* Expected output values. */
|
||||
std::vector<std::vector<float>> values;
|
||||
};
|
||||
|
||||
/* Fixture for the value-parameterized tests; it adds nothing to the gtest base class. */
class SummedAreaTableTestP : public testing::TestWithParam<SatParams> {};
|
||||
|
||||
/* Fills an input image with the constant color of the parameter set, computes its summed area
 * table and compares the first three columns of the first two rows against the expected
 * values. Each check reads a different color channel, so all four channels are covered. */
TEST_P(SummedAreaTableTestP, Values)
{
  const SatParams &params = GetParam();

  SummedAreaTableOperation sat = SummedAreaTableOperation();

  sat.set_execution_model(params.execution_model);
  sat.set_mode(params.mode);
  const rcti area = params.area;
  MemoryBuffer output(DataType::Color, area);

  std::shared_ptr<MemoryBuffer> input = std::make_shared<MemoryBuffer>(DataType::Color, area);
  /* The fill color is passed as a pointer to its first channel. */
  input->fill(area, &params.fill_value.x);

  sat.update_memory_buffer(&output, area, Span<MemoryBuffer *>{input.get()});

  /* First row. */
  EXPECT_FLOAT_EQ(output.get_elem(0, 0)[0], params.values[0][0]);
  EXPECT_FLOAT_EQ(output.get_elem(1, 0)[1], params.values[0][1]);
  EXPECT_FLOAT_EQ(output.get_elem(2, 0)[2], params.values[0][2]);

  /* Second row. */
  EXPECT_FLOAT_EQ(output.get_elem(0, 1)[3], params.values[1][0]);
  EXPECT_FLOAT_EQ(output.get_elem(1, 1)[0], params.values[1][1]);
  EXPECT_FLOAT_EQ(output.get_elem(2, 1)[1], params.values[1][2]);
}
|
||||
|
||||
/* 5x2 image filled with ones, values accumulated as they are. */
INSTANTIATE_TEST_SUITE_P(
    FullFrame5x2_IdentityOnes,
    SummedAreaTableTestP,
    testing::Values(SatParams{
        SummedAreaTableOperation::eMode::Identity,
        eExecutionModel::FullFrame,
        rcti{0, 5, 0, 2},         /* Area. */
        {1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */

        /* Expected output. */
        {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}}));

/* 5x2 image filled with ones, values squared before accumulation. */
INSTANTIATE_TEST_SUITE_P(
    FullFrame5x2_SquaredOnes,
    SummedAreaTableTestP,
    testing::Values(SatParams{
        SummedAreaTableOperation::eMode::Squared,
        eExecutionModel::FullFrame,
        rcti{0, 5, 0, 2},         /* Area. */
        {1.0f, 1.0f, 1.0f, 1.0f}, /* Fill value. */

        /* Expect identical to when using Identity SAT, since all inputs are 1. */
        {{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {2.0f, 4.0f, 6.0f, 8.0f, 10.0f}}}));

/* 3x2 image with a different value per channel, values squared before accumulation. */
INSTANTIATE_TEST_SUITE_P(
    FullFrame3x2_Squared,
    SummedAreaTableTestP,
    testing::Values(SatParams{
        SummedAreaTableOperation::eMode::Squared,
        eExecutionModel::FullFrame,
        rcti{0, 3, 0, 2},        /* Area. */
        {2.0f, 2.0f, 1.5f, .1f}, /* Fill value. */

        /* Expected output. */
        {
            {4.0f, 8.0f, 6.75f},
            {0.02f, 16.0f, 24.0f},
        }}));
|
||||
|
||||
class SummedAreaTableSumTest : public ::testing::Test {
|
||||
public:
|
||||
SummedAreaTableSumTest()
|
||||
{
|
||||
operation_ = std::make_shared<SummedAreaTableOperation>();
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override
|
||||
{
|
||||
operation_->set_execution_model(eExecutionModel::FullFrame);
|
||||
operation_->set_mode(SummedAreaTableOperation::eMode::Squared);
|
||||
|
||||
area_ = rcti{0, 5, 0, 4};
|
||||
sat_ = std::make_shared<MemoryBuffer>(DataType::Color, area_);
|
||||
|
||||
const float val[4] = {1.0f, 2.0f, 1.5f, 0.1f};
|
||||
std::shared_ptr<MemoryBuffer> input = std::make_shared<MemoryBuffer>(DataType::Color, area_);
|
||||
input->fill(area_, val);
|
||||
std::shared_ptr<MemoryBuffer> offset = std::make_shared<MemoryBuffer>(
|
||||
DataType::Value, area_, true);
|
||||
offset->fill(area_, &offset_);
|
||||
|
||||
operation_->update_memory_buffer(
|
||||
sat_.get(), area_, Span<MemoryBuffer *>{input.get(), offset.get()});
|
||||
}
|
||||
|
||||
std::shared_ptr<SummedAreaTableOperation> operation_;
|
||||
std::shared_ptr<MemoryBuffer> sat_;
|
||||
rcti area_;
|
||||
float offset_ = 0.0f;
|
||||
};
|
||||
|
||||
TEST_F(SummedAreaTableSumTest, FullyInside)
|
||||
{
|
||||
rcti area;
|
||||
area.xmin = 1;
|
||||
area.xmax = 3;
|
||||
area.ymin = 1;
|
||||
area.ymax = 3;
|
||||
float4 sum = summed_area_table_sum(sat_.get(), area);
|
||||
EXPECT_EQ(sum[0], 9);
|
||||
zazizizou marked this conversation as resolved
Outdated
Sergey Sharybin
commented
Any specific reason to use `ASSERT_EQ` instead of `EXPECT_EQ`? The ASSERT will stop the test. It is typically used for cases when the rest of the test would be impossible. For example, when you expect a function to give you a pointer to an object and you check that it is non-nullptr before looking into its properties.
Habib Gahbiche
commented
No specific reason, but it doesn't make a difference here because there is a single assert per test. I can update it in a later patch for clarity.
|
||||
}
|
||||
|
||||
/* A 3x3 region that starts at the lower-left corner of the image: 9 pixels. */
TEST_F(SummedAreaTableSumTest, LeftEdge)
{
  /* Initializer order is xmin, xmax, ymin, ymax. */
  const rcti area{0, 2, 0, 2};
  const float4 sum = summed_area_table_sum(sat_.get(), area);
  EXPECT_EQ(sum[0], 9);
}
|
||||
|
||||
/* A region whose upper x bound lies one past the last column. Only the two columns inside the
 * image are expected to be summed: 2 columns x 3 rows = 6 pixels. */
TEST_F(SummedAreaTableSumTest, RightEdge)
{
  /* Initializer order is xmin, xmax, ymin, ymax. */
  const rcti area{area_.xmax - 2, area_.xmax, 0, 2};
  const float4 sum = summed_area_table_sum(sat_.get(), area);
  EXPECT_EQ(sum[0], 6);
}
|
||||
|
||||
/* A region whose upper bounds lie one past the last column and row: only the single corner
 * pixel inside the image is expected to be summed.
 * NOTE(review): the region sits at the maximum x and y, which is the upper-right corner when
 * the origin is at the lower left - confirm the intended test name. */
TEST_F(SummedAreaTableSumTest, LowerRightCorner)
{
  /* Initializer order is xmin, xmax, ymin, ymax. */
  const rcti area{area_.xmax - 1, area_.xmax, area_.ymax - 1, area_.ymax};
  const float4 sum = summed_area_table_sum(sat_.get(), area);
  EXPECT_EQ(sum[0], 1);
}
|
||||
|
||||
/* A one pixel high region covering the first two pixels of row y = 0: 2 pixels.
 * NOTE(review): with the origin at the lower left, y = 0 is the bottom row - confirm the
 * intended test name. */
TEST_F(SummedAreaTableSumTest, TopLine)
{
  /* Initializer order is xmin, xmax, ymin, ymax. */
  const rcti area{0, 1, 0, 0};
  const float4 sum = summed_area_table_sum(sat_.get(), area);
  EXPECT_EQ(sum[0], 2);
}
|
||||
|
||||
/* A one pixel high region covering the whole row y = 3, the last row of the image: 5 pixels.
 * NOTE(review): with the origin at the lower left, y = 3 is the top row - confirm the intended
 * test name. */
TEST_F(SummedAreaTableSumTest, ButtomLine)
{
  /* Initializer order is xmin, xmax, ymin, ymax. */
  const rcti area{0, 4, 3, 3};
  const float4 sum = summed_area_table_sum(sat_.get(), area);
  EXPECT_EQ(sum[0], 5);
}
|
||||
|
||||
} // namespace blender::compositor::tests
|
Add a
kuwahara_classic->set_use_sat(true);
just for clarity.