Realtime Compositor: Improve classic Kuwahara precision #114191
|
@ -178,7 +178,6 @@ set(GLSL_SRC
|
|||
shaders/compositor_smaa_edge_detection.glsl
|
||||
shaders/compositor_smaa_neighborhood_blending.glsl
|
||||
shaders/compositor_split_viewer.glsl
|
||||
shaders/compositor_summed_area_table_compute_complete_blocks.glsl
|
||||
shaders/compositor_summed_area_table_compute_complete_x_prologues.glsl
|
||||
shaders/compositor_summed_area_table_compute_complete_y_prologues.glsl
|
||||
shaders/compositor_summed_area_table_compute_incomplete_prologues.glsl
|
||||
|
|
|
@ -23,7 +23,9 @@ enum class SummedAreaTableOperation : uint8_t {
|
|||
* a box filter. */
|
||||
void summed_area_table(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
Result &blocks,
|
||||
Result &x_prologues,
|
||||
Result &y_prologues,
|
||||
SummedAreaTableOperation operation = SummedAreaTableOperation::Identity);
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
|
|
@ -54,6 +54,7 @@ static const char *get_compute_incomplete_prologues_shader(SummedAreaTableOperat
|
|||
static void compute_incomplete_prologues(Context &context,
|
||||
Result &input,
|
||||
SummedAreaTableOperation operation,
|
||||
Result &blocks,
|
||||
Result &incomplete_x_prologues,
|
||||
Result &incomplete_y_prologues)
|
||||
{
|
||||
|
@ -67,6 +68,9 @@ static void compute_incomplete_prologues(Context &context,
|
|||
const int2 input_size = input.domain().size;
|
||||
const int2 number_of_groups = math::divide_ceil(input_size, group_size);
|
||||
|
||||
blocks.allocate_texture(input.domain());
|
||||
blocks.bind_as_image(shader, "blocks_img");
|
||||
|
||||
incomplete_x_prologues.allocate_texture(Domain(int2(input_size.y, number_of_groups.x)));
|
||||
incomplete_x_prologues.bind_as_image(shader, "incomplete_x_prologues_img");
|
||||
|
||||
|
@ -151,57 +155,11 @@ static void compute_complete_y_prologues(Context &context,
|
|||
complete_y_prologues.unbind_as_image();
|
||||
}
|
||||
|
||||
/* Maps the given summed area table operation to the name of the shader info that computes the
 * complete blocks for that operation. Operations without a matching case trigger the unreachable
 * assert and yield an empty name. */
static const char *get_compute_complete_blocks_shader(SummedAreaTableOperation operation)
{
  const char *shader_name = nullptr;

  /* Kept as a switch without a default case so that every enumerator is listed explicitly. */
  switch (operation) {
    case SummedAreaTableOperation::Identity:
      shader_name = "compositor_summed_area_table_compute_complete_blocks_identity";
      break;
    case SummedAreaTableOperation::Square:
      shader_name = "compositor_summed_area_table_compute_complete_blocks_square";
      break;
  }

  if (shader_name == nullptr) {
    BLI_assert_unreachable();
    return "";
  }

  return shader_name;
}
|
||||
|
||||
/* Computes the final summed area table blocks from the complete X and Y prologues using equation
|
||||
* (41) to implement the fourth pass of Algorithm SAT. That equation simply uses an intermediate
|
||||
* shared memory to cascade the accumulation of rows and then column in each block using the
|
||||
* prologues as initial values and writes each step of the latter accumulation to the output. */
|
||||
static void compute_complete_blocks(Context &context,
|
||||
Result &input,
|
||||
Result &complete_x_prologues,
|
||||
Result &complete_y_prologues,
|
||||
SummedAreaTableOperation operation,
|
||||
Result &output)
|
||||
{
|
||||
GPUShader *shader = context.get_shader(get_compute_complete_blocks_shader(operation),
|
||||
ResultPrecision::Full);
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
input.bind_as_texture(shader, "input_tx");
|
||||
complete_x_prologues.bind_as_texture(shader, "complete_x_prologues_tx");
|
||||
complete_y_prologues.bind_as_texture(shader, "complete_y_prologues_tx");
|
||||
|
||||
output.allocate_texture(input.domain());
|
||||
output.bind_as_image(shader, "output_img", true);
|
||||
|
||||
const int2 group_size = int2(16);
|
||||
const int2 input_size = input.domain().size;
|
||||
const int2 number_of_groups = math::divide_ceil(input_size, group_size);
|
||||
|
||||
GPU_compute_dispatch(shader, number_of_groups.x, number_of_groups.y, 1);
|
||||
|
||||
GPU_shader_unbind();
|
||||
input.unbind_as_texture();
|
||||
complete_x_prologues.unbind_as_texture();
|
||||
complete_y_prologues.unbind_as_texture();
|
||||
output.unbind_as_image();
|
||||
}
|
||||
|
||||
void summed_area_table(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
Result &blocks,
|
||||
Result &x_prologues,
|
||||
Result &y_prologues,
|
||||
SummedAreaTableOperation operation)
|
||||
{
|
||||
Result incomplete_x_prologues = context.create_temporary_result(ResultType::Color,
|
||||
|
@ -209,27 +167,18 @@ void summed_area_table(Context &context,
|
|||
Result incomplete_y_prologues = context.create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
compute_incomplete_prologues(
|
||||
context, input, operation, incomplete_x_prologues, incomplete_y_prologues);
|
||||
context, input, operation, blocks, incomplete_x_prologues, incomplete_y_prologues);
|
||||
|
||||
Result complete_x_prologues = context.create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
Result complete_x_prologues_sum = context.create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
compute_complete_x_prologues(
|
||||
context, input, incomplete_x_prologues, complete_x_prologues, complete_x_prologues_sum);
|
||||
context, input, incomplete_x_prologues, x_prologues, complete_x_prologues_sum);
|
||||
incomplete_x_prologues.release();
|
||||
|
||||
Result complete_y_prologues = context.create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
compute_complete_y_prologues(
|
||||
context, input, incomplete_y_prologues, complete_x_prologues_sum, complete_y_prologues);
|
||||
context, input, incomplete_y_prologues, complete_x_prologues_sum, y_prologues);
|
||||
incomplete_y_prologues.release();
|
||||
complete_x_prologues_sum.release();
|
||||
|
||||
compute_complete_blocks(
|
||||
context, input, complete_x_prologues, complete_y_prologues, operation, output);
|
||||
complete_x_prologues.release();
|
||||
complete_y_prologues.release();
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
|
|
@ -35,9 +35,13 @@ void main()
|
|||
int quadrant_pixel_count = region_size.x * region_size.y;
|
||||
|
||||
#if defined(SUMMED_AREA_TABLE)
|
||||
mean_of_color_of_quadrants[q] = summed_area_table_sum(table_tx, lower_bound, upper_bound);
|
||||
mean_of_squared_color_of_quadrants[q] = summed_area_table_sum(
|
||||
squared_table_tx, lower_bound, upper_bound);
|
||||
mean_of_color_of_quadrants[q] = summed_area_table_sum(
|
||||
blocks_tx, x_prologues_tx, y_prologues_tx, lower_bound, upper_bound);
|
||||
mean_of_squared_color_of_quadrants[q] = summed_area_table_sum(squared_blocks_tx,
|
||||
squared_x_prologues_tx,
|
||||
squared_y_prologues_tx,
|
||||
lower_bound,
|
||||
upper_bound);
|
||||
#else
|
||||
for (int j = 0; j <= radius; j++) {
|
||||
for (int i = 0; i <= radius; i++) {
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
/* An intermediate shared memory where the result of X accumulation will be stored. */
|
||||
shared vec4 block[gl_WorkGroupSize.x][gl_WorkGroupSize.y];
|
||||
|
||||
void main()
{
  /* Texel at which the block handled by this work group starts. */
  ivec2 block_origin = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy);

  /* Horizontal pass. Only the first invocation of every row does the work: it starts from the X
   * prologue value of its row and accumulates the row of the block, storing every intermediate
   * sum into the shared memory. */
  if (gl_LocalInvocationID.x == 0) {
    ivec2 prologue_texel = ivec2(gl_GlobalInvocationID.y, gl_WorkGroupID.x);
    vec4 row_sum = texture_load(complete_x_prologues_tx, prologue_texel, vec4(0.0));
    for (int column = 0; column < gl_WorkGroupSize.x; column++) {
      ivec2 input_texel = ivec2(block_origin.x + column, gl_GlobalInvocationID.y);
      vec4 color = texture_load(input_tx, input_texel, vec4(0.0));
      row_sum += OPERATION(color);
      block[column][gl_LocalInvocationID.y] = row_sum;
    }
  }

  /* The vertical pass reads rows written by other invocations, so wait for all of them. */
  barrier();

  /* Vertical pass. Only the first invocation of every column does the work: it starts from the Y
   * prologue value of its column and accumulates the column of the shared memory, writing every
   * intermediate sum to the output image. */
  if (gl_LocalInvocationID.y == 0) {
    ivec2 prologue_texel = ivec2(gl_GlobalInvocationID.x, gl_WorkGroupID.y);
    vec4 column_sum = texture_load(complete_y_prologues_tx, prologue_texel, vec4(0.0));
    for (int row = 0; row < gl_WorkGroupSize.y; row++) {
      column_sum += block[gl_LocalInvocationID.x][row];
      ivec2 output_texel = ivec2(gl_GlobalInvocationID.x, block_origin.y + row);
      imageStore(output_img, output_texel, column_sum);
    }
  }
}
|
|
@ -41,6 +41,8 @@ void main()
|
|||
vec4 y_accumulated_color = vec4(0.0);
|
||||
for (int i = 0; i < gl_WorkGroupSize.y; i++) {
|
||||
y_accumulated_color += block[gl_LocalInvocationID.x][i];
|
||||
ivec2 texel = ivec2(gl_GlobalInvocationID.x, gl_WorkGroupID.y * gl_WorkGroupSize.y + i);
|
||||
imageStore(blocks_img, texel, y_accumulated_color);
|
||||
}
|
||||
|
||||
/* Note that the first row of prologues is the result of accumulating a virtual block that is
|
||||
|
|
|
@ -28,8 +28,12 @@ GPU_SHADER_CREATE_INFO(compositor_kuwahara_classic_convolution_variable_size)
|
|||
GPU_SHADER_CREATE_INFO(compositor_kuwahara_classic_summed_area_table_shared)
|
||||
.additional_info("compositor_kuwahara_classic_shared")
|
||||
.define("SUMMED_AREA_TABLE")
|
||||
.sampler(0, ImageType::FLOAT_2D, "table_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "squared_table_tx");
|
||||
.sampler(0, ImageType::FLOAT_2D, "blocks_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "x_prologues_tx")
|
||||
.sampler(2, ImageType::FLOAT_2D, "y_prologues_tx")
|
||||
.sampler(3, ImageType::FLOAT_2D, "squared_blocks_tx")
|
||||
.sampler(4, ImageType::FLOAT_2D, "squared_x_prologues_tx")
|
||||
.sampler(5, ImageType::FLOAT_2D, "squared_y_prologues_tx");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_kuwahara_classic_summed_area_table_constant_size)
|
||||
.additional_info("compositor_kuwahara_classic_summed_area_table_shared")
|
||||
|
@ -39,7 +43,7 @@ GPU_SHADER_CREATE_INFO(compositor_kuwahara_classic_summed_area_table_constant_si
|
|||
|
||||
GPU_SHADER_CREATE_INFO(compositor_kuwahara_classic_summed_area_table_variable_size)
|
||||
.additional_info("compositor_kuwahara_classic_summed_area_table_shared")
|
||||
.sampler(2, ImageType::FLOAT_2D, "size_tx")
|
||||
.sampler(6, ImageType::FLOAT_2D, "size_tx")
|
||||
.define("VARIABLE_SIZE")
|
||||
.do_static_compilation(true);
|
||||
|
||||
|
|
|
@ -7,8 +7,9 @@
|
|||
GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_incomplete_prologues_shared)
|
||||
.local_group_size(16, 16)
|
||||
.sampler(0, ImageType::FLOAT_2D, "input_tx")
|
||||
.image(0, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "incomplete_x_prologues_img")
|
||||
.image(1, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "incomplete_y_prologues_img")
|
||||
.image(0, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "blocks_img")
|
||||
.image(1, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "incomplete_x_prologues_img")
|
||||
.image(2, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "incomplete_y_prologues_img")
|
||||
.compute_source("compositor_summed_area_table_compute_incomplete_prologues.glsl");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_incomplete_prologues_identity)
|
||||
|
@ -36,21 +37,3 @@ GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_complete_y_prologues
|
|||
.image(0, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "complete_y_prologues_img")
|
||||
.compute_source("compositor_summed_area_table_compute_complete_y_prologues.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
/* Shared create info of the complete blocks pass: a 16x16 local group size, three 2D float
 * samplers (the input and the complete X and Y prologues), and a read-write RGBA32F output image.
 * This info does not define OPERATION itself. */
GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_complete_blocks_shared)
|
||||
.local_group_size(16, 16)
|
||||
.sampler(0, ImageType::FLOAT_2D, "input_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "complete_x_prologues_tx")
|
||||
.sampler(2, ImageType::FLOAT_2D, "complete_y_prologues_tx")
|
||||
.image(0, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
|
||||
.compute_source("compositor_summed_area_table_compute_complete_blocks.glsl");
|
||||
|
||||
/* Identity variant of the complete blocks pass: OPERATION(value) expands to the value itself.
 * Marked for static compilation. */
GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_complete_blocks_identity)
|
||||
.additional_info("compositor_summed_area_table_compute_complete_blocks_shared")
|
||||
.define("OPERATION(value)", "value")
|
||||
.do_static_compilation(true);
|
||||
|
||||
/* Square variant of the complete blocks pass: OPERATION(value) expands to value * value.
 * Marked for static compilation. */
GPU_SHADER_CREATE_INFO(compositor_summed_area_table_compute_complete_blocks_square)
|
||||
.additional_info("compositor_summed_area_table_compute_complete_blocks_shared")
|
||||
.define("OPERATION(value)", "value * value")
|
||||
.do_static_compilation(true);
|
||||
|
|
|
@ -2,6 +2,27 @@
|
|||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
#define SAT_BLOCK_SIZE 16
|
||||
|
||||
/* Returns the value of the given blocks texture at the given texel. In addition, inter_value is
 * set to the sum of the X prologue values of the rows that start at the first row of the block
 * containing the texel and end at the row of the texel, plus the Y prologue value of the texel.
 * The X prologues are indexed by (row, block column) and the Y prologues by (column, block row).
 * All loads use zero as their fallback value. */
vec4 summed_area_table_value(sampler2D blocks,
                             sampler2D x_prologues,
                             sampler2D y_prologues,
                             ivec2 texel,
                             out vec4 inter_value)
{
  int block_column = texel.x / SAT_BLOCK_SIZE;
  int block_row = texel.y / SAT_BLOCK_SIZE;
  int first_row_of_block = block_row * SAT_BLOCK_SIZE;

  /* Accumulate the X prologues first and the Y prologue last. */
  vec4 prologues_sum = vec4(0.0);
  for (int row = first_row_of_block; row <= texel.y; row++) {
    prologues_sum += texture_load(x_prologues, ivec2(row, block_column), vec4(0.0));
  }
  prologues_sum += texture_load(y_prologues, ivec2(texel.x, block_row), vec4(0.0));

  inter_value = prologues_sum;
  return texture_load(blocks, texel, vec4(0.0));
}
|
||||
|
||||
/* Computes the sum of the rectangular region defined by the given lower and upper bounds from the
|
||||
* given summed area table. It is assumed that the given upper bound is larger than the given lower
|
||||
* bound, otherwise, undefined behavior is invoked. Looking at the diagram below, in order to
|
||||
|
@ -35,17 +56,31 @@
|
|||
* The aforementioned equation eliminates the edges between regions X, C, and A since they get
|
||||
* subtracted with C and A. To avoid this, we subtract 1 from the lower bound and fall back to zero
|
||||
* for out-of-bounds sampling. */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
vec4 summed_area_table_sum(sampler2D table, ivec2 lower_bound, ivec2 upper_bound)
|
||||
vec4 summed_area_table_sum(sampler2D blocks,
|
||||
sampler2D x_prologues,
|
||||
sampler2D y_prologues,
|
||||
ivec2 lower_bound,
|
||||
ivec2 upper_bound)
|
||||
{
|
||||
ivec2 corrected_lower_bound = lower_bound - ivec2(1);
|
||||
ivec2 corrected_upper_bound = min(texture_size(table) - ivec2(1), upper_bound);
|
||||
vec4 addend = texture_load(table, corrected_upper_bound, vec4(0.0)) +
|
||||
texture_load(table, corrected_lower_bound, vec4(0.0));
|
||||
vec4 subtrahend =
|
||||
texture_load(table, ivec2(corrected_lower_bound.x, corrected_upper_bound.y), vec4(0.0)) +
|
||||
texture_load(table, ivec2(corrected_upper_bound.x, corrected_lower_bound.y), vec4(0.0));
|
||||
return addend - subtrahend;
|
||||
ivec2 lower_left_texel = lower_bound - ivec2(1);
|
||||
ivec2 upper_right_texel = min(texture_size(blocks) - ivec2(1), upper_bound);
|
||||
ivec2 upper_left_texel = ivec2(lower_left_texel.x, upper_right_texel.y);
|
||||
ivec2 lower_right_texel = ivec2(upper_right_texel.x, lower_left_texel.y);
|
||||
|
||||
vec4 inter_upper_right, inter_lower_left, inter_upper_left, inter_lower_right;
|
||||
|
||||
vec4 intra_upper_right = summed_area_table_value(
|
||||
blocks, x_prologues, y_prologues, upper_right_texel, inter_upper_right);
|
||||
vec4 intra_lower_left = summed_area_table_value(
|
||||
blocks, x_prologues, y_prologues, lower_left_texel, inter_lower_left);
|
||||
vec4 intra_upper_left = summed_area_table_value(
|
||||
blocks, x_prologues, y_prologues, upper_left_texel, inter_upper_left);
|
||||
vec4 intra_lower_right = summed_area_table_value(
|
||||
blocks, x_prologues, y_prologues, lower_right_texel, inter_lower_right);
|
||||
|
||||
vec4 intra_value = (intra_upper_right + intra_lower_left) -
|
||||
(intra_upper_left + intra_lower_right);
|
||||
vec4 inter_value = (inter_upper_right + inter_lower_left) -
|
||||
(inter_upper_left + inter_lower_right);
|
||||
return intra_value + inter_value;
|
||||
}
|
||||
|
|
|
@ -122,13 +122,25 @@ class ConvertKuwaharaOperation : public NodeOperation {
|
|||
|
||||
void execute_classic_summed_area_table()
|
||||
{
|
||||
Result table = context().create_temporary_result(ResultType::Color, ResultPrecision::Full);
|
||||
summed_area_table(context(), get_input("Image"), table);
|
||||
Result blocks = context().create_temporary_result(ResultType::Color, ResultPrecision::Full);
|
||||
Result x_prologues = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
Result y_prologues = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
summed_area_table(context(), get_input("Image"), blocks, x_prologues, y_prologues);
|
||||
|
||||
Result squared_table = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
summed_area_table(
|
||||
context(), get_input("Image"), squared_table, SummedAreaTableOperation::Square);
|
||||
Result squared_blocks = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
Result squared_x_prologues = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
Result squared_y_prologues = context().create_temporary_result(ResultType::Color,
|
||||
ResultPrecision::Full);
|
||||
summed_area_table(context(),
|
||||
get_input("Image"),
|
||||
squared_blocks,
|
||||
squared_x_prologues,
|
||||
squared_y_prologues,
|
||||
SummedAreaTableOperation::Square);
|
||||
|
||||
GPUShader *shader = context().get_shader(get_classic_summed_area_table_shader_name());
|
||||
GPU_shader_bind(shader);
|
||||
|
@ -141,8 +153,13 @@ class ConvertKuwaharaOperation : public NodeOperation {
|
|||
size_input.bind_as_texture(shader, "size_tx");
|
||||
}
|
||||
|
||||
table.bind_as_texture(shader, "table_tx");
|
||||
squared_table.bind_as_texture(shader, "squared_table_tx");
|
||||
blocks.bind_as_texture(shader, "blocks_tx");
|
||||
x_prologues.bind_as_texture(shader, "x_prologues_tx");
|
||||
y_prologues.bind_as_texture(shader, "y_prologues_tx");
|
||||
|
||||
squared_blocks.bind_as_texture(shader, "squared_blocks_tx");
|
||||
squared_x_prologues.bind_as_texture(shader, "squared_x_prologues_tx");
|
||||
squared_y_prologues.bind_as_texture(shader, "squared_y_prologues_tx");
|
||||
|
||||
const Domain domain = compute_domain();
|
||||
Result &output_image = get_result("Image");
|
||||
|
@ -151,13 +168,21 @@ class ConvertKuwaharaOperation : public NodeOperation {
|
|||
|
||||
compute_dispatch_threads_at_least(shader, domain.size);
|
||||
|
||||
table.unbind_as_texture();
|
||||
squared_table.unbind_as_texture();
|
||||
blocks.unbind_as_texture();
|
||||
x_prologues.unbind_as_texture();
|
||||
y_prologues.unbind_as_texture();
|
||||
squared_blocks.unbind_as_texture();
|
||||
squared_x_prologues.unbind_as_texture();
|
||||
squared_y_prologues.unbind_as_texture();
|
||||
output_image.unbind_as_image();
|
||||
GPU_shader_unbind();
|
||||
|
||||
table.release();
|
||||
squared_table.release();
|
||||
blocks.release();
|
||||
x_prologues.release();
|
||||
y_prologues.release();
|
||||
squared_blocks.release();
|
||||
squared_x_prologues.release();
|
||||
squared_y_prologues.release();
|
||||
}
|
||||
|
||||
/* An implementation of the Anisotropic Kuwahara filter described in the paper:
|
||||
|
|
Loading…
Reference in New Issue