Realtime Compositor: Implement Fast Gaussian blur #120431
|
@ -63,35 +63,42 @@ set(SRC
|
|||
COM_texture_pool.hh
|
||||
COM_utilities.hh
|
||||
|
||||
algorithms/intern/deriche_gaussian_blur.cc
|
||||
algorithms/intern/jump_flooding.cc
|
||||
algorithms/intern/morphological_blur.cc
|
||||
algorithms/intern/morphological_distance.cc
|
||||
algorithms/intern/morphological_distance_feather.cc
|
||||
algorithms/intern/parallel_reduction.cc
|
||||
algorithms/intern/realize_on_domain.cc
|
||||
algorithms/intern/recursive_gaussian_blur.cc
|
||||
algorithms/intern/smaa.cc
|
||||
algorithms/intern/summed_area_table.cc
|
||||
algorithms/intern/symmetric_separable_blur.cc
|
||||
algorithms/intern/symmetric_separable_blur_variable_size.cc
|
||||
algorithms/intern/transform.cc
|
||||
algorithms/intern/van_vliet_gaussian_blur.cc
|
||||
|
||||
algorithms/COM_algorithm_deriche_gaussian_blur.hh
|
||||
algorithms/COM_algorithm_jump_flooding.hh
|
||||
algorithms/COM_algorithm_morphological_blur.hh
|
||||
algorithms/COM_algorithm_morphological_distance.hh
|
||||
algorithms/COM_algorithm_morphological_distance_feather.hh
|
||||
algorithms/COM_algorithm_parallel_reduction.hh
|
||||
algorithms/COM_algorithm_realize_on_domain.hh
|
||||
algorithms/COM_algorithm_recursive_gaussian_blur.hh
|
||||
algorithms/COM_algorithm_smaa.hh
|
||||
algorithms/COM_algorithm_summed_area_table.hh
|
||||
algorithms/COM_algorithm_symmetric_separable_blur.hh
|
||||
algorithms/COM_algorithm_symmetric_separable_blur_variable_size.hh
|
||||
algorithms/COM_algorithm_transform.hh
|
||||
algorithms/COM_algorithm_van_vliet_gaussian_blur.hh
|
||||
|
||||
cached_resources/intern/bokeh_kernel.cc
|
||||
cached_resources/intern/cached_image.cc
|
||||
cached_resources/intern/cached_mask.cc
|
||||
cached_resources/intern/cached_shader.cc
|
||||
cached_resources/intern/cached_texture.cc
|
||||
cached_resources/intern/deriche_gaussian_coefficients.cc
|
||||
cached_resources/intern/distortion_grid.cc
|
||||
cached_resources/intern/keying_screen.cc
|
||||
cached_resources/intern/morphological_distance_feather_weights.cc
|
||||
|
@ -99,6 +106,7 @@ set(SRC
|
|||
cached_resources/intern/smaa_precomputed_textures.cc
|
||||
cached_resources/intern/symmetric_blur_weights.cc
|
||||
cached_resources/intern/symmetric_separable_blur_weights.cc
|
||||
cached_resources/intern/van_vliet_gaussian_coefficients.cc
|
||||
|
||||
cached_resources/COM_bokeh_kernel.hh
|
||||
cached_resources/COM_cached_image.hh
|
||||
|
@ -106,6 +114,7 @@ set(SRC
|
|||
cached_resources/COM_cached_resource.hh
|
||||
cached_resources/COM_cached_shader.hh
|
||||
cached_resources/COM_cached_texture.hh
|
||||
cached_resources/COM_deriche_gaussian_coefficients.hh
|
||||
cached_resources/COM_distortion_grid.hh
|
||||
cached_resources/COM_keying_screen.hh
|
||||
cached_resources/COM_morphological_distance_feather_weights.hh
|
||||
|
@ -113,6 +122,7 @@ set(SRC
|
|||
cached_resources/COM_smaa_precomputed_textures.hh
|
||||
cached_resources/COM_symmetric_blur_weights.hh
|
||||
cached_resources/COM_symmetric_separable_blur_weights.hh
|
||||
cached_resources/COM_van_vliet_gaussian_coefficients.hh
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
@ -142,6 +152,8 @@ set(GLSL_SRC
|
|||
shaders/compositor_defocus_radius_from_depth.glsl
|
||||
shaders/compositor_defocus_radius_from_scale.glsl
|
||||
shaders/compositor_despeckle.glsl
|
||||
shaders/compositor_deriche_gaussian_blur.glsl
|
||||
shaders/compositor_deriche_gaussian_blur_sum.glsl
|
||||
shaders/compositor_directional_blur.glsl
|
||||
shaders/compositor_displace.glsl
|
||||
shaders/compositor_double_edge_mask_compute_boundary.glsl
|
||||
|
@ -215,6 +227,8 @@ set(GLSL_SRC
|
|||
shaders/compositor_symmetric_separable_blur_variable_size.glsl
|
||||
shaders/compositor_tone_map_photoreceptor.glsl
|
||||
shaders/compositor_tone_map_simple.glsl
|
||||
shaders/compositor_van_vliet_gaussian_blur.glsl
|
||||
shaders/compositor_van_vliet_gaussian_blur_sum.glsl
|
||||
shaders/compositor_write_output.glsl
|
||||
shaders/compositor_z_combine_compute_mask.glsl
|
||||
shaders/compositor_z_combine_from_mask.glsl
|
||||
|
@ -290,6 +304,7 @@ set(SRC_SHADER_CREATE_INFOS
|
|||
shaders/infos/compositor_convert_info.hh
|
||||
shaders/infos/compositor_cryptomatte_info.hh
|
||||
shaders/infos/compositor_defocus_info.hh
|
||||
shaders/infos/compositor_deriche_gaussian_blur_info.hh
|
||||
shaders/infos/compositor_despeckle_info.hh
|
||||
shaders/infos/compositor_directional_blur_info.hh
|
||||
shaders/infos/compositor_displace_info.hh
|
||||
|
@ -334,6 +349,7 @@ set(SRC_SHADER_CREATE_INFOS
|
|||
shaders/infos/compositor_symmetric_separable_blur_variable_size_info.hh
|
||||
shaders/infos/compositor_tone_map_photoreceptor_info.hh
|
||||
shaders/infos/compositor_tone_map_simple_info.hh
|
||||
shaders/infos/compositor_van_vliet_gaussian_blur_info.hh
|
||||
shaders/infos/compositor_write_output_info.hh
|
||||
shaders/infos/compositor_z_combine_info.hh
|
||||
)
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "COM_cached_mask.hh"
|
||||
#include "COM_cached_shader.hh"
|
||||
#include "COM_cached_texture.hh"
|
||||
#include "COM_deriche_gaussian_coefficients.hh"
|
||||
#include "COM_distortion_grid.hh"
|
||||
#include "COM_keying_screen.hh"
|
||||
#include "COM_morphological_distance_feather_weights.hh"
|
||||
|
@ -16,6 +17,7 @@
|
|||
#include "COM_smaa_precomputed_textures.hh"
|
||||
#include "COM_symmetric_blur_weights.hh"
|
||||
#include "COM_symmetric_separable_blur_weights.hh"
|
||||
#include "COM_van_vliet_gaussian_coefficients.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
|
@ -55,6 +57,8 @@ class StaticCacheManager {
|
|||
CachedShaderContainer cached_shaders;
|
||||
BokehKernelContainer bokeh_kernels;
|
||||
CachedImageContainer cached_images;
|
||||
DericheGaussianCoefficientsContainer deriche_gaussian_coefficients;
|
||||
VanVlietGaussianCoefficientsContainer van_vliet_gaussian_coefficients;
|
||||
|
||||
/* Reset the cache manager by deleting the cached resources that are no longer needed because
|
||||
* they weren't used in the last evaluation and prepare the remaining cached resources to track
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
|
||||
* sigma computed using Deriche's design method. This is based on the following paper:
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*
|
||||
* This differs from the standard symmetric separable blur algorithm in that it is faster for high
|
||||
* sigma values, the downside is that it consumes more memory and is only an approximation that
|
||||
* might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
|
||||
* is numerically unstable and not accurate for sigma values larger than 32, in those cases, use
|
||||
* the Van Vliet filter instead. Further, for sigma values less than 3, use direct convolution
|
||||
* instead, since it is faster and more accurate. Neumann boundary is assumed.
|
||||
*
|
||||
* The output is written to the given output result, which will be allocated internally and is thus
|
||||
* expected not to be previously allocated. */
|
||||
void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,24 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Blur the input using a recursive Gaussian blur algorithm given a certain radius. This differs
|
||||
* from the standard symmetric separable blur algorithm in that it is orders of magnitude faster
|
||||
* for very high radius value, the downside is that it consumes more memory and is only an
|
||||
* approximation that might suffer from fringing and artifacts, though those are typically
|
||||
* unnoticeable. Neumann boundary is assumed.
|
||||
*
|
||||
* The output is written to the given output result, which will be allocated internally and is thus
|
||||
* expected not to be previously allocated. */
|
||||
void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius);
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,36 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
|
||||
* sigma computed using Van Vliet's design method. This is based on the following paper:
|
||||
*
|
||||
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
|
||||
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
|
||||
* 98EX170). Vol. 1. IEEE, 1998.
|
||||
*
|
||||
* However, we internally split the fourth order IIR filter into two second order sections in order
|
||||
* to improve its numerical stability and improve parallelism. See the implementation for more
|
||||
* information.
|
||||
*
|
||||
* This differs from the standard symmetric separable blur algorithm in that it is faster for high
|
||||
* sigma values, the downside is that it consumes more memory and is only an approximation that
|
||||
* might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
|
||||
* is not accurate for sigma values less than 32, in those cases, use the Deriche filter instead.
|
||||
* Further, for sigma values less than 3, use direct convolution instead, since it is faster and
|
||||
* more accurate. Neumann boundary is assumed.
|
||||
*
|
||||
* The output is written to the given output result, which will be allocated internally and is thus
|
||||
* expected not to be previously allocated. */
|
||||
void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,119 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
|
||||
#include "GPU_shader.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
#include "COM_utilities.hh"
|
||||
|
||||
#include "COM_algorithm_deriche_gaussian_blur.hh"
|
||||
#include "COM_deriche_gaussian_coefficients.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Sum the causal and non causal outputs of the filter and write the sum to the output. This is
|
||||
* because the Deriche filter is a parallel interconnection filter, meaning its output is the sum
|
||||
* of its causal and non causal filters. The output is expected not to be allocated as it will be
|
||||
* allocated internally.
|
||||
*
|
||||
* The output is allocated and written transposed, that is, with a height equivalent to the width
|
||||
* of the input and vice versa. This is done as a performance optimization. The blur pass will
|
||||
* blur the image horizontally and write it to the intermediate output transposed. Then the
|
||||
* vertical pass will execute the same horizontal blur shader, but since its input is transposed,
|
||||
* it will effectively do a vertical blur and write to the output transposed, effectively undoing
|
||||
* the transposition in the horizontal pass. This is done to improve spatial cache locality in the
|
||||
* shader and to avoid having two separate shaders for each blur pass. */
|
||||
static void sum_causal_and_non_causal_results(Context &context,
|
||||
Result &causal_input,
|
||||
Result &non_causal_input,
|
||||
Result &output)
|
||||
{
|
||||
GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur_sum");
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
causal_input.bind_as_texture(shader, "causal_input_tx");
|
||||
non_causal_input.bind_as_texture(shader, "non_causal_input_tx");
|
||||
|
||||
const Domain domain = causal_input.domain();
|
||||
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
|
||||
output.allocate_texture(transposed_domain);
|
||||
output.bind_as_image(shader, "output_img");
|
||||
|
||||
compute_dispatch_threads_at_least(shader, domain.size);
|
||||
|
||||
GPU_shader_unbind();
|
||||
causal_input.unbind_as_texture();
|
||||
non_causal_input.unbind_as_texture();
|
||||
output.unbind_as_image();
|
||||
}
|
||||
|
||||
static void blur_pass(Context &context, Result &input, Result &output, float sigma)
|
||||
{
|
||||
GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur");
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
const DericheGaussianCoefficients &coefficients =
|
||||
context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
|
||||
|
||||
GPU_shader_uniform_4fv(shader,
|
||||
"causal_feedforward_coefficients",
|
||||
float4(coefficients.causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_4fv(shader,
|
||||
"non_causal_feedforward_coefficients",
|
||||
float4(coefficients.non_causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_4fv(
|
||||
shader, "feedback_coefficients", float4(coefficients.feedback_coefficients()));
|
||||
GPU_shader_uniform_1f(
|
||||
shader, "causal_boundary_coefficient", float(coefficients.causal_boundary_coefficient()));
|
||||
GPU_shader_uniform_1f(shader,
|
||||
"non_causal_boundary_coefficient",
|
||||
float(coefficients.non_causal_boundary_coefficient()));
|
||||
|
||||
input.bind_as_texture(shader, "input_tx");
|
||||
|
||||
const Domain domain = input.domain();
|
||||
|
||||
Result causal_result = context.create_temporary_result(ResultType::Color);
|
||||
causal_result.allocate_texture(domain);
|
||||
causal_result.bind_as_image(shader, "causal_output_img");
|
||||
|
||||
Result non_causal_result = context.create_temporary_result(ResultType::Color);
|
||||
non_causal_result.allocate_texture(domain);
|
||||
non_causal_result.bind_as_image(shader, "non_causal_output_img");
|
||||
|
||||
/* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
|
||||
* causal one. */
|
||||
compute_dispatch_threads_at_least(shader, int2(domain.size.y, 2), int2(128, 2));
|
||||
|
||||
GPU_shader_unbind();
|
||||
input.unbind_as_texture();
|
||||
causal_result.unbind_as_image();
|
||||
non_causal_result.unbind_as_image();
|
||||
|
||||
sum_causal_and_non_causal_results(context, causal_result, non_causal_result, output);
|
||||
causal_result.release();
|
||||
non_causal_result.release();
|
||||
}
|
||||
|
||||
void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
|
||||
{
|
||||
BLI_assert_msg(math::reduce_max(sigma) >= 3.0f,
|
||||
"Deriche filter is slower and less accurate than direct convolution for sigma "
|
||||
"values less 3. Use direct convolution blur instead.");
|
||||
BLI_assert_msg(math::reduce_max(sigma) < 32.0f,
|
||||
"Deriche filter is not accurate nor numerically stable for sigma values larger "
|
||||
"than 32. Use Van Vliet filter instead.");
|
||||
|
||||
Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
|
||||
blur_pass(context, input, horizontal_pass_result, sigma.x);
|
||||
blur_pass(context, horizontal_pass_result, output, sigma.y);
|
||||
horizontal_pass_result.release();
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,68 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
|
||||
#include "COM_algorithm_deriche_gaussian_blur.hh"
|
||||
#include "COM_algorithm_symmetric_separable_blur.hh"
|
||||
#include "COM_algorithm_van_vliet_gaussian_blur.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Compute the Gaussian sigma from the radius, where the radius is in pixels. Blender's filter is
|
||||
* truncated at |x| > 3 * sigma as can be seen in the R_FILTER_GAUSS case of the RE_filter_value
|
||||
* function, so we divide by three to get the approximate sigma value. Further, ensure the radius
|
||||
* is at least 1 since recursive Gaussian implementations can't handle zero radii. */
|
||||
static float2 compute_sigma_from_radius(float2 radius)
|
||||
{
|
||||
return math::max(float2(1.0f), radius) / 3.0f;
|
||||
}
|
||||
|
||||
/* Apply a recursive Gaussian blur algorithm on the input based on the general method outlined
|
||||
* in the following paper:
|
||||
*
|
||||
* Hale, Dave. "Recursive gaussian filters." CWP-546 (2006).
|
||||
*
|
||||
* In particular, based on the table in Section 5 Conclusion, for very low radius blur, we use a
|
||||
* direct separable Gaussian convolution. For medium blur radius, we use the fourth order IIR
|
||||
* Deriche filter based on the following paper:
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*
|
||||
* For high radius blur, we use the fourth order IIR Van Vliet filter based on the following paper:
|
||||
*
|
||||
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
|
||||
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
|
||||
* 98EX170). Vol. 1. IEEE, 1998.
|
||||
*
|
||||
* That's because direct convolution is faster and more accurate for very low radius, while the
|
||||
* Deriche filter is more accurate for medium blur radius, while Van Vliet is more accurate for
|
||||
* high blur radius. The criteria suggested by the paper is a sigma value threshold of 3 and 32 for
|
||||
* the Deriche and Van Vliet filters respectively, which we apply on the larger of the two
|
||||
* dimensions. */
|
||||
void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius)
|
||||
{
|
||||
/* The radius is in pixel units, while both recursive implementations expect the sigma value of
|
||||
* the Gaussian function. */
|
||||
const float2 sigma = compute_sigma_from_radius(radius);
|
||||
|
||||
if (math::reduce_max(sigma) < 3.0f) {
|
||||
symmetric_separable_blur(context, input, output, radius);
|
||||
return;
|
||||
}
|
||||
|
||||
if (math::reduce_max(sigma) < 32.0f) {
|
||||
deriche_gaussian_blur(context, input, output, sigma);
|
||||
return;
|
||||
}
|
||||
|
||||
van_vliet_gaussian_blur(context, input, output, sigma);
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,155 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
|
||||
#include "GPU_shader.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
#include "COM_utilities.hh"
|
||||
|
||||
#include "COM_algorithm_van_vliet_gaussian_blur.hh"
|
||||
#include "COM_van_vliet_gaussian_coefficients.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* Sum all four of the causal and non causal outputs of the first and second filters and write the
|
||||
* sum to the output. This is because the Van Vliet filter is implemented as a bank of 2 parallel
|
||||
* second order filters, meaning its output is the sum of the causal and non causal filters of both
|
||||
* filters. The output is expected not to be allocated as it will be allocated internally.
|
||||
*
|
||||
* The output is allocated and written transposed, that is, with a height equivalent to the width
|
||||
* of the input and vice versa. This is done as a performance optimization. The blur pass will
|
||||
* blur the image horizontally and write it to the intermediate output transposed. Then the
|
||||
* vertical pass will execute the same horizontal blur shader, but since its input is transposed,
|
||||
* it will effectively do a vertical blur and write to the output transposed, effectively undoing
|
||||
* the transposition in the horizontal pass. This is done to improve spatial cache locality in the
|
||||
* shader and to avoid having two separate shaders for each blur pass. */
|
||||
static void sum_causal_and_non_causal_results(Context &context,
|
||||
Result &first_causal_input,
|
||||
Result &first_non_causal_input,
|
||||
Result &second_causal_input,
|
||||
Result &second_non_causal_input,
|
||||
Result &output)
|
||||
{
|
||||
GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur_sum");
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
first_causal_input.bind_as_texture(shader, "first_causal_input_tx");
|
||||
first_non_causal_input.bind_as_texture(shader, "first_non_causal_input_tx");
|
||||
second_causal_input.bind_as_texture(shader, "second_causal_input_tx");
|
||||
second_non_causal_input.bind_as_texture(shader, "second_non_causal_input_tx");
|
||||
|
||||
const Domain domain = first_causal_input.domain();
|
||||
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
|
||||
output.allocate_texture(transposed_domain);
|
||||
output.bind_as_image(shader, "output_img");
|
||||
|
||||
compute_dispatch_threads_at_least(shader, domain.size);
|
||||
|
||||
GPU_shader_unbind();
|
||||
first_causal_input.unbind_as_texture();
|
||||
first_non_causal_input.unbind_as_texture();
|
||||
second_causal_input.unbind_as_texture();
|
||||
second_non_causal_input.unbind_as_texture();
|
||||
output.unbind_as_image();
|
||||
}
|
||||
|
||||
static void blur_pass(Context &context, Result &input, Result &output, float sigma)
|
||||
{
|
||||
GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur");
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
const VanVlietGaussianCoefficients &coefficients =
|
||||
context.cache_manager().van_vliet_gaussian_coefficients.get(context, sigma);
|
||||
|
||||
GPU_shader_uniform_2fv(
|
||||
shader, "first_feedback_coefficients", float2(coefficients.first_feedback_coefficients()));
|
||||
GPU_shader_uniform_2fv(shader,
|
||||
"first_causal_feedforward_coefficients",
|
||||
float2(coefficients.first_causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_2fv(shader,
|
||||
"first_non_causal_feedforward_coefficients",
|
||||
float2(coefficients.first_non_causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_2fv(
|
||||
shader, "second_feedback_coefficients", float2(coefficients.second_feedback_coefficients()));
|
||||
GPU_shader_uniform_2fv(shader,
|
||||
"second_causal_feedforward_coefficients",
|
||||
float2(coefficients.second_causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_2fv(shader,
|
||||
"second_non_causal_feedforward_coefficients",
|
||||
float2(coefficients.second_non_causal_feedforward_coefficients()));
|
||||
GPU_shader_uniform_1f(shader,
|
||||
"first_causal_boundary_coefficient",
|
||||
float(coefficients.first_causal_boundary_coefficient()));
|
||||
GPU_shader_uniform_1f(shader,
|
||||
"first_non_causal_boundary_coefficient",
|
||||
float(coefficients.first_non_causal_boundary_coefficient()));
|
||||
GPU_shader_uniform_1f(shader,
|
||||
"second_causal_boundary_coefficient",
|
||||
float(coefficients.second_causal_boundary_coefficient()));
|
||||
GPU_shader_uniform_1f(shader,
|
||||
"second_non_causal_boundary_coefficient",
|
||||
float(coefficients.second_non_causal_boundary_coefficient()));
|
||||
|
||||
input.bind_as_texture(shader, "input_tx");
|
||||
|
||||
const Domain domain = input.domain();
|
||||
|
||||
Result first_causal_result = context.create_temporary_result(ResultType::Color);
|
||||
first_causal_result.allocate_texture(domain);
|
||||
first_causal_result.bind_as_image(shader, "first_causal_output_img");
|
||||
|
||||
Result first_non_causal_result = context.create_temporary_result(ResultType::Color);
|
||||
first_non_causal_result.allocate_texture(domain);
|
||||
first_non_causal_result.bind_as_image(shader, "first_non_causal_output_img");
|
||||
|
||||
Result second_causal_result = context.create_temporary_result(ResultType::Color);
|
||||
second_causal_result.allocate_texture(domain);
|
||||
second_causal_result.bind_as_image(shader, "second_causal_output_img");
|
||||
|
||||
Result second_non_causal_result = context.create_temporary_result(ResultType::Color);
|
||||
second_non_causal_result.allocate_texture(domain);
|
||||
second_non_causal_result.bind_as_image(shader, "second_non_causal_output_img");
|
||||
|
||||
/* The second dispatch dimension is 4 dispatches, one for the first causal filter, one for the
|
||||
* first non causal filter, one for the second causal filter, and one for the second non causal
|
||||
* filter. */
|
||||
compute_dispatch_threads_at_least(shader, int2(domain.size.y, 4), int2(64, 4));
|
||||
|
||||
GPU_shader_unbind();
|
||||
input.unbind_as_texture();
|
||||
first_causal_result.unbind_as_image();
|
||||
first_non_causal_result.unbind_as_image();
|
||||
second_causal_result.unbind_as_image();
|
||||
second_non_causal_result.unbind_as_image();
|
||||
|
||||
sum_causal_and_non_causal_results(context,
|
||||
first_causal_result,
|
||||
first_non_causal_result,
|
||||
second_causal_result,
|
||||
second_non_causal_result,
|
||||
output);
|
||||
first_causal_result.release();
|
||||
first_non_causal_result.release();
|
||||
second_causal_result.release();
|
||||
second_non_causal_result.release();
|
||||
}
|
||||
|
||||
void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
|
||||
{
|
||||
BLI_assert_msg(math::reduce_max(sigma) >= 32.0f,
|
||||
"Van Vliet filter is less accurate for sigma values less than 32. Use Deriche "
|
||||
"filter instead or direct convolution instead.");
|
||||
|
||||
Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
|
||||
blur_pass(context, input, horizontal_pass_result, sigma.x);
|
||||
blur_pass(context, horizontal_pass_result, output, sigma.y);
|
||||
horizontal_pass_result.release();
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,87 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "BLI_map.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_cached_resource.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
class Context;
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients Key.
|
||||
*/
|
||||
class DericheGaussianCoefficientsKey {
|
||||
public:
|
||||
float sigma;
|
||||
|
||||
DericheGaussianCoefficientsKey(float sigma);
|
||||
|
||||
uint64_t hash() const;
|
||||
};
|
||||
|
||||
bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b);
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients.
|
||||
*
|
||||
* A caches resource that computes and caches the coefficients of the fourth order IIR filter
|
||||
* approximating a Gaussian filter computed using Deriche's design method. This is based on the
|
||||
* following paper:
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*/
|
||||
class DericheGaussianCoefficients : public CachedResource {
|
||||
private:
|
||||
/* The d_ii coefficients in Equation (28) and (29). Those are the same for the causal and non
|
||||
* causal filters as can be seen in Equation (31). */
|
||||
double4 feedback_coefficients_;
|
||||
/* The n_ii^+ coefficients in Equation (28). */
|
||||
double4 causal_feedforward_coefficients_;
|
||||
/* The n_ii^- coefficients in Equation (29). */
|
||||
double4 non_causal_feedforward_coefficients_;
|
||||
/* The difference equation in Equation (28) rely on previous outputs to compute the new output,
|
||||
* and those previous outputs need to be properly initialized somehow. To do Neumann boundary
|
||||
* condition, we multiply the boundary value with this coefficient to simulate an infinite stream
|
||||
* of the boundary value. See the implementation for more information. */
|
||||
double causal_boundary_coefficient_;
|
||||
/* Same as causal_boundary_coefficient_ but for the non causal filter. */
|
||||
double non_causal_boundary_coefficient_;
|
||||
|
||||
public:
|
||||
DericheGaussianCoefficients(Context &context, float sigma);
|
||||
|
||||
const double4 &feedback_coefficients() const;
|
||||
const double4 &causal_feedforward_coefficients() const;
|
||||
const double4 &non_causal_feedforward_coefficients() const;
|
||||
double causal_boundary_coefficient() const;
|
||||
double non_causal_boundary_coefficient() const;
|
||||
};
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients Container.
|
||||
*/
|
||||
class DericheGaussianCoefficientsContainer : CachedResourceContainer {
|
||||
private:
|
||||
Map<DericheGaussianCoefficientsKey, std::unique_ptr<DericheGaussianCoefficients>> map_;
|
||||
|
||||
public:
|
||||
void reset() override;
|
||||
|
||||
/* Check if there is an available DericheGaussianCoefficients cached resource with the given
|
||||
* parameters in the container, if one exists, return it, otherwise, return a newly created one
|
||||
* and add it to the container. In both cases, tag the cached resource as needed to keep it
|
||||
* cached for the next evaluation. */
|
||||
DericheGaussianCoefficients &get(Context &context, float sigma);
|
||||
};
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,106 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "BLI_map.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_cached_resource.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
class Context;
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients Key.
|
||||
*/
|
||||
class VanVlietGaussianCoefficientsKey {
|
||||
public:
|
||||
float sigma;
|
||||
|
||||
VanVlietGaussianCoefficientsKey(float sigma);
|
||||
|
||||
uint64_t hash() const;
|
||||
};
|
||||
|
||||
bool operator==(const VanVlietGaussianCoefficientsKey &a,
|
||||
const VanVlietGaussianCoefficientsKey &b);
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients.
|
||||
*
|
||||
* A caches resource that computes and caches the coefficients of the fourth order IIR filter
|
||||
* approximating a Gaussian filter computed using Van Vliet's design method. This is based on the
|
||||
* following paper:
|
||||
*
|
||||
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
|
||||
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
|
||||
* 98EX170). Vol. 1. IEEE, 1998.
|
||||
*
|
||||
* However, to improve the numerical stability of the filter, it is decomposed into a bank of
|
||||
* two parallel second order IIR filters, each having a causal and a non causal filter. */
|
||||
class VanVlietGaussianCoefficients : public CachedResource {
|
||||
private:
|
||||
/* The causal and non causal feedforward coefficients for the first second order filter. */
|
||||
double2 first_causal_feedforward_coefficients_;
|
||||
double2 first_non_causal_feedforward_coefficients_;
|
||||
/* The feedback coefficients for the first second order filter. This is the same for both the
|
||||
* causal and non causal filters. */
|
||||
double2 first_feedback_coefficients_;
|
||||
|
||||
/* The causal and non causal feedforward coefficients for the second second order filter. */
|
||||
double2 second_causal_feedforward_coefficients_;
|
||||
double2 second_non_causal_feedforward_coefficients_;
|
||||
/* The feedback coefficients for the second second order filter. This is the same for both the
|
||||
* causal and non causal filters. */
|
||||
double2 second_feedback_coefficients_;
|
||||
|
||||
/* The difference equation of the IIR filter rely on previous outputs to compute the new output,
|
||||
* and those previous outputs need to be properly initialized somehow. To do Neumann boundary
|
||||
* condition, we multiply the boundary value with this coefficient to simulate an infinite stream
|
||||
* of the boundary value. See the implementation for more information. */
|
||||
double first_causal_boundary_coefficient_;
|
||||
double first_non_causal_boundary_coefficient_;
|
||||
double second_causal_boundary_coefficient_;
|
||||
double second_non_causal_boundary_coefficient_;
|
||||
|
||||
public:
|
||||
VanVlietGaussianCoefficients(Context &context, float sigma);
|
||||
|
||||
const double2 &first_causal_feedforward_coefficients() const;
|
||||
const double2 &first_non_causal_feedforward_coefficients() const;
|
||||
const double2 &first_feedback_coefficients() const;
|
||||
|
||||
const double2 &second_causal_feedforward_coefficients() const;
|
||||
const double2 &second_non_causal_feedforward_coefficients() const;
|
||||
const double2 &second_feedback_coefficients() const;
|
||||
|
||||
double first_causal_boundary_coefficient() const;
|
||||
double first_non_causal_boundary_coefficient() const;
|
||||
double second_causal_boundary_coefficient() const;
|
||||
double second_non_causal_boundary_coefficient() const;
|
||||
};
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients Container.
|
||||
*/
|
||||
class VanVlietGaussianCoefficientsContainer : CachedResourceContainer {
|
||||
private:
|
||||
Map<VanVlietGaussianCoefficientsKey, std::unique_ptr<VanVlietGaussianCoefficients>> map_;
|
||||
|
||||
public:
|
||||
void reset() override;
|
||||
|
||||
/* Check if there is an available VanVlietGaussianCoefficients cached resource with the given
|
||||
* parameters in the container, if one exists, return it, otherwise, return a newly created one
|
||||
* and add it to the container. In both cases, tag the cached resource as needed to keep it
|
||||
* cached for the next evaluation. */
|
||||
VanVlietGaussianCoefficients &get(Context &context, float sigma);
|
||||
};
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,319 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients.
|
||||
*
|
||||
* Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
|
||||
* computed using Deriche's design method. This is based on the following paper:
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*
|
||||
* But with corrections in the normalization scale from the following paper, as will be seen in the
|
||||
* implementation:
|
||||
*
|
||||
* Farneback, Gunnar, and Carl-Fredrik Westin. Improving Deriche-style recursive Gaussian
|
||||
* filters. Journal of Mathematical Imaging and Vision 26.3 (2006): 293-299.
|
||||
*
|
||||
* The Deriche filter is computed as the sum of a causal and a non causal sequence of second order
|
||||
* difference equations as can be seen in Equation (30) in Deriche's paper, and the target of this
|
||||
* class is to compute the feedback, causal feedforward, and non causal feedforward coefficients of
|
||||
* the filter. */
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "BLI_hash.hh"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_deriche_gaussian_coefficients.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients Key.
|
||||
*/
|
||||
|
||||
DericheGaussianCoefficientsKey::DericheGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
|
||||
|
||||
uint64_t DericheGaussianCoefficientsKey::hash() const
|
||||
{
|
||||
return get_default_hash(sigma);
|
||||
}
|
||||
|
||||
bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b)
|
||||
{
|
||||
return a.sigma == b.sigma;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients.
|
||||
*/
|
||||
|
||||
/* The base constant coefficients computed using Deriche's method with 10 digits of precision.
|
||||
* Those are available in Deriche's paper by comparing Equations (19) and (38). */
|
||||
inline constexpr static double a0 = 1.6797292232361107;
|
||||
inline constexpr static double a1 = 3.7348298269103580;
|
||||
inline constexpr static double b0 = 1.7831906544515104;
|
||||
inline constexpr static double b1 = 1.7228297663338028;
|
||||
inline constexpr static double c0 = -0.6802783501806897;
|
||||
inline constexpr static double c1 = -0.2598300478959625;
|
||||
inline constexpr static double w0 = 0.6318113174569493;
|
||||
inline constexpr static double w1 = 1.9969276832487770;
|
||||
|
||||
/* Computes n00 in Equation (21) in Deriche's paper. */
|
||||
static double compute_numerator_0()
|
||||
{
|
||||
return a0 + c0;
|
||||
}
|
||||
|
||||
/* Computes n11 in Equation (21) in Deriche's paper. */
|
||||
static double compute_numerator_1(float sigma)
|
||||
{
|
||||
const double multiplier1 = math::exp(-b1 / sigma);
|
||||
const double term1 = c1 * math::sin(w1 / sigma) - (c0 + 2.0 * a0) * math::cos(w1 / sigma);
|
||||
|
||||
const double multiplier2 = math::exp(-b0 / sigma);
|
||||
const double term2 = a1 * math::sin(w0 / sigma) - (2.0 * c0 + a0) * math::cos(w0 / sigma);
|
||||
|
||||
return multiplier1 * term1 + multiplier2 * term2;
|
||||
}
|
||||
|
||||
/* Computes n22 in Equation (21) in Deriche's paper. */
|
||||
static double compute_numerator_2(float sigma)
|
||||
{
|
||||
const double multiplier1 = 2.0 * math::exp(-(b0 / sigma) - (b1 / sigma));
|
||||
const double term11 = (a0 + c0) * math::cos(w1 / sigma) * math::cos(w0 / sigma);
|
||||
const double term12 = math::cos(w1 / sigma) * a1 * math::sin(w0 / sigma);
|
||||
const double term13 = math::cos(w0 / sigma) * c1 * math::sin(w1 / sigma);
|
||||
const double term1 = term11 - term12 - term13;
|
||||
|
||||
const double term2 = c0 * math::exp(-2.0 * (b0 / sigma));
|
||||
const double term3 = a0 * math::exp(-2.0 * (b1 / sigma));
|
||||
|
||||
return multiplier1 * term1 + term2 + term3;
|
||||
}
|
||||
|
||||
/* Computes n33 in Equation (21) in Deriche's paper. */
|
||||
static double compute_numerator_3(float sigma)
|
||||
{
|
||||
const double multiplier1 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
|
||||
const double term1 = c1 * math::sin(w1 / sigma) - math::cos(w1 / sigma) * c0;
|
||||
|
||||
const double multiplier2 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
|
||||
const double term2 = a1 * math::sin(w0 / sigma) - math::cos(w0 / sigma) * a0;
|
||||
|
||||
return multiplier1 * term1 + multiplier2 * term2;
|
||||
}
|
||||
|
||||
/* Computes and packs the numerators in Equation (21) in Deriche's paper. */
|
||||
static double4 compute_numerator(float sigma)
|
||||
{
|
||||
const double n0 = compute_numerator_0();
|
||||
const double n1 = compute_numerator_1(sigma);
|
||||
const double n2 = compute_numerator_2(sigma);
|
||||
const double n3 = compute_numerator_3(sigma);
|
||||
|
||||
return double4(n0, n1, n2, n3);
|
||||
}
|
||||
|
||||
/* Computes d11 in Equation (22) in Deriche's paper. */
|
||||
static double compute_denominator_1(float sigma)
|
||||
{
|
||||
const double term1 = -2.0 * math::exp(-(b0 / sigma)) * math::cos(w0 / sigma);
|
||||
const double term2 = 2.0 * math::exp(-(b1 / sigma)) * math::cos(w1 / sigma);
|
||||
|
||||
return term1 - term2;
|
||||
}
|
||||
|
||||
/* Computes d22 in Equation (22) in Deriche's paper. */
|
||||
static double compute_denominator_2(float sigma)
|
||||
{
|
||||
const double term1 = 4.0 * math::cos(w1 / sigma) * math::cos(w0 / sigma);
|
||||
const double multiplier1 = math::exp(-(b0 / sigma) - (b1 / sigma));
|
||||
|
||||
const double term2 = math::exp(-2.0 * (b1 / sigma));
|
||||
const double term3 = math::exp(-2.0 * (b0 / sigma));
|
||||
|
||||
return term1 * multiplier1 + term2 + term3;
|
||||
}
|
||||
|
||||
/* Computes d33 in Equation (22) in Deriche's paper. */
|
||||
static double compute_denominator_3(float sigma)
|
||||
{
|
||||
const double term1 = -2.0 * math::cos(w0 / sigma);
|
||||
const double multiplier1 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
|
||||
|
||||
const double term2 = 2.0 * math::cos(w1 / sigma);
|
||||
const double multiplier2 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
|
||||
|
||||
return term1 * multiplier1 - term2 * multiplier2;
|
||||
}
|
||||
|
||||
/* Computes d44 in Equation (22) in Deriche's paper. */
|
||||
static double compute_denominator_4(float sigma)
|
||||
{
|
||||
return math::exp(-2.0 * (b0 / sigma) - 2.0 * (b1 / sigma));
|
||||
}
|
||||
|
||||
/* Computes and packs the denominators in Equation (22) in Deriche's paper. */
|
||||
static double4 compute_denominator(float sigma)
|
||||
{
|
||||
const double d1 = compute_denominator_1(sigma);
|
||||
const double d2 = compute_denominator_2(sigma);
|
||||
const double d3 = compute_denominator_3(sigma);
|
||||
const double d4 = compute_denominator_4(sigma);
|
||||
|
||||
return double4(d1, d2, d3, d4);
|
||||
}
|
||||
|
||||
/* Computes the normalization scale that the feedforward coefficients should be divided by to
|
||||
* match the unit integral of the Gaussian. The scaling factor proposed by Deriche's paper in
|
||||
* Equation (50) is wrong due to missing terms. A correct scaling factor is presented in
|
||||
* Farneback's paper in Equation (25), which is implemented in this method. */
|
||||
static float compute_normalization_scale(const double4 &causal_feedforward_coefficients,
|
||||
const double4 &feedback_coefficients)
|
||||
{
|
||||
const double causal_feedforwad_sum = math::reduce_add(causal_feedforward_coefficients);
|
||||
const double feedback_sum = 1.0 + math::reduce_add(feedback_coefficients);
|
||||
return 2.0 * (causal_feedforwad_sum / feedback_sum) - causal_feedforward_coefficients[0];
|
||||
}
|
||||
|
||||
/* Computes the non causal feedforward coefficients from the feedback and causal feedforward
|
||||
* coefficients based on Equation (31) in Deriche's paper. Notice that the equation is linear, so
|
||||
* the coefficients can be computed after the normalization of the causal feedforward
|
||||
* coefficients. */
|
||||
static double4 compute_non_causal_feedforward_coefficients(
|
||||
const double4 &causal_feedforward_coefficients, const double4 &feedback_coefficients)
|
||||
{
|
||||
const double n1 = causal_feedforward_coefficients[1] -
|
||||
feedback_coefficients[0] * causal_feedforward_coefficients[0];
|
||||
const double n2 = causal_feedforward_coefficients[2] -
|
||||
feedback_coefficients[1] * causal_feedforward_coefficients[0];
|
||||
const double n3 = causal_feedforward_coefficients[3] -
|
||||
feedback_coefficients[2] * causal_feedforward_coefficients[0];
|
||||
const double n4 = -feedback_coefficients[3] * causal_feedforward_coefficients[0];
|
||||
|
||||
return double4(n1, n2, n3, n4);
|
||||
}
|
||||
|
||||
/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
|
||||
* previous outputs are not really defined at the start of the filter. To do Neumann boundary
|
||||
* condition, we initialize the previous output with a special value that is a function of the
|
||||
* boundary value. This special value is computed by multiply the boundary value with a coefficient
|
||||
* to simulate an infinite stream of the boundary value.
|
||||
*
|
||||
* The function for the coefficient can be derived by substituting the boundary value for previous
|
||||
* inputs, equating all current and previous outputs to the same value, and finally rearranging to
|
||||
* compute that same output value.
|
||||
*
|
||||
* Start by the difference equation where b_i are the feedforward coefficients and a_i are the
|
||||
* feedback coefficients:
|
||||
*
|
||||
* y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
|
||||
*
|
||||
* Assume all outputs are y and all inputs are x, which is the boundary value:
|
||||
*
|
||||
* y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
|
||||
*
|
||||
* Now rearrange to compute y:
|
||||
*
|
||||
* y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
|
||||
* y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
|
||||
* y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
|
||||
* y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
|
||||
*
|
||||
* So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
|
||||
* that is, we are doing Dirichlet boundary condition, the equations still hold. */
|
||||
static double compute_boundary_coefficient(const double4 &feedforward_coefficients,
|
||||
const double4 &feedback_coefficients)
|
||||
{
|
||||
return math::reduce_add(feedforward_coefficients) /
|
||||
(1.0 + math::reduce_add(feedback_coefficients));
|
||||
}
|
||||
|
||||
/* Computes the feedback, causal feedforward, and non causal feedforward coefficients given a
|
||||
* target Gaussian sigma value as used in Equations (28) and (29) in Deriche's paper. */
|
||||
DericheGaussianCoefficients::DericheGaussianCoefficients(Context & /*context*/, float sigma)
|
||||
{
|
||||
/* The numerator coefficients are the causal feedforward coefficients and the denominator
|
||||
* coefficients are the feedback coefficients as can be seen in Equation (28). */
|
||||
causal_feedforward_coefficients_ = compute_numerator(sigma);
|
||||
feedback_coefficients_ = compute_denominator(sigma);
|
||||
|
||||
/* Normalize the feedforward coefficients as discussed in Section "5.4 Normalization" in
|
||||
* Deriche's paper. Feedback coefficients do not need normalization. */
|
||||
causal_feedforward_coefficients_ /= compute_normalization_scale(causal_feedforward_coefficients_,
|
||||
feedback_coefficients_);
|
||||
|
||||
/* Compute the non causal feedforward coefficients from the feedback and normalized causal
|
||||
* feedforward coefficients based on Equation (31) from Deriche's paper. Since the causal
|
||||
* coefficients are already normalized, this doesn't need normalization. */
|
||||
non_causal_feedforward_coefficients_ = compute_non_causal_feedforward_coefficients(
|
||||
causal_feedforward_coefficients_, feedback_coefficients_);
|
||||
|
||||
/* Compute the boundary coefficient for both the causal and non causal filters. */
|
||||
causal_boundary_coefficient_ = compute_boundary_coefficient(causal_feedforward_coefficients_,
|
||||
feedback_coefficients_);
|
||||
non_causal_boundary_coefficient_ = compute_boundary_coefficient(
|
||||
non_causal_feedforward_coefficients_, feedback_coefficients_);
|
||||
}
|
||||
|
||||
const double4 &DericheGaussianCoefficients::feedback_coefficients() const
|
||||
{
|
||||
return feedback_coefficients_;
|
||||
}
|
||||
|
||||
const double4 &DericheGaussianCoefficients::causal_feedforward_coefficients() const
|
||||
{
|
||||
return causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
const double4 &DericheGaussianCoefficients::non_causal_feedforward_coefficients() const
|
||||
{
|
||||
return non_causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
double DericheGaussianCoefficients::causal_boundary_coefficient() const
|
||||
{
|
||||
return causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
double DericheGaussianCoefficients::non_causal_boundary_coefficient() const
|
||||
{
|
||||
return non_causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Deriche Gaussian Coefficients Container.
|
||||
*/
|
||||
|
||||
void DericheGaussianCoefficientsContainer::reset()
|
||||
{
|
||||
/* First, delete all resources that are no longer needed. */
|
||||
map_.remove_if([](auto item) { return !item.value->needed; });
|
||||
|
||||
/* Second, reset the needed status of the remaining resources to false to ready them to track
|
||||
* their needed status for the next evaluation. */
|
||||
for (auto &value : map_.values()) {
|
||||
value->needed = false;
|
||||
}
|
||||
}
|
||||
|
||||
DericheGaussianCoefficients &DericheGaussianCoefficientsContainer::get(Context &context,
|
||||
float sigma)
|
||||
{
|
||||
const DericheGaussianCoefficientsKey key(sigma);
|
||||
|
||||
auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
|
||||
key, [&]() { return std::make_unique<DericheGaussianCoefficients>(context, sigma); });
|
||||
|
||||
deriche_gaussian_coefficients.needed = true;
|
||||
return deriche_gaussian_coefficients;
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -0,0 +1,536 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients.
|
||||
*
|
||||
* Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
|
||||
* computed using Van Vliet's design method. This is based on the following paper:
|
||||
*
|
||||
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
|
||||
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
|
||||
* 98EX170). Vol. 1. IEEE, 1998.
|
||||
*
|
||||
*
|
||||
* The filter is computed as the cascade of a causal and a non causal sequences of second order
|
||||
* difference equations as can be seen in Equation (11) in Van Vliet's paper. The coefficients are
|
||||
* the same for both the causal and non causal sequences.
|
||||
*
|
||||
* However, to improve its numerical stability, we decompose the 4th order filter into a parallel
|
||||
* bank of second order filers using the methods of partial fractions as demonstrated in the follow
|
||||
* book:
|
||||
*
|
||||
* Oppenheim, Alan V. Discrete-time signal processing. Pearson Education India, 1999.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <complex>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_hash.hh"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_van_vliet_gaussian_coefficients.hh"
|
||||
|
||||
namespace blender::realtime_compositor {
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients Key.
|
||||
*/
|
||||
|
||||
VanVlietGaussianCoefficientsKey::VanVlietGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
|
||||
|
||||
uint64_t VanVlietGaussianCoefficientsKey::hash() const
|
||||
{
|
||||
return get_default_hash(sigma);
|
||||
}
|
||||
|
||||
bool operator==(const VanVlietGaussianCoefficientsKey &a, const VanVlietGaussianCoefficientsKey &b)
|
||||
{
|
||||
return a.sigma == b.sigma;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients.
|
||||
*/
|
||||
|
||||
/* Computes the variance of the Gaussian filter represented by the given poles scaled by the given
|
||||
* scale factor. This is based on Equation (20) in Van Vliet's paper. */
|
||||
static double compute_scaled_poles_variance(const std::array<std::complex<double>, 4> &poles,
|
||||
double scale_factor)
|
||||
{
|
||||
std::complex<double> variance = std::complex<double>(0.0, 0.0);
|
||||
for (const std::complex<double> &pole : poles) {
|
||||
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
|
||||
const double phase = std::arg(pole) / scale_factor;
|
||||
const std::complex<double> multiplier1 = std::polar(magnitude, phase);
|
||||
const std::complex<double> multiplier2 = std::pow(magnitude - std::polar(1.0, phase), -2.0);
|
||||
variance += 2.0 * multiplier1 * multiplier2;
|
||||
}
|
||||
|
||||
/* The variance is actually real valued as guaranteed by Equations (10) and (2) since the poles
|
||||
* are complex conjugate pairs. See Section 3.3 of the paper. */
|
||||
return variance.real();
|
||||
}
|
||||
|
||||
/* Computes the partial derivative with respect to the scale factor at the given scale factor of
|
||||
* the variance of the Gaussian filter represented by the given poles scaled by the given scale
|
||||
* factor. This is based on the partial derivative with respect to the scale factor of Equation
|
||||
* (20) in Van Vliet's paper.
|
||||
*
|
||||
* The derivative is not listed in the paper, but was computed manually as the sum of the following
|
||||
* for each of the poles:
|
||||
*
|
||||
* \frac{
|
||||
* 2a^\frac{1}{x}e^\frac{ib}{x} (e^\frac{ib}{x}+a^\frac{1}{x}) (\ln(a)-ib)
|
||||
* }{
|
||||
* x^2 (a^\frac{1}{x}-e^\frac{ib}{x})^3
|
||||
* }
|
||||
*
|
||||
* Where "x" is the scale factor, "a" is the magnitude of the pole, and "b" is its phase. */
|
||||
static double compute_scaled_poles_variance_derivative(
|
||||
const std::array<std::complex<double>, 4> &poles, double scale_factor)
|
||||
{
|
||||
std::complex<double> variance_derivative = std::complex<double>(0.0, 0.0);
|
||||
for (const std::complex<double> &pole : poles) {
|
||||
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
|
||||
const double phase = std::arg(pole) / scale_factor;
|
||||
|
||||
const std::complex<double> multiplier1 = std::polar(magnitude, phase);
|
||||
const std::complex<double> multiplier2 = magnitude + std::polar(1.0, phase);
|
||||
const std::complex<double> multiplier3 = std::log(std::abs(pole)) -
|
||||
std::complex<double>(0.0, std::arg(pole));
|
||||
|
||||
const std::complex<double> divisor1 = std::pow(magnitude - std::polar(1.0, phase), 3.0);
|
||||
const std::complex<double> divisor2 = math::square(scale_factor);
|
||||
|
||||
variance_derivative += 2.0 * multiplier1 * multiplier2 * multiplier3 / (divisor1 * divisor2);
|
||||
}
|
||||
|
||||
/* The variance derivative is actually real valued as guaranteed by Equations (10) and (2) since
|
||||
* the poles are complex conjugate pairs. See Section 3.3 of the paper. */
|
||||
return variance_derivative.real();
|
||||
}
|
||||
|
||||
/* The poles were computed for a Gaussian filter with a sigma value of 2, in order to generalize
|
||||
* that for any sigma value, we need to scale the poles by a certain scaling factor as described in
|
||||
* Section 4.2 of Van Vliet's paper. To find the scaling factor, we start from an initial guess of
|
||||
* half sigma, then iteratively improve the guess using Newton's method by computing the variance
|
||||
* and its derivative based on Equation (20). */
|
||||
static double find_scale_factor(const std::array<std::complex<double>, 4> &poles,
|
||||
float reference_sigma)
|
||||
{
|
||||
const double reference_variance = math::square(reference_sigma);
|
||||
|
||||
/* Note that the poles were computed for a Gaussian filter with a sigma value of 2, so it it
|
||||
* as if we have a base scale of 2, and we start with half sigma as an initial guess. See
|
||||
* Section 4.2 for more information */
|
||||
double scale_factor = reference_sigma / 2.0;
|
||||
|
||||
const int maximum_interations = 10;
|
||||
for (int i = 0; i < maximum_interations; i++) {
|
||||
const double variance = compute_scaled_poles_variance(poles, scale_factor);
|
||||
|
||||
/* Close enough, we have found our scale factor. */
|
||||
if (math::abs(reference_variance - variance) < 1.0e-8) {
|
||||
return scale_factor;
|
||||
}
|
||||
|
||||
/* Improve guess using Newton's method. Notice that Newton's method is a root finding method,
|
||||
* so we supply the difference to the reference variance as our function, since the zero point
|
||||
* will be when the variance is equal to the reference one. The derivative is not affected
|
||||
* since the reference variance is a constant. */
|
||||
const double derivative = compute_scaled_poles_variance_derivative(poles, scale_factor);
|
||||
scale_factor -= (variance - reference_variance) / derivative;
|
||||
}
|
||||
|
||||
/* The paper mentions that only a few iterations are needed, so if we didn't converge after
|
||||
* maximum_interations, something is probably wrong. */
|
||||
BLI_assert_unreachable();
|
||||
return scale_factor;
|
||||
}
|
||||
|
||||
/* The poles were computed for a Gaussian filter with a sigma value of 2, so scale them using
|
||||
* Equation (19) in Van Vliet's paper to have the given sigma value. This involves finding the
|
||||
* appropriate scale factor based on Equation (20), see Section 4.2 and the find_scale_factor
|
||||
* method for more information. */
|
||||
static std::array<std::complex<double>, 4> computed_scaled_poles(
|
||||
const std::array<std::complex<double>, 4> &poles, float sigma)
|
||||
{
|
||||
const double scale_factor = find_scale_factor(poles, sigma);
|
||||
|
||||
std::array<std::complex<double>, 4> scaled_poles;
|
||||
for (int i = 0; i < poles.size(); i++) {
|
||||
const std::complex<double> &pole = poles[i];
|
||||
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
|
||||
const double phase = std::arg(pole) / scale_factor;
|
||||
scaled_poles[i] = std::polar(magnitude, phase);
|
||||
}
|
||||
|
||||
return scaled_poles;
|
||||
}
|
||||
|
||||
/* Compute the causal poles from the non causal ones. Since the Gaussian is a real even function,
|
||||
* the causal poles are just the inverse of the non causal poles, as noted in Equation (2) in Van
|
||||
* Vliet's paper. */
|
||||
static std::array<std::complex<double>, 4> compute_causal_poles(
|
||||
const std::array<std::complex<double>, 4> &non_causal_poles)
|
||||
{
|
||||
std::array<std::complex<double>, 4> causal_poles;
|
||||
for (int i = 0; i < non_causal_poles.size(); i++) {
|
||||
causal_poles[i] = 1.0 / non_causal_poles[i];
|
||||
}
|
||||
|
||||
return causal_poles;
|
||||
}
|
||||
|
||||
/* Computes the feedback coefficients from the given poles based on the equations in Equation (13)
|
||||
* in Van Vliet's paper. See Section 3.2 for more information. */
|
||||
static double4 compute_feedback_coefficients(const std::array<std::complex<double>, 4> &poles)
|
||||
{
|
||||
/* Compute the gain of the poles, which is the "b" at the end of Equation (13). */
|
||||
std::complex<double> gain = std::complex<double>(1.0, 0.0);
|
||||
for (const std::complex<double> &pole : poles) {
|
||||
gain /= pole;
|
||||
}
|
||||
|
||||
/* Compute the coefficients b4, b3, b2, and b1 based on the expressions b_N, b_N-1, b_N-2, and
|
||||
* b_N-3 respectively in Equation (13). b4 and b3 are trivial, while b2 and b1 can be computed by
|
||||
* drawing the following summation trees, where each path from the root to the leaf is multiplied
|
||||
* and added:
|
||||
*
|
||||
* b2
|
||||
* ____/|\____
|
||||
* / | \
|
||||
* i --> 2 3 4
|
||||
* | / \ /|\
|
||||
* j --> 1 1 2 1 2 3
|
||||
*
|
||||
* b1
|
||||
* ___/ \___
|
||||
* / \
|
||||
* i --> 3 4
|
||||
* | / \
|
||||
* j --> 2 2 3
|
||||
* | | / \
|
||||
* k --> 1 1 1 2
|
||||
*
|
||||
* Notice that the values of i, j, and k are 1-index, so we need to subtract one when accessing
|
||||
* the poles. */
|
||||
const std::complex<double> b4 = gain;
|
||||
const std::complex<double> b3 = -gain * (poles[0] + poles[1] + poles[2] + poles[3]);
|
||||
const std::complex<double> b2 = gain * (poles[1] * poles[0] + poles[2] * poles[0] +
|
||||
poles[2] * poles[1] + poles[3] * poles[0] +
|
||||
poles[3] * poles[1] + poles[3] * poles[2]);
|
||||
const std::complex<double> b1 = -gain * (poles[2] * poles[1] * poles[0] +
|
||||
poles[3] * poles[1] * poles[0] +
|
||||
poles[3] * poles[2] * poles[0] +
|
||||
poles[3] * poles[2] * poles[1]);
|
||||
|
||||
/* The coefficients are actually real valued as guaranteed by Equations (10) and (2) since
|
||||
* the poles are complex conjugate pairs. See Section 3.3 of the paper. */
|
||||
const double4 coefficients = double4(b1.real(), b2.real(), b3.real(), b4.real());
|
||||
|
||||
return coefficients;
|
||||
}
|
||||
|
||||
/* Computes the feedforward coefficient from the feedback coefficients based on Equation (12) of
|
||||
* Van Vliet's paper. See Section 3.2 for more information. */
|
||||
static double compute_feedforward_coefficient(const double4 &feedback_coefficients)
|
||||
{
|
||||
return 1.0 + math::reduce_add(feedback_coefficients);
|
||||
}
|
||||
|
||||
/* Computes the residue of the partial fraction of the transfer function of the given causal poles
|
||||
* and gain for the given target pole. This essentially evaluates Equation (3.41) in Oppenheim's
|
||||
* book, where d_k is the target pole and assuming the transfer function is in the form given in
|
||||
* Equation (3.39), where d_k are the poles. See the following derivation for the gain value.
|
||||
*
|
||||
* For the particular case of the Van Vliet's system, there are no zeros, so the numerator in
|
||||
* Equation (3.39) is one. Further note that Van Vliet's formulation is different from the expected
|
||||
* form, so we need to rearrange Equation (3) in to match the form in Equation (3.39), which is
|
||||
* shown below.
|
||||
*
|
||||
* Start from the causal term of Equation (3):
|
||||
*
|
||||
* H_+(z) = \prod_{i=1}^N \frac{d_i - 1}{d_i - z^{-1}}
|
||||
*
|
||||
* Divide by d_i:
|
||||
*
|
||||
* H_+(z) = \prod_{i=1}^N \frac{1 - d_i^{-1}}{1 - d_i^{-1}z^{-1}}
|
||||
*
|
||||
* Move the numerator to its own product:
|
||||
*
|
||||
* H_+(z) = \prod_{i=1}^N 1 - d_i^{-1} \prod_{i=1}^N \frac{1}{1 - d_i^{-1}z^{-1}}
|
||||
*
|
||||
* And we reach the same form as Equation (3.39). Where the first product term is b0 / a0 and is
|
||||
* also the given gain value, which is also the same as the feedforward coefficient denoted by
|
||||
* the alpha in Equation (12). Further d_i^{-1} in our derivation is the same as d_k in Equation
|
||||
* (3.39), the discrepancy in the inverse operator is the fact that Van Vliet's derivation assume
|
||||
* non causal poles, while Oppenheim's assume causal poles, which are inverse of each other as can
|
||||
* be seen in the compute_causal_poles function. */
|
||||
static std::complex<double> compute_partial_fraction_residue(
|
||||
const std::array<std::complex<double>, 4> &poles,
|
||||
const std::complex<double> &target_pole,
|
||||
double gain)
|
||||
{
|
||||
/* Evaluating Equation (3.41) actually corresponds to omitting the terms in Equation (3.39) that
|
||||
* corresponds to the target pole or its conjugate, because they get canceled by the first term
|
||||
* in Equation (3.41). That's we are essentially evaluating the limit as the expression tends to
|
||||
* the target pole. */
|
||||
std::complex<double> target_pole_inverse = 1.0 / target_pole;
|
||||
std::complex<double> residue = std::complex<double>(1.0, 0.0);
|
||||
for (const std::complex<double> &pole : poles) {
|
||||
if (pole != target_pole && pole != std::conj(target_pole)) {
|
||||
residue *= 1.0 - pole * target_pole_inverse;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remember that the gain is the b0 / a0 expression in Equation (3.39). */
|
||||
return gain / residue;
|
||||
}
|
||||
|
||||
/* Evaluates the causal transfer function at the reciprocal of the given pole, which will be the
|
||||
* non causal pole if the given pole is a causal one, as discussed in the compute_causal_poles
|
||||
* function. The causal transfer function is given in Equation (3) in Van Vliet's paper, but we
|
||||
* compute it in the form derived in the description of the compute_partial_fraction_residue
|
||||
* function, also see the aforementioned function for the gain value. */
|
||||
static std::complex<double> compute_causal_transfer_function_at_non_causal_pole(
|
||||
const std::array<std::complex<double>, 4> &poles,
|
||||
const std::complex<double> &target_pole,
|
||||
double gain)
|
||||
{
|
||||
std::complex<double> result = std::complex<double>(1.0, 0.0);
|
||||
for (const std::complex<double> &pole : poles) {
|
||||
result *= 1.0 - pole * target_pole;
|
||||
}
|
||||
|
||||
return gain / result;
|
||||
}
|
||||
|
||||
/* Combine each pole and its conjugate counterpart into a second order section and assign its
|
||||
* coefficients to the given coefficients value. The residue of the pole and its transfer value in
|
||||
* the partial fraction of its transfer function are given.
|
||||
*
|
||||
* TODO: Properly document this function and prove its equations. */
|
||||
static void compute_second_order_section(const std::complex<double> &pole,
|
||||
const std::complex<double> &residue,
|
||||
const std::complex<double> &transfer_value,
|
||||
double2 &r_feedback_coefficients,
|
||||
double2 &r_causal_feedforward_coefficients,
|
||||
double2 &r_non_causal_feedforward_coefficients)
|
||||
{
|
||||
const std::complex<double> parallel_residue = residue * transfer_value;
|
||||
const std::complex<double> pole_inverse = 1.0 / pole;
|
||||
|
||||
r_feedback_coefficients = double2(-2.0 * pole.real(), std::norm(pole));
|
||||
|
||||
const double causal_feedforward_1 = parallel_residue.imag() / pole_inverse.imag();
|
||||
const double causal_feedforward_0 = parallel_residue.real() -
|
||||
causal_feedforward_1 * pole_inverse.real();
|
||||
r_causal_feedforward_coefficients = double2(causal_feedforward_0, causal_feedforward_1);
|
||||
|
||||
const double non_causal_feedforward_1 = causal_feedforward_1 -
|
||||
causal_feedforward_0 * r_feedback_coefficients[0];
|
||||
const double non_causal_feedforward_2 = -causal_feedforward_0 * r_feedback_coefficients[1];
|
||||
r_non_causal_feedforward_coefficients = double2(non_causal_feedforward_1,
|
||||
non_causal_feedforward_2);
|
||||
}
|
||||
|
||||
/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
|
||||
* previous outputs are not really defined at the start of the filter. To do Neumann boundary
|
||||
* condition, we initialize the previous output with a special value that is a function of the
|
||||
* boundary value. This special value is computed by multiply the boundary value with a coefficient
|
||||
* to simulate an infinite stream of the boundary value.
|
||||
*
|
||||
* The function for the coefficient can be derived by substituting the boundary value for previous
|
||||
* inputs, equating all current and previous outputs to the same value, and finally rearranging to
|
||||
* compute that same output value.
|
||||
*
|
||||
* Start by the difference equation where b_i are the feedforward coefficients and a_i are the
|
||||
* feedback coefficients:
|
||||
*
|
||||
* y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
|
||||
*
|
||||
* Assume all outputs are y and all inputs are x, which is the boundary value:
|
||||
*
|
||||
* y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
|
||||
*
|
||||
* Now rearrange to compute y:
|
||||
*
|
||||
* y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
|
||||
* y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
|
||||
* y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
|
||||
* y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
|
||||
*
|
||||
* So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
|
||||
* that is, we are doing Dirichlet boundary condition, the equations still hold. */
|
||||
static double compute_boundary_coefficient(const double2 &feedback_coefficients,
|
||||
const double2 &feedforward_coefficients)
|
||||
{
|
||||
return math::reduce_add(feedforward_coefficients) /
|
||||
(1.0 + math::reduce_add(feedback_coefficients));
|
||||
}
|
||||
|
||||
/* Computes the feedback and feedforward coefficients for the 4th order Van Vliet Gaussian filter
|
||||
* given a target Gaussian sigma value. We first scale the poles of the filter to match the sigma
|
||||
* value based on the method described in Section 4.2 of Van Vliet's paper, then we compute the
|
||||
* coefficients from the scaled poles based on Equations (12) and (13). */
|
||||
VanVlietGaussianCoefficients::VanVlietGaussianCoefficients(Context & /*context*/, float sigma)
|
||||
{
|
||||
|
||||
/* The 4th order (N=4) poles for the Gaussian filter of a sigma of 2 computed by minimizing the
|
||||
* maximum error (L-infinity) to true Gaussian as provided in Van Vliet's paper Table (1) fourth
|
||||
* column. Notice that the second and fourth poles are the complex conjugates of the first and
|
||||
* third poles respectively as noted in the table description. */
|
||||
const std::array<std::complex<double>, 4> poles = {
|
||||
std::complex<double>(1.12075, 1.27788),
|
||||
std::complex<double>(1.12075, -1.27788),
|
||||
std::complex<double>(1.76952, 0.46611),
|
||||
std::complex<double>(1.76952, -0.46611),
|
||||
};
|
||||
|
||||
const std::array<std::complex<double>, 4> scaled_poles = computed_scaled_poles(poles, sigma);
|
||||
|
||||
/* The given poles are actually the non causal poles, since they are outside of the unit circle,
|
||||
* as demonstrated in Section 3.4 of Van Vliet's paper. And we compute the causal poles from
|
||||
* those. */
|
||||
const std::array<std::complex<double>, 4> non_causal_poles = scaled_poles;
|
||||
const std::array<std::complex<double>, 4> causal_poles = compute_causal_poles(non_causal_poles);
|
||||
|
||||
/* Compute the feedforward and feedback coefficients, noting that those are functions of the non
|
||||
* causal poles. */
|
||||
const double4 feedback_coefficients = compute_feedback_coefficients(non_causal_poles);
|
||||
const double feedforward_coefficient = compute_feedforward_coefficient(feedback_coefficients);
|
||||
|
||||
/* We only compute the residue for two of the causal poles, since the other two are complex
|
||||
* conjugates of those two, and their residues will also be the complex conjugate of their
|
||||
* respective counterpart. The gain is the feedforward coefficient as discussed in the function
|
||||
* description. */
|
||||
const std::complex<double> first_residue = compute_partial_fraction_residue(
|
||||
causal_poles, causal_poles[0], feedforward_coefficient);
|
||||
const std::complex<double> second_residue = compute_partial_fraction_residue(
|
||||
causal_poles, causal_poles[2], feedforward_coefficient);
|
||||
|
||||
/* We only compute the transfer value of for two of the non causal poles, since the other two are
|
||||
* complex conjugates of those two, and their transfer values will also be the complex conjugate
|
||||
* of their respective counterpart. The gain is the feedforward coefficient as discussed in the
|
||||
* function description. */
|
||||
const std::complex<double> first_transfer_value =
|
||||
compute_causal_transfer_function_at_non_causal_pole(
|
||||
causal_poles, causal_poles[0], feedforward_coefficient);
|
||||
const std::complex<double> second_transfer_value =
|
||||
compute_causal_transfer_function_at_non_causal_pole(
|
||||
causal_poles, causal_poles[2], feedforward_coefficient);
|
||||
|
||||
/* Combine each pole and its conjugate counterpart into a second order section and assign its
|
||||
* coefficients. */
|
||||
compute_second_order_section(causal_poles[0],
|
||||
first_residue,
|
||||
first_transfer_value,
|
||||
first_feedback_coefficients_,
|
||||
first_causal_feedforward_coefficients_,
|
||||
first_non_causal_feedforward_coefficients_);
|
||||
compute_second_order_section(causal_poles[2],
|
||||
second_residue,
|
||||
second_transfer_value,
|
||||
second_feedback_coefficients_,
|
||||
second_causal_feedforward_coefficients_,
|
||||
second_non_causal_feedforward_coefficients_);
|
||||
|
||||
/* Compute the boundary coefficients for all four of second order sections. */
|
||||
first_causal_boundary_coefficient_ = compute_boundary_coefficient(
|
||||
first_feedback_coefficients_, first_causal_feedforward_coefficients_);
|
||||
first_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
|
||||
first_feedback_coefficients_, first_non_causal_feedforward_coefficients_);
|
||||
second_causal_boundary_coefficient_ = compute_boundary_coefficient(
|
||||
second_feedback_coefficients_, second_causal_feedforward_coefficients_);
|
||||
second_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
|
||||
second_feedback_coefficients_, second_non_causal_feedforward_coefficients_);
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::first_causal_feedforward_coefficients() const
|
||||
{
|
||||
return first_causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::first_non_causal_feedforward_coefficients() const
|
||||
{
|
||||
return first_non_causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::first_feedback_coefficients() const
|
||||
{
|
||||
return first_feedback_coefficients_;
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::second_causal_feedforward_coefficients() const
|
||||
{
|
||||
return second_causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::second_non_causal_feedforward_coefficients() const
|
||||
{
|
||||
return second_non_causal_feedforward_coefficients_;
|
||||
}
|
||||
|
||||
const double2 &VanVlietGaussianCoefficients::second_feedback_coefficients() const
|
||||
{
|
||||
return second_feedback_coefficients_;
|
||||
}
|
||||
|
||||
double VanVlietGaussianCoefficients::first_causal_boundary_coefficient() const
|
||||
{
|
||||
return first_causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
double VanVlietGaussianCoefficients::first_non_causal_boundary_coefficient() const
|
||||
{
|
||||
return first_non_causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
double VanVlietGaussianCoefficients::second_causal_boundary_coefficient() const
|
||||
{
|
||||
return second_causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
double VanVlietGaussianCoefficients::second_non_causal_boundary_coefficient() const
|
||||
{
|
||||
return second_non_causal_boundary_coefficient_;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Van Vliet Gaussian Coefficients Container.
|
||||
*/
|
||||
|
||||
void VanVlietGaussianCoefficientsContainer::reset()
|
||||
{
|
||||
/* First, delete all resources that are no longer needed. */
|
||||
map_.remove_if([](auto item) { return !item.value->needed; });
|
||||
|
||||
/* Second, reset the needed status of the remaining resources to false to ready them to track
|
||||
* their needed status for the next evaluation. */
|
||||
for (auto &value : map_.values()) {
|
||||
value->needed = false;
|
||||
}
|
||||
}
|
||||
|
||||
VanVlietGaussianCoefficients &VanVlietGaussianCoefficientsContainer::get(Context &context,
|
||||
float sigma)
|
||||
{
|
||||
const VanVlietGaussianCoefficientsKey key(sigma);
|
||||
|
||||
auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
|
||||
key, [&]() { return std::make_unique<VanVlietGaussianCoefficients>(context, sigma); });
|
||||
|
||||
deriche_gaussian_coefficients.needed = true;
|
||||
return deriche_gaussian_coefficients;
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
|
@ -20,6 +20,8 @@ void StaticCacheManager::reset()
|
|||
cached_shaders.reset();
|
||||
bokeh_kernels.reset();
|
||||
cached_images.reset();
|
||||
deriche_gaussian_coefficients.reset();
|
||||
van_vliet_gaussian_coefficients.reset();
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
|
||||
* filter using Deriche's design method. This is based on the following paper:
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*
|
||||
* We run two filters per row in parallel, one for the causal filter and one for the non causal
|
||||
* filter, storing the result of each separately. See the DericheGaussianCoefficients class and the
|
||||
* implementation for more information. */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
#define FILTER_ORDER 4
|
||||
|
||||
void main()
|
||||
{
|
||||
/* The shader runs parallel across rows but serially across columns. */
|
||||
int y = int(gl_GlobalInvocationID.x);
|
||||
int width = texture_size(input_tx).x;
|
||||
|
||||
/* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
|
||||
* causal one. */
|
||||
bool is_causal = gl_GlobalInvocationID.y == 0;
|
||||
vec4 feedforward_coefficients = is_causal ? causal_feedforward_coefficients :
|
||||
non_causal_feedforward_coefficients;
|
||||
float boundary_coefficient = is_causal ? causal_boundary_coefficient :
|
||||
non_causal_boundary_coefficient;
|
||||
|
||||
/* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
|
||||
* current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
|
||||
* boundary condition, so we initialize all inputs by the boundary pixel. */
|
||||
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
|
||||
vec4 input_boundary = texture_load(input_tx, boundary_texel);
|
||||
vec4 inputs[FILTER_ORDER + 1] = vec4[](
|
||||
input_boundary, input_boundary, input_boundary, input_boundary, input_boundary);
|
||||
|
||||
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
|
||||
* current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
|
||||
* boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
|
||||
* boundary coefficient. See the DericheGaussianCoefficients class for more information on the
|
||||
* boundary handing. */
|
||||
vec4 output_boundary = input_boundary * boundary_coefficient;
|
||||
vec4 outputs[FILTER_ORDER + 1] = vec4[](
|
||||
output_boundary, output_boundary, output_boundary, output_boundary, output_boundary);
|
||||
|
||||
for (int x = 0; x < width; x++) {
|
||||
/* Run forward across rows for the causal filter and backward for the non causal filter. */
|
||||
ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
|
||||
inputs[0] = texture_load(input_tx, texel);
|
||||
|
||||
/* Compute Equation (28) for the causal filter or Equation (29) for the non causal filter. The
|
||||
* only difference is that the non causal filter ignores the current value and starts from the
|
||||
* previous input, as can be seen in the subscript of the first input term in both equations.
|
||||
* So add one while indexing the non causal inputs. */
|
||||
outputs[0] = vec4(0.0);
|
||||
int first_input_index = is_causal ? 0 : 1;
|
||||
for (int i = 0; i < FILTER_ORDER; i++) {
|
||||
outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
|
||||
outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
|
||||
}
|
||||
|
||||
/* Store the causal and non causal outputs independently, then sum them in a separate shader
|
||||
* dispatch for better parallelism. */
|
||||
if (is_causal) {
|
||||
imageStore(causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
else {
|
||||
imageStore(non_causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
|
||||
/* Shift the inputs temporally by one. The oldest input is discarded, while the current input
|
||||
* will retain its value but will be overwritten with the new current value in the next
|
||||
* iteration. */
|
||||
for (int i = FILTER_ORDER; i >= 1; i--) {
|
||||
inputs[i] = inputs[i - 1];
|
||||
}
|
||||
|
||||
/* Shift the outputs temporally by one. The oldest output is discarded, while the current
|
||||
* output will retain its value but will be overwritten with the new current value in the next
|
||||
* iteration. */
|
||||
for (int i = FILTER_ORDER; i >= 1; i--) {
|
||||
outputs[i] = outputs[i - 1];
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
/* The Deriche filter is a parallel interconnection filter, meaning its output is the sum of its
|
||||
* causal and non causal filters. */
|
||||
vec4 filter_output = texture_load(causal_input_tx, texel) +
|
||||
texture_load(non_causal_input_tx, texel);
|
||||
|
||||
/* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
|
||||
* in the deriche_gaussian_blur.cc file for more information on the rational behind this. */
|
||||
imageStore(output_img, texel.yx, filter_output);
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
|
||||
* filter using Van Vliet's design method. This is based on the following paper:
|
||||
*
|
||||
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
|
||||
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
|
||||
* 98EX170). Vol. 1. IEEE, 1998.
|
||||
*
|
||||
* We decomposed the filter into two second order filters, so we actually run four filters per row
|
||||
* in parallel, one for the first causal filter, one for the first non causal filter, one for the
|
||||
* second causal filter, and one for the second non causal filter, storing the result of each
|
||||
* separately. See the VanVlietGaussianCoefficients class and the implementation for more
|
||||
* information. */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
#define FILTER_ORDER 2
|
||||
|
||||
void main()
|
||||
{
|
||||
/* The shader runs parallel across rows but serially across columns. */
|
||||
int y = int(gl_GlobalInvocationID.x);
|
||||
int width = texture_size(input_tx).x;
|
||||
|
||||
/* The second dispatch dimension is four dispatches:
|
||||
*
|
||||
* 0 -> First causal filter.
|
||||
* 1 -> First non causal filter.
|
||||
* 2 -> Second causal filter.
|
||||
* 3 -> Second non causal filter.
|
||||
*
|
||||
* We detect causality by even numbers. */
|
||||
bool is_causal = gl_GlobalInvocationID.y % 2 == 0;
|
||||
vec2 first_feedforward_coefficients = is_causal ? first_causal_feedforward_coefficients :
|
||||
first_non_causal_feedforward_coefficients;
|
||||
float first_boundary_coefficient = is_causal ? first_causal_boundary_coefficient :
|
||||
first_non_causal_boundary_coefficient;
|
||||
vec2 second_feedforward_coefficients = is_causal ? second_causal_feedforward_coefficients :
|
||||
second_non_causal_feedforward_coefficients;
|
||||
float second_boundary_coefficient = is_causal ? second_causal_boundary_coefficient :
|
||||
second_non_causal_boundary_coefficient;
|
||||
/* And we detect the filter by order. */
|
||||
bool is_first_filter = gl_GlobalInvocationID.y < 2;
|
||||
vec2 feedforward_coefficients = is_first_filter ? first_feedforward_coefficients :
|
||||
second_feedforward_coefficients;
|
||||
vec2 feedback_coefficients = is_first_filter ? first_feedback_coefficients :
|
||||
second_feedback_coefficients;
|
||||
float boundary_coefficient = is_first_filter ? first_boundary_coefficient :
|
||||
second_boundary_coefficient;
|
||||
|
||||
/* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
|
||||
* current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
|
||||
* boundary condition, so we initialize all inputs by the boundary pixel. */
|
||||
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
|
||||
vec4 input_boundary = texture_load(input_tx, boundary_texel);
|
||||
vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary);
|
||||
|
||||
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
|
||||
* current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
|
||||
* boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
|
||||
* boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the
|
||||
* boundary handing. */
|
||||
vec4 output_boundary = input_boundary * boundary_coefficient;
|
||||
vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary);
|
||||
|
||||
for (int x = 0; x < width; x++) {
|
||||
/* Run forward across rows for the causal filter and backward for the non causal filter. */
|
||||
ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
|
||||
inputs[0] = texture_load(input_tx, texel);
|
||||
|
||||
/* Compute the filter based on its difference equation, this is not in the Van Vliet paper
|
||||
* because the filter was decomposed, but it is essentially similar to Equation (28) for the
|
||||
* causal filter or Equation (29) for the non causal filter in Deriche's paper, except it is
|
||||
* second order, not fourth order.
|
||||
*
|
||||
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
|
||||
* 1993.
|
||||
*
|
||||
* The only difference is that the non causal filter ignores the current value and starts from
|
||||
* the previous input, as can be seen in the subscript of the first input term in both
|
||||
* equations. So add one while indexing the non causal inputs. */
|
||||
outputs[0] = vec4(0.0);
|
||||
int first_input_index = is_causal ? 0 : 1;
|
||||
for (int i = 0; i < FILTER_ORDER; i++) {
|
||||
outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
|
||||
outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
|
||||
}
|
||||
|
||||
/* Store the causal and non causal outputs of each of the two filters independently, then sum
|
||||
* them in a separate shader dispatch for better parallelism. */
|
||||
if (is_causal) {
|
||||
if (is_first_filter) {
|
||||
imageStore(first_causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
else {
|
||||
imageStore(second_causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (is_first_filter) {
|
||||
imageStore(first_non_causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
else {
|
||||
imageStore(second_non_causal_output_img, texel, outputs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Shift the inputs temporally by one. The oldest input is discarded, while the current input
|
||||
* will retain its value but will be overwritten with the new current value in the next
|
||||
* iteration. */
|
||||
for (int i = FILTER_ORDER; i >= 1; i--) {
|
||||
inputs[i] = inputs[i - 1];
|
||||
}
|
||||
|
||||
/* Shift the outputs temporally by one. The oldest output is discarded, while the current
|
||||
* output will retain its value but will be overwritten with the new current value in the next
|
||||
* iteration. */
|
||||
for (int i = FILTER_ORDER; i >= 1; i--) {
|
||||
outputs[i] = outputs[i - 1];
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
/* The Van Vliet filter is a parallel interconnection filter, meaning its output is the sum of
|
||||
* all of its causal and non causal filters. */
|
||||
vec4 filter_output = texture_load(first_causal_input_tx, texel) +
|
||||
texture_load(first_non_causal_input_tx, texel) +
|
||||
texture_load(second_causal_input_tx, texel) +
|
||||
texture_load(second_non_causal_input_tx, texel);
|
||||
|
||||
/* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
|
||||
* in the van_vliet_gaussian_blur.cc file for more information on the rational behind this. */
|
||||
imageStore(output_img, texel.yx, filter_output);
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "gpu_shader_create_info.hh"
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur)
|
||||
.local_group_size(128, 2)
|
||||
.push_constant(Type::VEC4, "causal_feedforward_coefficients")
|
||||
.push_constant(Type::VEC4, "non_causal_feedforward_coefficients")
|
||||
.push_constant(Type::VEC4, "feedback_coefficients")
|
||||
.push_constant(Type::FLOAT, "causal_boundary_coefficient")
|
||||
.push_constant(Type::FLOAT, "non_causal_boundary_coefficient")
|
||||
.sampler(0, ImageType::FLOAT_2D, "input_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "causal_output_img")
|
||||
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "non_causal_output_img")
|
||||
.compute_source("compositor_deriche_gaussian_blur.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur_sum)
|
||||
.local_group_size(16, 16)
|
||||
.sampler(0, ImageType::FLOAT_2D, "causal_input_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "non_causal_input_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
|
||||
.compute_source("compositor_deriche_gaussian_blur_sum.glsl")
|
||||
.do_static_compilation(true);
|
|
@ -0,0 +1,33 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "gpu_shader_create_info.hh"
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur)
|
||||
.local_group_size(64, 4)
|
||||
.push_constant(Type::VEC2, "first_feedback_coefficients")
|
||||
.push_constant(Type::VEC2, "first_causal_feedforward_coefficients")
|
||||
.push_constant(Type::VEC2, "first_non_causal_feedforward_coefficients")
|
||||
.push_constant(Type::VEC2, "second_feedback_coefficients")
|
||||
.push_constant(Type::VEC2, "second_causal_feedforward_coefficients")
|
||||
.push_constant(Type::VEC2, "second_non_causal_feedforward_coefficients")
|
||||
.push_constant(Type::FLOAT, "first_causal_boundary_coefficient")
|
||||
.push_constant(Type::FLOAT, "first_non_causal_boundary_coefficient")
|
||||
.push_constant(Type::FLOAT, "second_causal_boundary_coefficient")
|
||||
.push_constant(Type::FLOAT, "second_non_causal_boundary_coefficient")
|
||||
.sampler(0, ImageType::FLOAT_2D, "input_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_causal_output_img")
|
||||
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_non_causal_output_img")
|
||||
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_causal_output_img")
|
||||
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_non_causal_output_img")
|
||||
.compute_source("compositor_van_vliet_gaussian_blur.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur_sum)
|
||||
.local_group_size(16, 16)
|
||||
.sampler(0, ImageType::FLOAT_2D, "first_causal_input_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "first_non_causal_input_tx")
|
||||
.sampler(2, ImageType::FLOAT_2D, "second_causal_input_tx")
|
||||
.sampler(3, ImageType::FLOAT_2D, "second_non_causal_input_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
|
||||
.compute_source("compositor_van_vliet_gaussian_blur_sum.glsl")
|
||||
.do_static_compilation(true);
|
|
@ -19,6 +19,7 @@
|
|||
#include "GPU_shader.hh"
|
||||
#include "GPU_texture.hh"
|
||||
|
||||
#include "COM_algorithm_recursive_gaussian_blur.hh"
|
||||
#include "COM_algorithm_symmetric_separable_blur.hh"
|
||||
#include "COM_node_operation.hh"
|
||||
#include "COM_symmetric_blur_weights.hh"
|
||||
|
@ -106,7 +107,11 @@ class BlurOperation : public NodeOperation {
|
|||
return;
|
||||
}
|
||||
|
||||
if (use_variable_size()) {
|
||||
if (node_storage(bnode()).filtertype == R_FILTER_FAST_GAUSS) {
|
||||
recursive_gaussian_blur(
|
||||
context(), get_input("Image"), get_result("Image"), compute_blur_radius());
|
||||
}
|
||||
else if (use_variable_size()) {
|
||||
execute_variable_size();
|
||||
}
|
||||
else if (use_separable_filter()) {
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit d9c72c9c34a205f5012b5dbd84bb17116775df49
|
||||
Subproject commit 3a1d189a550119157aa4246de27d9d661b6f6548
|
Loading…
Reference in New Issue