Realtime Compositor: Implement Fast Gaussian blur #120431

Merged
Omar Emara merged 31 commits from OmarEmaraDev/blender:recursive-gaussian-blur into main 2024-05-01 09:57:43 +02:00
21 changed files with 1821 additions and 2 deletions

View File

@ -63,35 +63,42 @@ set(SRC
COM_texture_pool.hh
COM_utilities.hh
algorithms/intern/deriche_gaussian_blur.cc
algorithms/intern/jump_flooding.cc
algorithms/intern/morphological_blur.cc
algorithms/intern/morphological_distance.cc
algorithms/intern/morphological_distance_feather.cc
algorithms/intern/parallel_reduction.cc
algorithms/intern/realize_on_domain.cc
algorithms/intern/recursive_gaussian_blur.cc
algorithms/intern/smaa.cc
algorithms/intern/summed_area_table.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/intern/symmetric_separable_blur_variable_size.cc
algorithms/intern/transform.cc
algorithms/intern/van_vliet_gaussian_blur.cc
algorithms/COM_algorithm_deriche_gaussian_blur.hh
algorithms/COM_algorithm_jump_flooding.hh
algorithms/COM_algorithm_morphological_blur.hh
algorithms/COM_algorithm_morphological_distance.hh
algorithms/COM_algorithm_morphological_distance_feather.hh
algorithms/COM_algorithm_parallel_reduction.hh
algorithms/COM_algorithm_realize_on_domain.hh
algorithms/COM_algorithm_recursive_gaussian_blur.hh
algorithms/COM_algorithm_smaa.hh
algorithms/COM_algorithm_summed_area_table.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
algorithms/COM_algorithm_symmetric_separable_blur_variable_size.hh
algorithms/COM_algorithm_transform.hh
algorithms/COM_algorithm_van_vliet_gaussian_blur.hh
cached_resources/intern/bokeh_kernel.cc
cached_resources/intern/cached_image.cc
cached_resources/intern/cached_mask.cc
cached_resources/intern/cached_shader.cc
cached_resources/intern/cached_texture.cc
cached_resources/intern/deriche_gaussian_coefficients.cc
cached_resources/intern/distortion_grid.cc
cached_resources/intern/keying_screen.cc
cached_resources/intern/morphological_distance_feather_weights.cc
@ -99,6 +106,7 @@ set(SRC
cached_resources/intern/smaa_precomputed_textures.cc
cached_resources/intern/symmetric_blur_weights.cc
cached_resources/intern/symmetric_separable_blur_weights.cc
cached_resources/intern/van_vliet_gaussian_coefficients.cc
cached_resources/COM_bokeh_kernel.hh
cached_resources/COM_cached_image.hh
@ -106,6 +114,7 @@ set(SRC
cached_resources/COM_cached_resource.hh
cached_resources/COM_cached_shader.hh
cached_resources/COM_cached_texture.hh
cached_resources/COM_deriche_gaussian_coefficients.hh
cached_resources/COM_distortion_grid.hh
cached_resources/COM_keying_screen.hh
cached_resources/COM_morphological_distance_feather_weights.hh
@ -113,6 +122,7 @@ set(SRC
cached_resources/COM_smaa_precomputed_textures.hh
cached_resources/COM_symmetric_blur_weights.hh
cached_resources/COM_symmetric_separable_blur_weights.hh
cached_resources/COM_van_vliet_gaussian_coefficients.hh
)
set(LIB
@ -142,6 +152,8 @@ set(GLSL_SRC
shaders/compositor_defocus_radius_from_depth.glsl
shaders/compositor_defocus_radius_from_scale.glsl
shaders/compositor_despeckle.glsl
shaders/compositor_deriche_gaussian_blur.glsl
shaders/compositor_deriche_gaussian_blur_sum.glsl
shaders/compositor_directional_blur.glsl
shaders/compositor_displace.glsl
shaders/compositor_double_edge_mask_compute_boundary.glsl
@ -215,6 +227,8 @@ set(GLSL_SRC
shaders/compositor_symmetric_separable_blur_variable_size.glsl
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_van_vliet_gaussian_blur.glsl
shaders/compositor_van_vliet_gaussian_blur_sum.glsl
shaders/compositor_write_output.glsl
shaders/compositor_z_combine_compute_mask.glsl
shaders/compositor_z_combine_from_mask.glsl
@ -290,6 +304,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_convert_info.hh
shaders/infos/compositor_cryptomatte_info.hh
shaders/infos/compositor_defocus_info.hh
shaders/infos/compositor_deriche_gaussian_blur_info.hh
shaders/infos/compositor_despeckle_info.hh
shaders/infos/compositor_directional_blur_info.hh
shaders/infos/compositor_displace_info.hh
@ -334,6 +349,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_symmetric_separable_blur_variable_size_info.hh
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_van_vliet_gaussian_blur_info.hh
shaders/infos/compositor_write_output_info.hh
shaders/infos/compositor_z_combine_info.hh
)

View File

@ -9,6 +9,7 @@
#include "COM_cached_mask.hh"
#include "COM_cached_shader.hh"
#include "COM_cached_texture.hh"
#include "COM_deriche_gaussian_coefficients.hh"
#include "COM_distortion_grid.hh"
#include "COM_keying_screen.hh"
#include "COM_morphological_distance_feather_weights.hh"
@ -16,6 +17,7 @@
#include "COM_smaa_precomputed_textures.hh"
#include "COM_symmetric_blur_weights.hh"
#include "COM_symmetric_separable_blur_weights.hh"
#include "COM_van_vliet_gaussian_coefficients.hh"
namespace blender::realtime_compositor {
@ -55,6 +57,8 @@ class StaticCacheManager {
CachedShaderContainer cached_shaders;
BokehKernelContainer bokeh_kernels;
CachedImageContainer cached_images;
DericheGaussianCoefficientsContainer deriche_gaussian_coefficients;
VanVlietGaussianCoefficientsContainer van_vliet_gaussian_coefficients;
/* Reset the cache manager by deleting the cached resources that are no longer needed because
* they weren't used in the last evaluation and prepare the remaining cached resources to track

View File

@ -0,0 +1,31 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_math_vector_types.hh"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
* sigma computed using Deriche's design method. This is based on the following paper:
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*
* This differs from the standard symmetric separable blur algorithm in that it is faster for high
* sigma values, the downside is that it consumes more memory and is only an approximation that
* might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
* is numerically unstable and not accurate for sigma values larger than 32, in those cases, use
* the Van Vliet filter instead. Further, for sigma values less than 3, use direct convolution
* instead, since it is faster and more accurate. Neumann boundary is assumed.
*
* The output is written to the given output result, which will be allocated internally and is thus
* expected not to be previously allocated. */
void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,24 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_math_vector_types.hh"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using a recursive Gaussian blur algorithm given a certain radius. This differs
* from the standard symmetric separable blur algorithm in that it is orders of magnitude faster
* for very high radius value, the downside is that it consumes more memory and is only an
* approximation that might suffer from fringing and artifacts, though those are typically
* unnoticeable. Neumann boundary is assumed.
*
* The output is written to the given output result, which will be allocated internally and is thus
* expected not to be previously allocated. */
void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,36 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_math_vector_types.hh"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
* sigma computed using Van Vliet's design method. This is based on the following paper:
*
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
* 98EX170). Vol. 1. IEEE, 1998.
*
* However, we internally split the fourth order IIR filter into two second order sections in order
* to improve its numerical stability and improve parallelism. See the implementation for more
* information.
*
* This differs from the standard symmetric separable blur algorithm in that it is faster for high
* sigma values, the downside is that it consumes more memory and is only an approximation that
* might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
* is not accurate for sigma values less than 32, in those cases, use the Deriche filter instead.
* Further, for sigma values less than 3, use direct convolution instead, since it is faster and
* more accurate. Neumann boundary is assumed.
*
* The output is written to the given output result, which will be allocated internally and is thus
* expected not to be previously allocated. */
void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,119 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "GPU_shader.hh"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_deriche_gaussian_blur.hh"
#include "COM_deriche_gaussian_coefficients.hh"
namespace blender::realtime_compositor {
/* Sum the causal and non causal outputs of the filter and write the sum to the output. This is
* because the Deriche filter is a parallel interconnection filter, meaning its output is the sum
* of its causal and non causal filters. The output is expected not to be allocated as it will be
* allocated internally.
*
* The output is allocated and written transposed, that is, with a height equivalent to the width
* of the input and vice versa. This is done as a performance optimization. The blur pass will
* blur the image horizontally and write it to the intermediate output transposed. Then the
* vertical pass will execute the same horizontal blur shader, but since its input is transposed,
* it will effectively do a vertical blur and write to the output transposed, effectively undoing
* the transposition in the horizontal pass. This is done to improve spatial cache locality in the
* shader and to avoid having two separate shaders for each blur pass. */
static void sum_causal_and_non_causal_results(Context &context,
Result &causal_input,
Result &non_causal_input,
Result &output)
{
GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur_sum");
GPU_shader_bind(shader);
causal_input.bind_as_texture(shader, "causal_input_tx");
non_causal_input.bind_as_texture(shader, "non_causal_input_tx");
const Domain domain = causal_input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
causal_input.unbind_as_texture();
non_causal_input.unbind_as_texture();
output.unbind_as_image();
}
static void blur_pass(Context &context, Result &input, Result &output, float sigma)
{
GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur");
GPU_shader_bind(shader);
const DericheGaussianCoefficients &coefficients =
context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
GPU_shader_uniform_4fv(shader,
"causal_feedforward_coefficients",
float4(coefficients.causal_feedforward_coefficients()));
GPU_shader_uniform_4fv(shader,
"non_causal_feedforward_coefficients",
float4(coefficients.non_causal_feedforward_coefficients()));
GPU_shader_uniform_4fv(
shader, "feedback_coefficients", float4(coefficients.feedback_coefficients()));
GPU_shader_uniform_1f(
shader, "causal_boundary_coefficient", float(coefficients.causal_boundary_coefficient()));
GPU_shader_uniform_1f(shader,
"non_causal_boundary_coefficient",
float(coefficients.non_causal_boundary_coefficient()));
input.bind_as_texture(shader, "input_tx");
const Domain domain = input.domain();
Result causal_result = context.create_temporary_result(ResultType::Color);
causal_result.allocate_texture(domain);
causal_result.bind_as_image(shader, "causal_output_img");
Result non_causal_result = context.create_temporary_result(ResultType::Color);
non_causal_result.allocate_texture(domain);
non_causal_result.bind_as_image(shader, "non_causal_output_img");
/* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
* causal one. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, 2), int2(128, 2));
GPU_shader_unbind();
input.unbind_as_texture();
causal_result.unbind_as_image();
non_causal_result.unbind_as_image();
sum_causal_and_non_causal_results(context, causal_result, non_causal_result, output);
causal_result.release();
non_causal_result.release();
}
void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
{
BLI_assert_msg(math::reduce_max(sigma) >= 3.0f,
"Deriche filter is slower and less accurate than direct convolution for sigma "
"values less 3. Use direct convolution blur instead.");
BLI_assert_msg(math::reduce_max(sigma) < 32.0f,
"Deriche filter is not accurate nor numerically stable for sigma values larger "
"than 32. Use Van Vliet filter instead.");
Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
blur_pass(context, input, horizontal_pass_result, sigma.x);
blur_pass(context, horizontal_pass_result, output, sigma.y);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,68 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_algorithm_deriche_gaussian_blur.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_algorithm_van_vliet_gaussian_blur.hh"
namespace blender::realtime_compositor {
/* Compute the Gaussian sigma from the radius, where the radius is in pixels. Blender's filter is
* truncated at |x| > 3 * sigma as can be seen in the R_FILTER_GAUSS case of the RE_filter_value
* function, so we divide by three to get the approximate sigma value. Further, ensure the radius
* is at least 1 since recursive Gaussian implementations can't handle zero radii. */
static float2 compute_sigma_from_radius(float2 radius)
{
return math::max(float2(1.0f), radius) / 3.0f;
}
/* Apply a recursive Gaussian blur algorithm on the input based on the general method outlined
* in the following paper:
*
* Hale, Dave. "Recursive gaussian filters." CWP-546 (2006).
*
* In particular, based on the table in Section 5 Conclusion, for very low radius blur, we use a
* direct separable Gaussian convolution. For medium blur radius, we use the fourth order IIR
* Deriche filter based on the following paper:
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*
* For high radius blur, we use the fourth order IIR Van Vliet filter based on the following paper:
*
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
* 98EX170). Vol. 1. IEEE, 1998.
*
* That's because direct convolution is faster and more accurate for very low radius, while the
* Deriche filter is more accurate for medium blur radius, while Van Vliet is more accurate for
* high blur radius. The criteria suggested by the paper is a sigma value threshold of 3 and 32 for
* the Deriche and Van Vliet filters respectively, which we apply on the larger of the two
* dimensions. */
void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius)
{
/* The radius is in pixel units, while both recursive implementations expect the sigma value of
* the Gaussian function. */
const float2 sigma = compute_sigma_from_radius(radius);
if (math::reduce_max(sigma) < 3.0f) {
symmetric_separable_blur(context, input, output, radius);
return;
}
if (math::reduce_max(sigma) < 32.0f) {
deriche_gaussian_blur(context, input, output, sigma);
return;
}
van_vliet_gaussian_blur(context, input, output, sigma);
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,155 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "GPU_shader.hh"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_van_vliet_gaussian_blur.hh"
#include "COM_van_vliet_gaussian_coefficients.hh"
namespace blender::realtime_compositor {
/* Sum all four of the causal and non causal outputs of the first and second filters and write the
* sum to the output. This is because the Van Vliet filter is implemented as a bank of 2 parallel
* second order filters, meaning its output is the sum of the causal and non causal filters of both
* filters. The output is expected not to be allocated as it will be allocated internally.
*
* The output is allocated and written transposed, that is, with a height equivalent to the width
* of the input and vice versa. This is done as a performance optimization. The blur pass will
* blur the image horizontally and write it to the intermediate output transposed. Then the
* vertical pass will execute the same horizontal blur shader, but since its input is transposed,
* it will effectively do a vertical blur and write to the output transposed, effectively undoing
* the transposition in the horizontal pass. This is done to improve spatial cache locality in the
* shader and to avoid having two separate shaders for each blur pass. */
static void sum_causal_and_non_causal_results(Context &context,
Result &first_causal_input,
Result &first_non_causal_input,
Result &second_causal_input,
Result &second_non_causal_input,
Result &output)
{
GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur_sum");
GPU_shader_bind(shader);
first_causal_input.bind_as_texture(shader, "first_causal_input_tx");
first_non_causal_input.bind_as_texture(shader, "first_non_causal_input_tx");
second_causal_input.bind_as_texture(shader, "second_causal_input_tx");
second_non_causal_input.bind_as_texture(shader, "second_non_causal_input_tx");
const Domain domain = first_causal_input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
first_causal_input.unbind_as_texture();
first_non_causal_input.unbind_as_texture();
second_causal_input.unbind_as_texture();
second_non_causal_input.unbind_as_texture();
output.unbind_as_image();
}
static void blur_pass(Context &context, Result &input, Result &output, float sigma)
{
GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur");
GPU_shader_bind(shader);
const VanVlietGaussianCoefficients &coefficients =
context.cache_manager().van_vliet_gaussian_coefficients.get(context, sigma);
GPU_shader_uniform_2fv(
shader, "first_feedback_coefficients", float2(coefficients.first_feedback_coefficients()));
GPU_shader_uniform_2fv(shader,
"first_causal_feedforward_coefficients",
float2(coefficients.first_causal_feedforward_coefficients()));
GPU_shader_uniform_2fv(shader,
"first_non_causal_feedforward_coefficients",
float2(coefficients.first_non_causal_feedforward_coefficients()));
GPU_shader_uniform_2fv(
shader, "second_feedback_coefficients", float2(coefficients.second_feedback_coefficients()));
GPU_shader_uniform_2fv(shader,
"second_causal_feedforward_coefficients",
float2(coefficients.second_causal_feedforward_coefficients()));
GPU_shader_uniform_2fv(shader,
"second_non_causal_feedforward_coefficients",
float2(coefficients.second_non_causal_feedforward_coefficients()));
GPU_shader_uniform_1f(shader,
"first_causal_boundary_coefficient",
float(coefficients.first_causal_boundary_coefficient()));
GPU_shader_uniform_1f(shader,
"first_non_causal_boundary_coefficient",
float(coefficients.first_non_causal_boundary_coefficient()));
GPU_shader_uniform_1f(shader,
"second_causal_boundary_coefficient",
float(coefficients.second_causal_boundary_coefficient()));
GPU_shader_uniform_1f(shader,
"second_non_causal_boundary_coefficient",
float(coefficients.second_non_causal_boundary_coefficient()));
input.bind_as_texture(shader, "input_tx");
const Domain domain = input.domain();
Result first_causal_result = context.create_temporary_result(ResultType::Color);
first_causal_result.allocate_texture(domain);
first_causal_result.bind_as_image(shader, "first_causal_output_img");
Result first_non_causal_result = context.create_temporary_result(ResultType::Color);
first_non_causal_result.allocate_texture(domain);
first_non_causal_result.bind_as_image(shader, "first_non_causal_output_img");
Result second_causal_result = context.create_temporary_result(ResultType::Color);
second_causal_result.allocate_texture(domain);
second_causal_result.bind_as_image(shader, "second_causal_output_img");
Result second_non_causal_result = context.create_temporary_result(ResultType::Color);
second_non_causal_result.allocate_texture(domain);
second_non_causal_result.bind_as_image(shader, "second_non_causal_output_img");
/* The second dispatch dimension is 4 dispatches, one for the first causal filter, one for the
* first non causal filter, one for the second causal filter, and one for the second non causal
* filter. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, 4), int2(64, 4));
GPU_shader_unbind();
input.unbind_as_texture();
first_causal_result.unbind_as_image();
first_non_causal_result.unbind_as_image();
second_causal_result.unbind_as_image();
second_non_causal_result.unbind_as_image();
sum_causal_and_non_causal_results(context,
first_causal_result,
first_non_causal_result,
second_causal_result,
second_non_causal_result,
output);
first_causal_result.release();
first_non_causal_result.release();
second_causal_result.release();
second_non_causal_result.release();
}
void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
{
BLI_assert_msg(math::reduce_max(sigma) >= 32.0f,
"Van Vliet filter is less accurate for sigma values less than 32. Use Deriche "
"filter instead or direct convolution instead.");
Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
blur_pass(context, input, horizontal_pass_result, sigma.x);
blur_pass(context, horizontal_pass_result, output, sigma.y);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,87 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <cstdint>
#include <memory>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "COM_cached_resource.hh"
namespace blender::realtime_compositor {
class Context;
/* ------------------------------------------------------------------------------------------------
* Deriche Gaussian Coefficients Key.
*/
class DericheGaussianCoefficientsKey {
public:
float sigma;
DericheGaussianCoefficientsKey(float sigma);
uint64_t hash() const;
};
bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b);
/* -------------------------------------------------------------------------------------------------
* Deriche Gaussian Coefficients.
*
* A caches resource that computes and caches the coefficients of the fourth order IIR filter
* approximating a Gaussian filter computed using Deriche's design method. This is based on the
* following paper:
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*/
class DericheGaussianCoefficients : public CachedResource {
private:
/* The d_ii coefficients in Equation (28) and (29). Those are the same for the causal and non
* causal filters as can be seen in Equation (31). */
double4 feedback_coefficients_;
/* The n_ii^+ coefficients in Equation (28). */
double4 causal_feedforward_coefficients_;
/* The n_ii^- coefficients in Equation (29). */
double4 non_causal_feedforward_coefficients_;
/* The difference equation in Equation (28) rely on previous outputs to compute the new output,
* and those previous outputs need to be properly initialized somehow. To do Neumann boundary
* condition, we multiply the boundary value with this coefficient to simulate an infinite stream
* of the boundary value. See the implementation for more information. */
double causal_boundary_coefficient_;
/* Same as causal_boundary_coefficient_ but for the non causal filter. */
double non_causal_boundary_coefficient_;
public:
DericheGaussianCoefficients(Context &context, float sigma);
const double4 &feedback_coefficients() const;
const double4 &causal_feedforward_coefficients() const;
const double4 &non_causal_feedforward_coefficients() const;
double causal_boundary_coefficient() const;
double non_causal_boundary_coefficient() const;
};
/* ------------------------------------------------------------------------------------------------
* Deriche Gaussian Coefficients Container.
*/
class DericheGaussianCoefficientsContainer : CachedResourceContainer {
private:
Map<DericheGaussianCoefficientsKey, std::unique_ptr<DericheGaussianCoefficients>> map_;
public:
void reset() override;
/* Check if there is an available DericheGaussianCoefficients cached resource with the given
* parameters in the container, if one exists, return it, otherwise, return a newly created one
* and add it to the container. In both cases, tag the cached resource as needed to keep it
* cached for the next evaluation. */
DericheGaussianCoefficients &get(Context &context, float sigma);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,106 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <cstdint>
#include <memory>
#include "BLI_map.hh"
#include "BLI_math_vector_types.hh"
#include "COM_cached_resource.hh"
namespace blender::realtime_compositor {
class Context;
/* ------------------------------------------------------------------------------------------------
* Van Vliet Gaussian Coefficients Key.
*/
class VanVlietGaussianCoefficientsKey {
public:
float sigma;
VanVlietGaussianCoefficientsKey(float sigma);
uint64_t hash() const;
};
bool operator==(const VanVlietGaussianCoefficientsKey &a,
const VanVlietGaussianCoefficientsKey &b);
/* -------------------------------------------------------------------------------------------------
* Van Vliet Gaussian Coefficients.
*
* A caches resource that computes and caches the coefficients of the fourth order IIR filter
* approximating a Gaussian filter computed using Van Vliet's design method. This is based on the
* following paper:
*
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
* 98EX170). Vol. 1. IEEE, 1998.
*
* However, to improve the numerical stability of the filter, it is decomposed into a bank of
* two parallel second order IIR filters, each having a causal and a non causal filter. */
class VanVlietGaussianCoefficients : public CachedResource {
private:
/* The causal and non causal feedforward coefficients for the first second order filter. */
double2 first_causal_feedforward_coefficients_;
double2 first_non_causal_feedforward_coefficients_;
/* The feedback coefficients for the first second order filter. This is the same for both the
* causal and non causal filters. */
double2 first_feedback_coefficients_;
/* The causal and non causal feedforward coefficients for the second second order filter. */
double2 second_causal_feedforward_coefficients_;
double2 second_non_causal_feedforward_coefficients_;
/* The feedback coefficients for the second second order filter. This is the same for both the
* causal and non causal filters. */
double2 second_feedback_coefficients_;
/* The difference equation of the IIR filter rely on previous outputs to compute the new output,
* and those previous outputs need to be properly initialized somehow. To do Neumann boundary
* condition, we multiply the boundary value with this coefficient to simulate an infinite stream
* of the boundary value. See the implementation for more information. */
double first_causal_boundary_coefficient_;
double first_non_causal_boundary_coefficient_;
double second_causal_boundary_coefficient_;
double second_non_causal_boundary_coefficient_;
public:
VanVlietGaussianCoefficients(Context &context, float sigma);
const double2 &first_causal_feedforward_coefficients() const;
const double2 &first_non_causal_feedforward_coefficients() const;
const double2 &first_feedback_coefficients() const;
const double2 &second_causal_feedforward_coefficients() const;
const double2 &second_non_causal_feedforward_coefficients() const;
const double2 &second_feedback_coefficients() const;
double first_causal_boundary_coefficient() const;
double first_non_causal_boundary_coefficient() const;
double second_causal_boundary_coefficient() const;
double second_non_causal_boundary_coefficient() const;
};
/* ------------------------------------------------------------------------------------------------
* Van Vliet Gaussian Coefficients Container.
*/
class VanVlietGaussianCoefficientsContainer : CachedResourceContainer {
private:
Map<VanVlietGaussianCoefficientsKey, std::unique_ptr<VanVlietGaussianCoefficients>> map_;
public:
void reset() override;
/* Check if there is an available VanVlietGaussianCoefficients cached resource with the given
* parameters in the container, if one exists, return it, otherwise, return a newly created one
* and add it to the container. In both cases, tag the cached resource as needed to keep it
* cached for the next evaluation. */
VanVlietGaussianCoefficients &get(Context &context, float sigma);
};
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,319 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* -------------------------------------------------------------------------------------------------
* Deriche Gaussian Coefficients.
*
* Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
* computed using Deriche's design method. This is based on the following paper:
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*
* But with corrections in the normalization scale from the following paper, as will be seen in the
* implementation:
*
* Farneback, Gunnar, and Carl-Fredrik Westin. Improving Deriche-style recursive Gaussian
* filters. Journal of Mathematical Imaging and Vision 26.3 (2006): 293-299.
*
* The Deriche filter is computed as the sum of a causal and a non causal sequence of second order
* difference equations as can be seen in Equation (30) in Deriche's paper, and the target of this
* class is to compute the feedback, causal feedforward, and non causal feedforward coefficients of
* the filter. */
#include <cstdint>
#include <memory>
#include "BLI_hash.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "COM_context.hh"
#include "COM_deriche_gaussian_coefficients.hh"
namespace blender::realtime_compositor {
/* --------------------------------------------------------------------
* Deriche Gaussian Coefficients Key.
*/
DericheGaussianCoefficientsKey::DericheGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
uint64_t DericheGaussianCoefficientsKey::hash() const
{
return get_default_hash(sigma);
}
bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b)
{
return a.sigma == b.sigma;
}
/* -------------------------------------------------------------------------------------------------
* Deriche Gaussian Coefficients.
*/
/* The base constant coefficients computed using Deriche's method with 10 digits of precision.
* Those are available in Deriche's paper by comparing Equations (19) and (38). */
inline constexpr static double a0 = 1.6797292232361107;
inline constexpr static double a1 = 3.7348298269103580;
inline constexpr static double b0 = 1.7831906544515104;
inline constexpr static double b1 = 1.7228297663338028;
inline constexpr static double c0 = -0.6802783501806897;
inline constexpr static double c1 = -0.2598300478959625;
inline constexpr static double w0 = 0.6318113174569493;
inline constexpr static double w1 = 1.9969276832487770;
/* Computes n00 in Equation (21) in Deriche's paper. */
static double compute_numerator_0()
{
return a0 + c0;
}
/* Computes n11 in Equation (21) in Deriche's paper. */
static double compute_numerator_1(float sigma)
{
const double multiplier1 = math::exp(-b1 / sigma);
const double term1 = c1 * math::sin(w1 / sigma) - (c0 + 2.0 * a0) * math::cos(w1 / sigma);
const double multiplier2 = math::exp(-b0 / sigma);
const double term2 = a1 * math::sin(w0 / sigma) - (2.0 * c0 + a0) * math::cos(w0 / sigma);
return multiplier1 * term1 + multiplier2 * term2;
}
/* Computes n22 in Equation (21) in Deriche's paper. */
static double compute_numerator_2(float sigma)
{
const double multiplier1 = 2.0 * math::exp(-(b0 / sigma) - (b1 / sigma));
const double term11 = (a0 + c0) * math::cos(w1 / sigma) * math::cos(w0 / sigma);
const double term12 = math::cos(w1 / sigma) * a1 * math::sin(w0 / sigma);
const double term13 = math::cos(w0 / sigma) * c1 * math::sin(w1 / sigma);
const double term1 = term11 - term12 - term13;
const double term2 = c0 * math::exp(-2.0 * (b0 / sigma));
const double term3 = a0 * math::exp(-2.0 * (b1 / sigma));
return multiplier1 * term1 + term2 + term3;
}
/* Computes n33 in Equation (21) in Deriche's paper. */
static double compute_numerator_3(float sigma)
{
const double multiplier1 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
const double term1 = c1 * math::sin(w1 / sigma) - math::cos(w1 / sigma) * c0;
const double multiplier2 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
const double term2 = a1 * math::sin(w0 / sigma) - math::cos(w0 / sigma) * a0;
return multiplier1 * term1 + multiplier2 * term2;
}
/* Computes and packs the numerators in Equation (21) in Deriche's paper. */
static double4 compute_numerator(float sigma)
{
const double n0 = compute_numerator_0();
const double n1 = compute_numerator_1(sigma);
const double n2 = compute_numerator_2(sigma);
const double n3 = compute_numerator_3(sigma);
return double4(n0, n1, n2, n3);
}
/* Computes d11 in Equation (22) in Deriche's paper. */
static double compute_denominator_1(float sigma)
{
const double term1 = -2.0 * math::exp(-(b0 / sigma)) * math::cos(w0 / sigma);
const double term2 = 2.0 * math::exp(-(b1 / sigma)) * math::cos(w1 / sigma);
return term1 - term2;
}
/* Computes d22 in Equation (22) in Deriche's paper. */
static double compute_denominator_2(float sigma)
{
const double term1 = 4.0 * math::cos(w1 / sigma) * math::cos(w0 / sigma);
const double multiplier1 = math::exp(-(b0 / sigma) - (b1 / sigma));
const double term2 = math::exp(-2.0 * (b1 / sigma));
const double term3 = math::exp(-2.0 * (b0 / sigma));
return term1 * multiplier1 + term2 + term3;
}
/* Computes d33 in Equation (22) in Deriche's paper. */
static double compute_denominator_3(float sigma)
{
const double term1 = -2.0 * math::cos(w0 / sigma);
const double multiplier1 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
const double term2 = 2.0 * math::cos(w1 / sigma);
const double multiplier2 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
return term1 * multiplier1 - term2 * multiplier2;
}
/* Computes d44 in Equation (22) in Deriche's paper. */
static double compute_denominator_4(float sigma)
{
return math::exp(-2.0 * (b0 / sigma) - 2.0 * (b1 / sigma));
}
/* Computes and packs the denominators in Equation (22) in Deriche's paper. */
static double4 compute_denominator(float sigma)
{
const double d1 = compute_denominator_1(sigma);
const double d2 = compute_denominator_2(sigma);
const double d3 = compute_denominator_3(sigma);
const double d4 = compute_denominator_4(sigma);
return double4(d1, d2, d3, d4);
}
/* Computes the normalization scale that the feedforward coefficients should be divided by to
* match the unit integral of the Gaussian. The scaling factor proposed by Deriche's paper in
* Equation (50) is wrong due to missing terms. A correct scaling factor is presented in
* Farneback's paper in Equation (25), which is implemented in this method. */
static float compute_normalization_scale(const double4 &causal_feedforward_coefficients,
const double4 &feedback_coefficients)
{
const double causal_feedforwad_sum = math::reduce_add(causal_feedforward_coefficients);
const double feedback_sum = 1.0 + math::reduce_add(feedback_coefficients);
return 2.0 * (causal_feedforwad_sum / feedback_sum) - causal_feedforward_coefficients[0];
}
/* Computes the non causal feedforward coefficients from the feedback and causal feedforward
* coefficients based on Equation (31) in Deriche's paper. Notice that the equation is linear, so
* the coefficients can be computed after the normalization of the causal feedforward
* coefficients. */
static double4 compute_non_causal_feedforward_coefficients(
const double4 &causal_feedforward_coefficients, const double4 &feedback_coefficients)
{
const double n1 = causal_feedforward_coefficients[1] -
feedback_coefficients[0] * causal_feedforward_coefficients[0];
const double n2 = causal_feedforward_coefficients[2] -
feedback_coefficients[1] * causal_feedforward_coefficients[0];
const double n3 = causal_feedforward_coefficients[3] -
feedback_coefficients[2] * causal_feedforward_coefficients[0];
const double n4 = -feedback_coefficients[3] * causal_feedforward_coefficients[0];
return double4(n1, n2, n3, n4);
}
/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
* previous outputs are not really defined at the start of the filter. To do Neumann boundary
* condition, we initialize the previous output with a special value that is a function of the
* boundary value. This special value is computed by multiply the boundary value with a coefficient
* to simulate an infinite stream of the boundary value.
*
* The function for the coefficient can be derived by substituting the boundary value for previous
* inputs, equating all current and previous outputs to the same value, and finally rearranging to
* compute that same output value.
*
* Start by the difference equation where b_i are the feedforward coefficients and a_i are the
* feedback coefficients:
*
* y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
*
* Assume all outputs are y and all inputs are x, which is the boundary value:
*
* y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
*
* Now rearrange to compute y:
*
* y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
* y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
* y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
* y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
*
* So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
* that is, we are doing Dirichlet boundary condition, the equations still hold. */
static double compute_boundary_coefficient(const double4 &feedforward_coefficients,
const double4 &feedback_coefficients)
{
return math::reduce_add(feedforward_coefficients) /
(1.0 + math::reduce_add(feedback_coefficients));
}
/* Computes the feedback, causal feedforward, and non causal feedforward coefficients given a
* target Gaussian sigma value as used in Equations (28) and (29) in Deriche's paper. */
DericheGaussianCoefficients::DericheGaussianCoefficients(Context & /*context*/, float sigma)
{
/* The numerator coefficients are the causal feedforward coefficients and the denominator
* coefficients are the feedback coefficients as can be seen in Equation (28). */
causal_feedforward_coefficients_ = compute_numerator(sigma);
feedback_coefficients_ = compute_denominator(sigma);
/* Normalize the feedforward coefficients as discussed in Section "5.4 Normalization" in
* Deriche's paper. Feedback coefficients do not need normalization. */
causal_feedforward_coefficients_ /= compute_normalization_scale(causal_feedforward_coefficients_,
feedback_coefficients_);
/* Compute the non causal feedforward coefficients from the feedback and normalized causal
* feedforward coefficients based on Equation (31) from Deriche's paper. Since the causal
* coefficients are already normalized, this doesn't need normalization. */
non_causal_feedforward_coefficients_ = compute_non_causal_feedforward_coefficients(
causal_feedforward_coefficients_, feedback_coefficients_);
/* Compute the boundary coefficient for both the causal and non causal filters. */
causal_boundary_coefficient_ = compute_boundary_coefficient(causal_feedforward_coefficients_,
feedback_coefficients_);
non_causal_boundary_coefficient_ = compute_boundary_coefficient(
non_causal_feedforward_coefficients_, feedback_coefficients_);
}
const double4 &DericheGaussianCoefficients::feedback_coefficients() const
{
return feedback_coefficients_;
}
const double4 &DericheGaussianCoefficients::causal_feedforward_coefficients() const
{
return causal_feedforward_coefficients_;
}
const double4 &DericheGaussianCoefficients::non_causal_feedforward_coefficients() const
{
return non_causal_feedforward_coefficients_;
}
double DericheGaussianCoefficients::causal_boundary_coefficient() const
{
return causal_boundary_coefficient_;
}
double DericheGaussianCoefficients::non_causal_boundary_coefficient() const
{
return non_causal_boundary_coefficient_;
}
/* --------------------------------------------------------------------
* Deriche Gaussian Coefficients Container.
*/
void DericheGaussianCoefficientsContainer::reset()
{
/* First, delete all resources that are no longer needed. */
map_.remove_if([](auto item) { return !item.value->needed; });
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : map_.values()) {
value->needed = false;
}
}
DericheGaussianCoefficients &DericheGaussianCoefficientsContainer::get(Context &context,
float sigma)
{
const DericheGaussianCoefficientsKey key(sigma);
auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
key, [&]() { return std::make_unique<DericheGaussianCoefficients>(context, sigma); });
deriche_gaussian_coefficients.needed = true;
return deriche_gaussian_coefficients;
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,536 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* -------------------------------------------------------------------------------------------------
* Van Vliet Gaussian Coefficients.
*
* Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
* computed using Van Vliet's design method. This is based on the following paper:
*
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
* 98EX170). Vol. 1. IEEE, 1998.
*
*
* The filter is computed as the cascade of a causal and a non causal sequences of second order
* difference equations as can be seen in Equation (11) in Van Vliet's paper. The coefficients are
* the same for both the causal and non causal sequences.
*
* However, to improve its numerical stability, we decompose the 4th order filter into a parallel
* bank of second order filers using the methods of partial fractions as demonstrated in the follow
* book:
*
* Oppenheim, Alan V. Discrete-time signal processing. Pearson Education India, 1999.
*
*/
#include <array>
#include <complex>
#include <cstdint>
#include <memory>
#include "BLI_assert.h"
#include "BLI_hash.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "COM_context.hh"
#include "COM_van_vliet_gaussian_coefficients.hh"
namespace blender::realtime_compositor {
/* --------------------------------------------------------------------
* Van Vliet Gaussian Coefficients Key.
*/
VanVlietGaussianCoefficientsKey::VanVlietGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
uint64_t VanVlietGaussianCoefficientsKey::hash() const
{
return get_default_hash(sigma);
}
bool operator==(const VanVlietGaussianCoefficientsKey &a, const VanVlietGaussianCoefficientsKey &b)
{
return a.sigma == b.sigma;
}
/* -------------------------------------------------------------------------------------------------
* Van Vliet Gaussian Coefficients.
*/
/* Computes the variance of the Gaussian filter represented by the given poles scaled by the given
* scale factor. This is based on Equation (20) in Van Vliet's paper. */
static double compute_scaled_poles_variance(const std::array<std::complex<double>, 4> &poles,
double scale_factor)
{
std::complex<double> variance = std::complex<double>(0.0, 0.0);
for (const std::complex<double> &pole : poles) {
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
const double phase = std::arg(pole) / scale_factor;
const std::complex<double> multiplier1 = std::polar(magnitude, phase);
const std::complex<double> multiplier2 = std::pow(magnitude - std::polar(1.0, phase), -2.0);
variance += 2.0 * multiplier1 * multiplier2;
}
/* The variance is actually real valued as guaranteed by Equations (10) and (2) since the poles
* are complex conjugate pairs. See Section 3.3 of the paper. */
return variance.real();
}
/* Computes the partial derivative with respect to the scale factor at the given scale factor of
* the variance of the Gaussian filter represented by the given poles scaled by the given scale
* factor. This is based on the partial derivative with respect to the scale factor of Equation
* (20) in Van Vliet's paper.
*
* The derivative is not listed in the paper, but was computed manually as the sum of the following
* for each of the poles:
*
* \frac{
* 2a^\frac{1}{x}e^\frac{ib}{x} (e^\frac{ib}{x}+a^\frac{1}{x}) (\ln(a)-ib)
* }{
* x^2 (a^\frac{1}{x}-e^\frac{ib}{x})^3
* }
*
* Where "x" is the scale factor, "a" is the magnitude of the pole, and "b" is its phase. */
static double compute_scaled_poles_variance_derivative(
const std::array<std::complex<double>, 4> &poles, double scale_factor)
{
std::complex<double> variance_derivative = std::complex<double>(0.0, 0.0);
for (const std::complex<double> &pole : poles) {
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
const double phase = std::arg(pole) / scale_factor;
const std::complex<double> multiplier1 = std::polar(magnitude, phase);
const std::complex<double> multiplier2 = magnitude + std::polar(1.0, phase);
const std::complex<double> multiplier3 = std::log(std::abs(pole)) -
std::complex<double>(0.0, std::arg(pole));
const std::complex<double> divisor1 = std::pow(magnitude - std::polar(1.0, phase), 3.0);
const std::complex<double> divisor2 = math::square(scale_factor);
variance_derivative += 2.0 * multiplier1 * multiplier2 * multiplier3 / (divisor1 * divisor2);
}
/* The variance derivative is actually real valued as guaranteed by Equations (10) and (2) since
* the poles are complex conjugate pairs. See Section 3.3 of the paper. */
return variance_derivative.real();
}
/* The poles were computed for a Gaussian filter with a sigma value of 2, in order to generalize
* that for any sigma value, we need to scale the poles by a certain scaling factor as described in
* Section 4.2 of Van Vliet's paper. To find the scaling factor, we start from an initial guess of
* half sigma, then iteratively improve the guess using Newton's method by computing the variance
* and its derivative based on Equation (20). */
static double find_scale_factor(const std::array<std::complex<double>, 4> &poles,
float reference_sigma)
{
const double reference_variance = math::square(reference_sigma);
/* Note that the poles were computed for a Gaussian filter with a sigma value of 2, so it it
* as if we have a base scale of 2, and we start with half sigma as an initial guess. See
* Section 4.2 for more information */
double scale_factor = reference_sigma / 2.0;
const int maximum_interations = 10;
for (int i = 0; i < maximum_interations; i++) {
const double variance = compute_scaled_poles_variance(poles, scale_factor);
/* Close enough, we have found our scale factor. */
if (math::abs(reference_variance - variance) < 1.0e-8) {
return scale_factor;
}
/* Improve guess using Newton's method. Notice that Newton's method is a root finding method,
* so we supply the difference to the reference variance as our function, since the zero point
* will be when the variance is equal to the reference one. The derivative is not affected
* since the reference variance is a constant. */
const double derivative = compute_scaled_poles_variance_derivative(poles, scale_factor);
scale_factor -= (variance - reference_variance) / derivative;
}
/* The paper mentions that only a few iterations are needed, so if we didn't converge after
* maximum_interations, something is probably wrong. */
BLI_assert_unreachable();
return scale_factor;
}
/* The poles were computed for a Gaussian filter with a sigma value of 2, so scale them using
* Equation (19) in Van Vliet's paper to have the given sigma value. This involves finding the
* appropriate scale factor based on Equation (20), see Section 4.2 and the find_scale_factor
* method for more information. */
static std::array<std::complex<double>, 4> computed_scaled_poles(
const std::array<std::complex<double>, 4> &poles, float sigma)
{
const double scale_factor = find_scale_factor(poles, sigma);
std::array<std::complex<double>, 4> scaled_poles;
for (int i = 0; i < poles.size(); i++) {
const std::complex<double> &pole = poles[i];
const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
const double phase = std::arg(pole) / scale_factor;
scaled_poles[i] = std::polar(magnitude, phase);
}
return scaled_poles;
}
/* Compute the causal poles from the non causal ones. Since the Gaussian is a real even function,
* the causal poles are just the inverse of the non causal poles, as noted in Equation (2) in Van
* Vliet's paper. */
static std::array<std::complex<double>, 4> compute_causal_poles(
const std::array<std::complex<double>, 4> &non_causal_poles)
{
std::array<std::complex<double>, 4> causal_poles;
for (int i = 0; i < non_causal_poles.size(); i++) {
causal_poles[i] = 1.0 / non_causal_poles[i];
}
return causal_poles;
}
/* Computes the feedback coefficients from the given poles based on the equations in Equation (13)
* in Van Vliet's paper. See Section 3.2 for more information. */
static double4 compute_feedback_coefficients(const std::array<std::complex<double>, 4> &poles)
{
/* Compute the gain of the poles, which is the "b" at the end of Equation (13). */
std::complex<double> gain = std::complex<double>(1.0, 0.0);
for (const std::complex<double> &pole : poles) {
gain /= pole;
}
/* Compute the coefficients b4, b3, b2, and b1 based on the expressions b_N, b_N-1, b_N-2, and
* b_N-3 respectively in Equation (13). b4 and b3 are trivial, while b2 and b1 can be computed by
* drawing the following summation trees, where each path from the root to the leaf is multiplied
* and added:
*
* b2
* ____/|\____
* / | \
* i --> 2 3 4
* | / \ /|\
* j --> 1 1 2 1 2 3
*
* b1
* ___/ \___
* / \
* i --> 3 4
* | / \
* j --> 2 2 3
* | | / \
* k --> 1 1 1 2
*
* Notice that the values of i, j, and k are 1-index, so we need to subtract one when accessing
* the poles. */
const std::complex<double> b4 = gain;
const std::complex<double> b3 = -gain * (poles[0] + poles[1] + poles[2] + poles[3]);
const std::complex<double> b2 = gain * (poles[1] * poles[0] + poles[2] * poles[0] +
poles[2] * poles[1] + poles[3] * poles[0] +
poles[3] * poles[1] + poles[3] * poles[2]);
const std::complex<double> b1 = -gain * (poles[2] * poles[1] * poles[0] +
poles[3] * poles[1] * poles[0] +
poles[3] * poles[2] * poles[0] +
poles[3] * poles[2] * poles[1]);
/* The coefficients are actually real valued as guaranteed by Equations (10) and (2) since
* the poles are complex conjugate pairs. See Section 3.3 of the paper. */
const double4 coefficients = double4(b1.real(), b2.real(), b3.real(), b4.real());
return coefficients;
}
/* Computes the feedforward coefficient from the feedback coefficients based on Equation (12) of
* Van Vliet's paper. See Section 3.2 for more information. */
static double compute_feedforward_coefficient(const double4 &feedback_coefficients)
{
return 1.0 + math::reduce_add(feedback_coefficients);
}
/* Computes the residue of the partial fraction of the transfer function of the given causal poles
* and gain for the given target pole. This essentially evaluates Equation (3.41) in Oppenheim's
* book, where d_k is the target pole and assuming the transfer function is in the form given in
* Equation (3.39), where d_k are the poles. See the following derivation for the gain value.
*
* For the particular case of the Van Vliet's system, there are no zeros, so the numerator in
* Equation (3.39) is one. Further note that Van Vliet's formulation is different from the expected
* form, so we need to rearrange Equation (3) in to match the form in Equation (3.39), which is
* shown below.
*
* Start from the causal term of Equation (3):
*
* H_+(z) = \prod_{i=1}^N \frac{d_i - 1}{d_i - z^{-1}}
*
* Divide by d_i:
*
* H_+(z) = \prod_{i=1}^N \frac{1 - d_i^{-1}}{1 - d_i^{-1}z^{-1}}
*
* Move the numerator to its own product:
*
* H_+(z) = \prod_{i=1}^N 1 - d_i^{-1} \prod_{i=1}^N \frac{1}{1 - d_i^{-1}z^{-1}}
*
* And we reach the same form as Equation (3.39). Where the first product term is b0 / a0 and is
* also the given gain value, which is also the same as the feedforward coefficient denoted by
* the alpha in Equation (12). Further d_i^{-1} in our derivation is the same as d_k in Equation
* (3.39), the discrepancy in the inverse operator is the fact that Van Vliet's derivation assume
* non causal poles, while Oppenheim's assume causal poles, which are inverse of each other as can
* be seen in the compute_causal_poles function. */
static std::complex<double> compute_partial_fraction_residue(
const std::array<std::complex<double>, 4> &poles,
const std::complex<double> &target_pole,
double gain)
{
/* Evaluating Equation (3.41) actually corresponds to omitting the terms in Equation (3.39) that
* corresponds to the target pole or its conjugate, because they get canceled by the first term
* in Equation (3.41). That's we are essentially evaluating the limit as the expression tends to
* the target pole. */
std::complex<double> target_pole_inverse = 1.0 / target_pole;
std::complex<double> residue = std::complex<double>(1.0, 0.0);
for (const std::complex<double> &pole : poles) {
if (pole != target_pole && pole != std::conj(target_pole)) {
residue *= 1.0 - pole * target_pole_inverse;
}
}
/* Remember that the gain is the b0 / a0 expression in Equation (3.39). */
return gain / residue;
}
/* Evaluates the causal transfer function at the reciprocal of the given pole, which will be the
* non causal pole if the given pole is a causal one, as discussed in the compute_causal_poles
* function. The causal transfer function is given in Equation (3) in Van Vliet's paper, but we
* compute it in the form derived in the description of the compute_partial_fraction_residue
* function, also see the aforementioned function for the gain value. */
static std::complex<double> compute_causal_transfer_function_at_non_causal_pole(
const std::array<std::complex<double>, 4> &poles,
const std::complex<double> &target_pole,
double gain)
{
std::complex<double> result = std::complex<double>(1.0, 0.0);
for (const std::complex<double> &pole : poles) {
result *= 1.0 - pole * target_pole;
}
return gain / result;
}
/* Combine each pole and its conjugate counterpart into a second order section and assign its
* coefficients to the given coefficients value. The residue of the pole and its transfer value in
* the partial fraction of its transfer function are given.
*
* TODO: Properly document this function and prove its equations. */
static void compute_second_order_section(const std::complex<double> &pole,
const std::complex<double> &residue,
const std::complex<double> &transfer_value,
double2 &r_feedback_coefficients,
double2 &r_causal_feedforward_coefficients,
double2 &r_non_causal_feedforward_coefficients)
{
const std::complex<double> parallel_residue = residue * transfer_value;
const std::complex<double> pole_inverse = 1.0 / pole;
r_feedback_coefficients = double2(-2.0 * pole.real(), std::norm(pole));
const double causal_feedforward_1 = parallel_residue.imag() / pole_inverse.imag();
const double causal_feedforward_0 = parallel_residue.real() -
causal_feedforward_1 * pole_inverse.real();
r_causal_feedforward_coefficients = double2(causal_feedforward_0, causal_feedforward_1);
const double non_causal_feedforward_1 = causal_feedforward_1 -
causal_feedforward_0 * r_feedback_coefficients[0];
const double non_causal_feedforward_2 = -causal_feedforward_0 * r_feedback_coefficients[1];
r_non_causal_feedforward_coefficients = double2(non_causal_feedforward_1,
non_causal_feedforward_2);
}
/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
* previous outputs are not really defined at the start of the filter. To do Neumann boundary
* condition, we initialize the previous output with a special value that is a function of the
* boundary value. This special value is computed by multiply the boundary value with a coefficient
* to simulate an infinite stream of the boundary value.
*
* The function for the coefficient can be derived by substituting the boundary value for previous
* inputs, equating all current and previous outputs to the same value, and finally rearranging to
* compute that same output value.
*
* Start by the difference equation where b_i are the feedforward coefficients and a_i are the
* feedback coefficients:
*
* y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
*
* Assume all outputs are y and all inputs are x, which is the boundary value:
*
* y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
*
* Now rearrange to compute y:
*
* y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
* y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
* y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
* y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
*
* So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
* that is, we are doing Dirichlet boundary condition, the equations still hold. */
static double compute_boundary_coefficient(const double2 &feedback_coefficients,
const double2 &feedforward_coefficients)
{
return math::reduce_add(feedforward_coefficients) /
(1.0 + math::reduce_add(feedback_coefficients));
}
/* Computes the feedback and feedforward coefficients for the 4th order Van Vliet Gaussian filter
* given a target Gaussian sigma value. We first scale the poles of the filter to match the sigma
* value based on the method described in Section 4.2 of Van Vliet's paper, then we compute the
* coefficients from the scaled poles based on Equations (12) and (13). */
VanVlietGaussianCoefficients::VanVlietGaussianCoefficients(Context & /*context*/, float sigma)
{
/* The 4th order (N=4) poles for the Gaussian filter of a sigma of 2 computed by minimizing the
* maximum error (L-infinity) to true Gaussian as provided in Van Vliet's paper Table (1) fourth
* column. Notice that the second and fourth poles are the complex conjugates of the first and
* third poles respectively as noted in the table description. */
const std::array<std::complex<double>, 4> poles = {
std::complex<double>(1.12075, 1.27788),
std::complex<double>(1.12075, -1.27788),
std::complex<double>(1.76952, 0.46611),
std::complex<double>(1.76952, -0.46611),
};
const std::array<std::complex<double>, 4> scaled_poles = computed_scaled_poles(poles, sigma);
/* The given poles are actually the non causal poles, since they are outside of the unit circle,
* as demonstrated in Section 3.4 of Van Vliet's paper. And we compute the causal poles from
* those. */
const std::array<std::complex<double>, 4> non_causal_poles = scaled_poles;
const std::array<std::complex<double>, 4> causal_poles = compute_causal_poles(non_causal_poles);
/* Compute the feedforward and feedback coefficients, noting that those are functions of the non
* causal poles. */
const double4 feedback_coefficients = compute_feedback_coefficients(non_causal_poles);
const double feedforward_coefficient = compute_feedforward_coefficient(feedback_coefficients);
/* We only compute the residue for two of the causal poles, since the other two are complex
* conjugates of those two, and their residues will also be the complex conjugate of their
* respective counterpart. The gain is the feedforward coefficient as discussed in the function
* description. */
const std::complex<double> first_residue = compute_partial_fraction_residue(
causal_poles, causal_poles[0], feedforward_coefficient);
const std::complex<double> second_residue = compute_partial_fraction_residue(
causal_poles, causal_poles[2], feedforward_coefficient);
/* We only compute the transfer value of for two of the non causal poles, since the other two are
* complex conjugates of those two, and their transfer values will also be the complex conjugate
* of their respective counterpart. The gain is the feedforward coefficient as discussed in the
* function description. */
const std::complex<double> first_transfer_value =
compute_causal_transfer_function_at_non_causal_pole(
causal_poles, causal_poles[0], feedforward_coefficient);
const std::complex<double> second_transfer_value =
compute_causal_transfer_function_at_non_causal_pole(
causal_poles, causal_poles[2], feedforward_coefficient);
/* Combine each pole and its conjugate counterpart into a second order section and assign its
* coefficients. */
compute_second_order_section(causal_poles[0],
first_residue,
first_transfer_value,
first_feedback_coefficients_,
first_causal_feedforward_coefficients_,
first_non_causal_feedforward_coefficients_);
compute_second_order_section(causal_poles[2],
second_residue,
second_transfer_value,
second_feedback_coefficients_,
second_causal_feedforward_coefficients_,
second_non_causal_feedforward_coefficients_);
/* Compute the boundary coefficients for all four of second order sections. */
first_causal_boundary_coefficient_ = compute_boundary_coefficient(
first_feedback_coefficients_, first_causal_feedforward_coefficients_);
first_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
first_feedback_coefficients_, first_non_causal_feedforward_coefficients_);
second_causal_boundary_coefficient_ = compute_boundary_coefficient(
second_feedback_coefficients_, second_causal_feedforward_coefficients_);
second_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
second_feedback_coefficients_, second_non_causal_feedforward_coefficients_);
}
const double2 &VanVlietGaussianCoefficients::first_causal_feedforward_coefficients() const
{
return first_causal_feedforward_coefficients_;
}
const double2 &VanVlietGaussianCoefficients::first_non_causal_feedforward_coefficients() const
{
return first_non_causal_feedforward_coefficients_;
}
const double2 &VanVlietGaussianCoefficients::first_feedback_coefficients() const
{
return first_feedback_coefficients_;
}
const double2 &VanVlietGaussianCoefficients::second_causal_feedforward_coefficients() const
{
return second_causal_feedforward_coefficients_;
}
const double2 &VanVlietGaussianCoefficients::second_non_causal_feedforward_coefficients() const
{
return second_non_causal_feedforward_coefficients_;
}
const double2 &VanVlietGaussianCoefficients::second_feedback_coefficients() const
{
return second_feedback_coefficients_;
}
double VanVlietGaussianCoefficients::first_causal_boundary_coefficient() const
{
return first_causal_boundary_coefficient_;
}
double VanVlietGaussianCoefficients::first_non_causal_boundary_coefficient() const
{
return first_non_causal_boundary_coefficient_;
}
double VanVlietGaussianCoefficients::second_causal_boundary_coefficient() const
{
return second_causal_boundary_coefficient_;
}
double VanVlietGaussianCoefficients::second_non_causal_boundary_coefficient() const
{
return second_non_causal_boundary_coefficient_;
}
/* --------------------------------------------------------------------
* Van Vliet Gaussian Coefficients Container.
*/
void VanVlietGaussianCoefficientsContainer::reset()
{
/* First, delete all resources that are no longer needed. */
map_.remove_if([](auto item) { return !item.value->needed; });
/* Second, reset the needed status of the remaining resources to false to ready them to track
* their needed status for the next evaluation. */
for (auto &value : map_.values()) {
value->needed = false;
}
}
VanVlietGaussianCoefficients &VanVlietGaussianCoefficientsContainer::get(Context &context,
float sigma)
{
const VanVlietGaussianCoefficientsKey key(sigma);
auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
key, [&]() { return std::make_unique<VanVlietGaussianCoefficients>(context, sigma); });
deriche_gaussian_coefficients.needed = true;
return deriche_gaussian_coefficients;
}
} // namespace blender::realtime_compositor

View File

@ -20,6 +20,8 @@ void StaticCacheManager::reset()
cached_shaders.reset();
bokeh_kernels.reset();
cached_images.reset();
deriche_gaussian_coefficients.reset();
van_vliet_gaussian_coefficients.reset();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,89 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
* filter using Deriche's design method. This is based on the following paper:
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*
* We run two filters per row in parallel, one for the causal filter and one for the non causal
* filter, storing the result of each separately. See the DericheGaussianCoefficients class and the
* implementation for more information. */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#define FILTER_ORDER 4
void main()
{
/* The shader runs parallel across rows but serially across columns. */
int y = int(gl_GlobalInvocationID.x);
int width = texture_size(input_tx).x;
/* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
* causal one. */
bool is_causal = gl_GlobalInvocationID.y == 0;
vec4 feedforward_coefficients = is_causal ? causal_feedforward_coefficients :
non_causal_feedforward_coefficients;
float boundary_coefficient = is_causal ? causal_boundary_coefficient :
non_causal_boundary_coefficient;
/* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
* current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
* boundary condition, so we initialize all inputs by the boundary pixel. */
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
vec4 input_boundary = texture_load(input_tx, boundary_texel);
vec4 inputs[FILTER_ORDER + 1] = vec4[](
input_boundary, input_boundary, input_boundary, input_boundary, input_boundary);
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
* current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
* boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
* boundary coefficient. See the DericheGaussianCoefficients class for more information on the
* boundary handing. */
vec4 output_boundary = input_boundary * boundary_coefficient;
vec4 outputs[FILTER_ORDER + 1] = vec4[](
output_boundary, output_boundary, output_boundary, output_boundary, output_boundary);
for (int x = 0; x < width; x++) {
/* Run forward across rows for the causal filter and backward for the non causal filter. */
ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
inputs[0] = texture_load(input_tx, texel);
/* Compute Equation (28) for the causal filter or Equation (29) for the non causal filter. The
* only difference is that the non causal filter ignores the current value and starts from the
* previous input, as can be seen in the subscript of the first input term in both equations.
* So add one while indexing the non causal inputs. */
outputs[0] = vec4(0.0);
int first_input_index = is_causal ? 0 : 1;
for (int i = 0; i < FILTER_ORDER; i++) {
outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
}
/* Store the causal and non causal outputs independently, then sum them in a separate shader
* dispatch for better parallelism. */
if (is_causal) {
imageStore(causal_output_img, texel, outputs[0]);
}
else {
imageStore(non_causal_output_img, texel, outputs[0]);
}
/* Shift the inputs temporally by one. The oldest input is discarded, while the current input
* will retain its value but will be overwritten with the new current value in the next
* iteration. */
for (int i = FILTER_ORDER; i >= 1; i--) {
inputs[i] = inputs[i - 1];
}
/* Shift the outputs temporally by one. The oldest output is discarded, while the current
* output will retain its value but will be overwritten with the new current value in the next
* iteration. */
for (int i = FILTER_ORDER; i >= 1; i--) {
outputs[i] = outputs[i - 1];
}
}
}

View File

@ -0,0 +1,19 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* The Deriche filter is a parallel interconnection filter, meaning its output is the sum of its
* causal and non causal filters. */
vec4 filter_output = texture_load(causal_input_tx, texel) +
texture_load(non_causal_input_tx, texel);
/* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
* in the deriche_gaussian_blur.cc file for more information on the rational behind this. */
imageStore(output_img, texel.yx, filter_output);
}

View File

@ -0,0 +1,125 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
* filter using Van Vliet's design method. This is based on the following paper:
*
* Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
* filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
* 98EX170). Vol. 1. IEEE, 1998.
*
* We decomposed the filter into two second order filters, so we actually run four filters per row
* in parallel, one for the first causal filter, one for the first non causal filter, one for the
* second causal filter, and one for the second non causal filter, storing the result of each
* separately. See the VanVlietGaussianCoefficients class and the implementation for more
* information. */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#define FILTER_ORDER 2
void main()
{
/* The shader runs parallel across rows but serially across columns. */
int y = int(gl_GlobalInvocationID.x);
int width = texture_size(input_tx).x;
/* The second dispatch dimension is four dispatches:
*
* 0 -> First causal filter.
* 1 -> First non causal filter.
* 2 -> Second causal filter.
* 3 -> Second non causal filter.
*
* We detect causality by even numbers. */
bool is_causal = gl_GlobalInvocationID.y % 2 == 0;
vec2 first_feedforward_coefficients = is_causal ? first_causal_feedforward_coefficients :
first_non_causal_feedforward_coefficients;
float first_boundary_coefficient = is_causal ? first_causal_boundary_coefficient :
first_non_causal_boundary_coefficient;
vec2 second_feedforward_coefficients = is_causal ? second_causal_feedforward_coefficients :
second_non_causal_feedforward_coefficients;
float second_boundary_coefficient = is_causal ? second_causal_boundary_coefficient :
second_non_causal_boundary_coefficient;
/* And we detect the filter by order. */
bool is_first_filter = gl_GlobalInvocationID.y < 2;
vec2 feedforward_coefficients = is_first_filter ? first_feedforward_coefficients :
second_feedforward_coefficients;
vec2 feedback_coefficients = is_first_filter ? first_feedback_coefficients :
second_feedback_coefficients;
float boundary_coefficient = is_first_filter ? first_boundary_coefficient :
second_boundary_coefficient;
/* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
* current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
* boundary condition, so we initialize all inputs by the boundary pixel. */
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
vec4 input_boundary = texture_load(input_tx, boundary_texel);
vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary);
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
* current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
* boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
* boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the
* boundary handing. */
vec4 output_boundary = input_boundary * boundary_coefficient;
vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary);
for (int x = 0; x < width; x++) {
/* Run forward across rows for the causal filter and backward for the non causal filter. */
ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
inputs[0] = texture_load(input_tx, texel);
/* Compute the filter based on its difference equation, this is not in the Van Vliet paper
* because the filter was decomposed, but it is essentially similar to Equation (28) for the
* causal filter or Equation (29) for the non causal filter in Deriche's paper, except it is
* second order, not fourth order.
*
* Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
* 1993.
*
* The only difference is that the non causal filter ignores the current value and starts from
* the previous input, as can be seen in the subscript of the first input term in both
* equations. So add one while indexing the non causal inputs. */
outputs[0] = vec4(0.0);
int first_input_index = is_causal ? 0 : 1;
for (int i = 0; i < FILTER_ORDER; i++) {
outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
}
/* Store the causal and non causal outputs of each of the two filters independently, then sum
* them in a separate shader dispatch for better parallelism. */
if (is_causal) {
if (is_first_filter) {
imageStore(first_causal_output_img, texel, outputs[0]);
}
else {
imageStore(second_causal_output_img, texel, outputs[0]);
}
}
else {
if (is_first_filter) {
imageStore(first_non_causal_output_img, texel, outputs[0]);
}
else {
imageStore(second_non_causal_output_img, texel, outputs[0]);
}
}
/* Shift the inputs temporally by one. The oldest input is discarded, while the current input
* will retain its value but will be overwritten with the new current value in the next
* iteration. */
for (int i = FILTER_ORDER; i >= 1; i--) {
inputs[i] = inputs[i - 1];
}
/* Shift the outputs temporally by one. The oldest output is discarded, while the current
* output will retain its value but will be overwritten with the new current value in the next
* iteration. */
for (int i = FILTER_ORDER; i >= 1; i--) {
outputs[i] = outputs[i - 1];
}
}
}

View File

@ -0,0 +1,21 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* The Van Vliet filter is a parallel interconnection filter, meaning its output is the sum of
* all of its causal and non causal filters. */
vec4 filter_output = texture_load(first_causal_input_tx, texel) +
texture_load(first_non_causal_input_tx, texel) +
texture_load(second_causal_input_tx, texel) +
texture_load(second_non_causal_input_tx, texel);
/* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
* in the van_vliet_gaussian_blur.cc file for more information on the rational behind this. */
imageStore(output_img, texel.yx, filter_output);
}

View File

@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur)
.local_group_size(128, 2)
.push_constant(Type::VEC4, "causal_feedforward_coefficients")
.push_constant(Type::VEC4, "non_causal_feedforward_coefficients")
.push_constant(Type::VEC4, "feedback_coefficients")
.push_constant(Type::FLOAT, "causal_boundary_coefficient")
.push_constant(Type::FLOAT, "non_causal_boundary_coefficient")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "causal_output_img")
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "non_causal_output_img")
.compute_source("compositor_deriche_gaussian_blur.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur_sum)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "causal_input_tx")
.sampler(1, ImageType::FLOAT_2D, "non_causal_input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_deriche_gaussian_blur_sum.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur)
.local_group_size(64, 4)
.push_constant(Type::VEC2, "first_feedback_coefficients")
.push_constant(Type::VEC2, "first_causal_feedforward_coefficients")
.push_constant(Type::VEC2, "first_non_causal_feedforward_coefficients")
.push_constant(Type::VEC2, "second_feedback_coefficients")
.push_constant(Type::VEC2, "second_causal_feedforward_coefficients")
.push_constant(Type::VEC2, "second_non_causal_feedforward_coefficients")
.push_constant(Type::FLOAT, "first_causal_boundary_coefficient")
.push_constant(Type::FLOAT, "first_non_causal_boundary_coefficient")
.push_constant(Type::FLOAT, "second_causal_boundary_coefficient")
.push_constant(Type::FLOAT, "second_non_causal_boundary_coefficient")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_causal_output_img")
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_non_causal_output_img")
.image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_causal_output_img")
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_non_causal_output_img")
.compute_source("compositor_van_vliet_gaussian_blur.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur_sum)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "first_causal_input_tx")
.sampler(1, ImageType::FLOAT_2D, "first_non_causal_input_tx")
.sampler(2, ImageType::FLOAT_2D, "second_causal_input_tx")
.sampler(3, ImageType::FLOAT_2D, "second_non_causal_input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_van_vliet_gaussian_blur_sum.glsl")
.do_static_compilation(true);

View File

@ -19,6 +19,7 @@
#include "GPU_shader.hh"
#include "GPU_texture.hh"
#include "COM_algorithm_recursive_gaussian_blur.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_node_operation.hh"
#include "COM_symmetric_blur_weights.hh"
@ -106,7 +107,11 @@ class BlurOperation : public NodeOperation {
return;
}
if (use_variable_size()) {
if (node_storage(bnode()).filtertype == R_FILTER_FAST_GAUSS) {
recursive_gaussian_blur(
context(), get_input("Image"), get_result("Image"), compute_blur_radius());
}
else if (use_variable_size()) {
execute_variable_size();
}
else if (use_separable_filter()) {

@ -1 +1 @@
Subproject commit d9c72c9c34a205f5012b5dbd84bb17116775df49
Subproject commit 3a1d189a550119157aa4246de27d9d661b6f6548