2024-05-01 09:57:43 +02:00
21 changed files with 1821 additions and 2 deletions
--- a/source/blender/compositor/realtime_compositor/CMakeLists.txt
+++ b/source/blender/compositor/realtime_compositor/CMakeLists.txt
@ -63,35 +63,42 @@ set(SRC
  COM_texture_pool.hh
  COM_utilities.hh

+  algorithms/intern/deriche_gaussian_blur.cc
  algorithms/intern/jump_flooding.cc
  algorithms/intern/morphological_blur.cc
  algorithms/intern/morphological_distance.cc
  algorithms/intern/morphological_distance_feather.cc
  algorithms/intern/parallel_reduction.cc
  algorithms/intern/realize_on_domain.cc
+  algorithms/intern/recursive_gaussian_blur.cc
  algorithms/intern/smaa.cc
  algorithms/intern/summed_area_table.cc
  algorithms/intern/symmetric_separable_blur.cc
  algorithms/intern/symmetric_separable_blur_variable_size.cc
  algorithms/intern/transform.cc
+  algorithms/intern/van_vliet_gaussian_blur.cc

+  algorithms/COM_algorithm_deriche_gaussian_blur.hh
  algorithms/COM_algorithm_jump_flooding.hh
  algorithms/COM_algorithm_morphological_blur.hh
  algorithms/COM_algorithm_morphological_distance.hh
  algorithms/COM_algorithm_morphological_distance_feather.hh
  algorithms/COM_algorithm_parallel_reduction.hh
  algorithms/COM_algorithm_realize_on_domain.hh
+  algorithms/COM_algorithm_recursive_gaussian_blur.hh
  algorithms/COM_algorithm_smaa.hh
  algorithms/COM_algorithm_summed_area_table.hh
  algorithms/COM_algorithm_symmetric_separable_blur.hh
  algorithms/COM_algorithm_symmetric_separable_blur_variable_size.hh
  algorithms/COM_algorithm_transform.hh
+  algorithms/COM_algorithm_van_vliet_gaussian_blur.hh

  cached_resources/intern/bokeh_kernel.cc
  cached_resources/intern/cached_image.cc
  cached_resources/intern/cached_mask.cc
  cached_resources/intern/cached_shader.cc
  cached_resources/intern/cached_texture.cc
+  cached_resources/intern/deriche_gaussian_coefficients.cc
  cached_resources/intern/distortion_grid.cc
  cached_resources/intern/keying_screen.cc
  cached_resources/intern/morphological_distance_feather_weights.cc
@ -99,6 +106,7 @@ set(SRC
  cached_resources/intern/smaa_precomputed_textures.cc
  cached_resources/intern/symmetric_blur_weights.cc
  cached_resources/intern/symmetric_separable_blur_weights.cc
+  cached_resources/intern/van_vliet_gaussian_coefficients.cc

  cached_resources/COM_bokeh_kernel.hh
  cached_resources/COM_cached_image.hh
@ -106,6 +114,7 @@ set(SRC
  cached_resources/COM_cached_resource.hh
  cached_resources/COM_cached_shader.hh
  cached_resources/COM_cached_texture.hh
+  cached_resources/COM_deriche_gaussian_coefficients.hh
  cached_resources/COM_distortion_grid.hh
  cached_resources/COM_keying_screen.hh
  cached_resources/COM_morphological_distance_feather_weights.hh
@ -113,6 +122,7 @@ set(SRC
  cached_resources/COM_smaa_precomputed_textures.hh
  cached_resources/COM_symmetric_blur_weights.hh
  cached_resources/COM_symmetric_separable_blur_weights.hh
+  cached_resources/COM_van_vliet_gaussian_coefficients.hh
 )

 set(LIB
@ -142,6 +152,8 @@ set(GLSL_SRC
  shaders/compositor_defocus_radius_from_depth.glsl
  shaders/compositor_defocus_radius_from_scale.glsl
  shaders/compositor_despeckle.glsl
+  shaders/compositor_deriche_gaussian_blur.glsl
+  shaders/compositor_deriche_gaussian_blur_sum.glsl
  shaders/compositor_directional_blur.glsl
  shaders/compositor_displace.glsl
  shaders/compositor_double_edge_mask_compute_boundary.glsl
@ -215,6 +227,8 @@ set(GLSL_SRC
  shaders/compositor_symmetric_separable_blur_variable_size.glsl
  shaders/compositor_tone_map_photoreceptor.glsl
  shaders/compositor_tone_map_simple.glsl
+  shaders/compositor_van_vliet_gaussian_blur.glsl
+  shaders/compositor_van_vliet_gaussian_blur_sum.glsl
  shaders/compositor_write_output.glsl
  shaders/compositor_z_combine_compute_mask.glsl
  shaders/compositor_z_combine_from_mask.glsl
@ -290,6 +304,7 @@ set(SRC_SHADER_CREATE_INFOS
  shaders/infos/compositor_convert_info.hh
  shaders/infos/compositor_cryptomatte_info.hh
  shaders/infos/compositor_defocus_info.hh
+  shaders/infos/compositor_deriche_gaussian_blur_info.hh
  shaders/infos/compositor_despeckle_info.hh
  shaders/infos/compositor_directional_blur_info.hh
  shaders/infos/compositor_displace_info.hh
@ -334,6 +349,7 @@ set(SRC_SHADER_CREATE_INFOS
  shaders/infos/compositor_symmetric_separable_blur_variable_size_info.hh
  shaders/infos/compositor_tone_map_photoreceptor_info.hh
  shaders/infos/compositor_tone_map_simple_info.hh
+  shaders/infos/compositor_van_vliet_gaussian_blur_info.hh
  shaders/infos/compositor_write_output_info.hh
  shaders/infos/compositor_z_combine_info.hh
 )
--- a/source/blender/compositor/realtime_compositor/COM_static_cache_manager.hh
+++ b/source/blender/compositor/realtime_compositor/COM_static_cache_manager.hh
@ -9,6 +9,7 @@
 #include "COM_cached_mask.hh"
 #include "COM_cached_shader.hh"
 #include "COM_cached_texture.hh"
+#include "COM_deriche_gaussian_coefficients.hh"
 #include "COM_distortion_grid.hh"
 #include "COM_keying_screen.hh"
 #include "COM_morphological_distance_feather_weights.hh"
@ -16,6 +17,7 @@
 #include "COM_smaa_precomputed_textures.hh"
 #include "COM_symmetric_blur_weights.hh"
 #include "COM_symmetric_separable_blur_weights.hh"
+#include "COM_van_vliet_gaussian_coefficients.hh"

 namespace blender::realtime_compositor {

@ -55,6 +57,8 @@ class StaticCacheManager {
  CachedShaderContainer cached_shaders;
  BokehKernelContainer bokeh_kernels;
  CachedImageContainer cached_images;
+  DericheGaussianCoefficientsContainer deriche_gaussian_coefficients;
+  VanVlietGaussianCoefficientsContainer van_vliet_gaussian_coefficients;

  /* Reset the cache manager by deleting the cached resources that are no longer needed because
   * they weren't used in the last evaluation and prepare the remaining cached resources to track
--- a/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_deriche_gaussian_blur.hh
+++ b/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_deriche_gaussian_blur.hh
@ -0,0 +1,31 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_math_vector_types.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+namespace blender::realtime_compositor {
+
+/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
+ * sigma computed using Deriche's design method. This is based on the following paper:
+ *
+ *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+ *   1993.
+ *
+ * This differs from the standard symmetric separable blur algorithm in that it is faster for high
+ * sigma values, the downside is that it consumes more memory and is only an approximation that
+ * might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
+ * is numerically unstable and not accurate for sigma values larger than 32, in those cases, use
+ * the Van Vliet filter instead. Further, for sigma values less than 3, use direct convolution
+ * instead, since it is faster and more accurate. Neumann boundary is assumed.
+ *
+ * The output is written to the given output result, which will be allocated internally and is thus
+ * expected not to be previously allocated. */
+void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_recursive_gaussian_blur.hh
+++ b/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_recursive_gaussian_blur.hh
@ -0,0 +1,24 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_math_vector_types.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+namespace blender::realtime_compositor {
+
+/* Blur the input using a recursive Gaussian blur algorithm given a certain radius. This differs
+ * from the standard symmetric separable blur algorithm in that it is orders of magnitude faster
+ * for very high radius value, the downside is that it consumes more memory and is only an
+ * approximation that might suffer from fringing and artifacts, though those are typically
+ * unnoticeable. Neumann boundary is assumed.
+ *
+ * The output is written to the given output result, which will be allocated internally and is thus
+ * expected not to be previously allocated. */
+void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius);
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_van_vliet_gaussian_blur.hh
+++ b/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_van_vliet_gaussian_blur.hh
@ -0,0 +1,36 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "BLI_math_vector_types.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+namespace blender::realtime_compositor {
+
+/* Blur the input using a fourth order IIR filter approximating a Gaussian filter of the given
+ * sigma computed using Van Vliet's design method. This is based on the following paper:
+ *
+ *   Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
+ *   filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
+ *   98EX170). Vol. 1. IEEE, 1998.
+ *
+ * However, we internally split the fourth order IIR filter into two second order sections in order
+ * to improve its numerical stability and improve parallelism. See the implementation for more
+ * information.
+ *
+ * This differs from the standard symmetric separable blur algorithm in that it is faster for high
+ * sigma values, the downside is that it consumes more memory and is only an approximation that
+ * might suffer from fringing and artifacts, though those are typically unnoticeable. This filter
+ * is not accurate for sigma values less than 32, in those cases, use the Deriche filter instead.
+ * Further, for sigma values less than 3, use direct convolution instead, since it is faster and
+ * more accurate. Neumann boundary is assumed.
+ *
+ * The output is written to the given output result, which will be allocated internally and is thus
+ * expected not to be previously allocated. */
+void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma);
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/intern/deriche_gaussian_blur.cc
+++ b/source/blender/compositor/realtime_compositor/algorithms/intern/deriche_gaussian_blur.cc
@ -0,0 +1,119 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_assert.h"
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+
+#include "GPU_shader.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+#include "COM_utilities.hh"
+
+#include "COM_algorithm_deriche_gaussian_blur.hh"
+#include "COM_deriche_gaussian_coefficients.hh"
+
+namespace blender::realtime_compositor {
+
+/* Sum the causal and non causal outputs of the filter and write the sum to the output. This is
+ * because the Deriche filter is a parallel interconnection filter, meaning its output is the sum
+ * of its causal and non causal filters. The output is expected not to be allocated as it will be
+ * allocated internally.
+ *
+ * The output is allocated and written transposed, that is, with a height equivalent to the width
+ * of the input and vice versa. This is done as a performance optimization. The blur pass will
+ * blur the image horizontally and write it to the intermediate output transposed. Then the
+ * vertical pass will execute the same horizontal blur shader, but since its input is transposed,
+ * it will effectively do a vertical blur and write to the output transposed, effectively undoing
+ * the transposition in the horizontal pass. This is done to improve spatial cache locality in the
+ * shader and to avoid having two separate shaders for each blur pass. */
+static void sum_causal_and_non_causal_results(Context &context,
+                                              Result &causal_input,
+                                              Result &non_causal_input,
+                                              Result &output)
+{
+  GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur_sum");
+  GPU_shader_bind(shader);
+
+  causal_input.bind_as_texture(shader, "causal_input_tx");
+  non_causal_input.bind_as_texture(shader, "non_causal_input_tx");
+
+  const Domain domain = causal_input.domain();
+  const int2 transposed_domain = int2(domain.size.y, domain.size.x);
+  output.allocate_texture(transposed_domain);
+  output.bind_as_image(shader, "output_img");
+
+  compute_dispatch_threads_at_least(shader, domain.size);
+
+  GPU_shader_unbind();
+  causal_input.unbind_as_texture();
+  non_causal_input.unbind_as_texture();
+  output.unbind_as_image();
+}
+
+static void blur_pass(Context &context, Result &input, Result &output, float sigma)
+{
+  GPUShader *shader = context.get_shader("compositor_deriche_gaussian_blur");
+  GPU_shader_bind(shader);
+
+  const DericheGaussianCoefficients &coefficients =
+      context.cache_manager().deriche_gaussian_coefficients.get(context, sigma);
+
+  GPU_shader_uniform_4fv(shader,
+                         "causal_feedforward_coefficients",
+                         float4(coefficients.causal_feedforward_coefficients()));
+  GPU_shader_uniform_4fv(shader,
+                         "non_causal_feedforward_coefficients",
+                         float4(coefficients.non_causal_feedforward_coefficients()));
+  GPU_shader_uniform_4fv(
+      shader, "feedback_coefficients", float4(coefficients.feedback_coefficients()));
+  GPU_shader_uniform_1f(
+      shader, "causal_boundary_coefficient", float(coefficients.causal_boundary_coefficient()));
+  GPU_shader_uniform_1f(shader,
+                        "non_causal_boundary_coefficient",
+                        float(coefficients.non_causal_boundary_coefficient()));
+
+  input.bind_as_texture(shader, "input_tx");
+
+  const Domain domain = input.domain();
+
+  Result causal_result = context.create_temporary_result(ResultType::Color);
+  causal_result.allocate_texture(domain);
+  causal_result.bind_as_image(shader, "causal_output_img");
+
+  Result non_causal_result = context.create_temporary_result(ResultType::Color);
+  non_causal_result.allocate_texture(domain);
+  non_causal_result.bind_as_image(shader, "non_causal_output_img");
+
+  /* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
+   * causal one. */
+  compute_dispatch_threads_at_least(shader, int2(domain.size.y, 2), int2(128, 2));
+
+  GPU_shader_unbind();
+  input.unbind_as_texture();
+  causal_result.unbind_as_image();
+  non_causal_result.unbind_as_image();
+
+  sum_causal_and_non_causal_results(context, causal_result, non_causal_result, output);
+  causal_result.release();
+  non_causal_result.release();
+}
+
+void deriche_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
+{
+  BLI_assert_msg(math::reduce_max(sigma) >= 3.0f,
+                 "Deriche filter is slower and less accurate than direct convolution for sigma "
+                 "values less 3. Use direct convolution blur instead.");
+  BLI_assert_msg(math::reduce_max(sigma) < 32.0f,
+                 "Deriche filter is not accurate nor numerically stable for sigma values larger "
+                 "than 32. Use Van Vliet filter instead.");
+
+  Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
+  blur_pass(context, input, horizontal_pass_result, sigma.x);
+  blur_pass(context, horizontal_pass_result, output, sigma.y);
+  horizontal_pass_result.release();
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/intern/recursive_gaussian_blur.cc
+++ b/source/blender/compositor/realtime_compositor/algorithms/intern/recursive_gaussian_blur.cc
@ -0,0 +1,68 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+#include "COM_algorithm_deriche_gaussian_blur.hh"
+#include "COM_algorithm_symmetric_separable_blur.hh"
+#include "COM_algorithm_van_vliet_gaussian_blur.hh"
+
+namespace blender::realtime_compositor {
+
+/* Compute the Gaussian sigma from the radius, where the radius is in pixels. Blender's filter is
+ * truncated at |x| > 3 * sigma as can be seen in the R_FILTER_GAUSS case of the RE_filter_value
+ * function, so we divide by three to get the approximate sigma value. Further, ensure the radius
+ * is at least 1 since recursive Gaussian implementations can't handle zero radii. */
+static float2 compute_sigma_from_radius(float2 radius)
+{
+  return math::max(float2(1.0f), radius) / 3.0f;
+}
+
+/* Apply a recursive Gaussian blur algorithm on the input based on the general method outlined
+ * in the following paper:
+ *
+ *  Hale, Dave. "Recursive gaussian filters." CWP-546 (2006).
+ *
+ * In particular, based on the table in Section 5 Conclusion, for very low radius blur, we use a
+ * direct separable Gaussian convolution. For medium blur radius, we use the fourth order IIR
+ * Deriche filter based on the following paper:
+ *
+ *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+ *   1993.
+ *
+ * For high radius blur, we use the fourth order IIR Van Vliet filter based on the following paper:
+ *
+ *   Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
+ *   filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
+ *   98EX170). Vol. 1. IEEE, 1998.
+ *
+ * That's because direct convolution is faster and more accurate for very low radius, while the
+ * Deriche filter is more accurate for medium blur radius, while Van Vliet is more accurate for
+ * high blur radius. The criteria suggested by the paper is a sigma value threshold of 3 and 32 for
+ * the Deriche and Van Vliet filters respectively, which we apply on the larger of the two
+ * dimensions. */
+void recursive_gaussian_blur(Context &context, Result &input, Result &output, float2 radius)
+{
+  /* The radius is in pixel units, while both recursive implementations expect the sigma value of
+   * the Gaussian function. */
+  const float2 sigma = compute_sigma_from_radius(radius);
+
+  if (math::reduce_max(sigma) < 3.0f) {
+    symmetric_separable_blur(context, input, output, radius);
+    return;
+  }
+
+  if (math::reduce_max(sigma) < 32.0f) {
+    deriche_gaussian_blur(context, input, output, sigma);
+    return;
+  }
+
+  van_vliet_gaussian_blur(context, input, output, sigma);
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/intern/van_vliet_gaussian_blur.cc
+++ b/source/blender/compositor/realtime_compositor/algorithms/intern/van_vliet_gaussian_blur.cc
@ -0,0 +1,155 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_assert.h"
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+
+#include "GPU_shader.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+#include "COM_utilities.hh"
+
+#include "COM_algorithm_van_vliet_gaussian_blur.hh"
+#include "COM_van_vliet_gaussian_coefficients.hh"
+
+namespace blender::realtime_compositor {
+
+/* Sum all four of the causal and non causal outputs of the first and second filters and write the
+ * sum to the output. This is because the Van Vliet filter is implemented as a bank of 2 parallel
+ * second order filters, meaning its output is the sum of the causal and non causal filters of both
+ * filters. The output is expected not to be allocated as it will be allocated internally.
+ *
+ * The output is allocated and written transposed, that is, with a height equivalent to the width
+ * of the input and vice versa. This is done as a performance optimization. The blur pass will
+ * blur the image horizontally and write it to the intermediate output transposed. Then the
+ * vertical pass will execute the same horizontal blur shader, but since its input is transposed,
+ * it will effectively do a vertical blur and write to the output transposed, effectively undoing
+ * the transposition in the horizontal pass. This is done to improve spatial cache locality in the
+ * shader and to avoid having two separate shaders for each blur pass. */
+static void sum_causal_and_non_causal_results(Context &context,
+                                              Result &first_causal_input,
+                                              Result &first_non_causal_input,
+                                              Result &second_causal_input,
+                                              Result &second_non_causal_input,
+                                              Result &output)
+{
+  GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur_sum");
+  GPU_shader_bind(shader);
+
+  first_causal_input.bind_as_texture(shader, "first_causal_input_tx");
+  first_non_causal_input.bind_as_texture(shader, "first_non_causal_input_tx");
+  second_causal_input.bind_as_texture(shader, "second_causal_input_tx");
+  second_non_causal_input.bind_as_texture(shader, "second_non_causal_input_tx");
+
+  const Domain domain = first_causal_input.domain();
+  const int2 transposed_domain = int2(domain.size.y, domain.size.x);
+  output.allocate_texture(transposed_domain);
+  output.bind_as_image(shader, "output_img");
+
+  compute_dispatch_threads_at_least(shader, domain.size);
+
+  GPU_shader_unbind();
+  first_causal_input.unbind_as_texture();
+  first_non_causal_input.unbind_as_texture();
+  second_causal_input.unbind_as_texture();
+  second_non_causal_input.unbind_as_texture();
+  output.unbind_as_image();
+}
+
+static void blur_pass(Context &context, Result &input, Result &output, float sigma)
+{
+  GPUShader *shader = context.get_shader("compositor_van_vliet_gaussian_blur");
+  GPU_shader_bind(shader);
+
+  const VanVlietGaussianCoefficients &coefficients =
+      context.cache_manager().van_vliet_gaussian_coefficients.get(context, sigma);
+
+  GPU_shader_uniform_2fv(
+      shader, "first_feedback_coefficients", float2(coefficients.first_feedback_coefficients()));
+  GPU_shader_uniform_2fv(shader,
+                         "first_causal_feedforward_coefficients",
+                         float2(coefficients.first_causal_feedforward_coefficients()));
+  GPU_shader_uniform_2fv(shader,
+                         "first_non_causal_feedforward_coefficients",
+                         float2(coefficients.first_non_causal_feedforward_coefficients()));
+  GPU_shader_uniform_2fv(
+      shader, "second_feedback_coefficients", float2(coefficients.second_feedback_coefficients()));
+  GPU_shader_uniform_2fv(shader,
+                         "second_causal_feedforward_coefficients",
+                         float2(coefficients.second_causal_feedforward_coefficients()));
+  GPU_shader_uniform_2fv(shader,
+                         "second_non_causal_feedforward_coefficients",
+                         float2(coefficients.second_non_causal_feedforward_coefficients()));
+  GPU_shader_uniform_1f(shader,
+                        "first_causal_boundary_coefficient",
+                        float(coefficients.first_causal_boundary_coefficient()));
+  GPU_shader_uniform_1f(shader,
+                        "first_non_causal_boundary_coefficient",
+                        float(coefficients.first_non_causal_boundary_coefficient()));
+  GPU_shader_uniform_1f(shader,
+                        "second_causal_boundary_coefficient",
+                        float(coefficients.second_causal_boundary_coefficient()));
+  GPU_shader_uniform_1f(shader,
+                        "second_non_causal_boundary_coefficient",
+                        float(coefficients.second_non_causal_boundary_coefficient()));
+
+  input.bind_as_texture(shader, "input_tx");
+
+  const Domain domain = input.domain();
+
+  Result first_causal_result = context.create_temporary_result(ResultType::Color);
+  first_causal_result.allocate_texture(domain);
+  first_causal_result.bind_as_image(shader, "first_causal_output_img");
+
+  Result first_non_causal_result = context.create_temporary_result(ResultType::Color);
+  first_non_causal_result.allocate_texture(domain);
+  first_non_causal_result.bind_as_image(shader, "first_non_causal_output_img");
+
+  Result second_causal_result = context.create_temporary_result(ResultType::Color);
+  second_causal_result.allocate_texture(domain);
+  second_causal_result.bind_as_image(shader, "second_causal_output_img");
+
+  Result second_non_causal_result = context.create_temporary_result(ResultType::Color);
+  second_non_causal_result.allocate_texture(domain);
+  second_non_causal_result.bind_as_image(shader, "second_non_causal_output_img");
+
+  /* The second dispatch dimension is 4 dispatches, one for the first causal filter, one for the
+   * first non causal filter, one for the second causal filter, and one for the second non causal
+   * filter. */
+  compute_dispatch_threads_at_least(shader, int2(domain.size.y, 4), int2(64, 4));
+
+  GPU_shader_unbind();
+  input.unbind_as_texture();
+  first_causal_result.unbind_as_image();
+  first_non_causal_result.unbind_as_image();
+  second_causal_result.unbind_as_image();
+  second_non_causal_result.unbind_as_image();
+
+  sum_causal_and_non_causal_results(context,
+                                    first_causal_result,
+                                    first_non_causal_result,
+                                    second_causal_result,
+                                    second_non_causal_result,
+                                    output);
+  first_causal_result.release();
+  first_non_causal_result.release();
+  second_causal_result.release();
+  second_non_causal_result.release();
+}
+
+void van_vliet_gaussian_blur(Context &context, Result &input, Result &output, float2 sigma)
+{
+  BLI_assert_msg(math::reduce_max(sigma) >= 32.0f,
+                 "Van Vliet filter is less accurate for sigma values less than 32. Use Deriche "
+                 "filter instead or direct convolution instead.");
+
+  Result horizontal_pass_result = context.create_temporary_result(ResultType::Color);
+  blur_pass(context, input, horizontal_pass_result, sigma.x);
+  blur_pass(context, horizontal_pass_result, output, sigma.y);
+  horizontal_pass_result.release();
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/cached_resources/COM_deriche_gaussian_coefficients.hh
+++ b/source/blender/compositor/realtime_compositor/cached_resources/COM_deriche_gaussian_coefficients.hh
@ -0,0 +1,87 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "BLI_map.hh"
+#include "BLI_math_vector_types.hh"
+
+#include "COM_cached_resource.hh"
+
+namespace blender::realtime_compositor {
+
+class Context;
+
+/* ------------------------------------------------------------------------------------------------
+ * Deriche Gaussian Coefficients Key.
+ */
+class DericheGaussianCoefficientsKey {
+ public:
+  float sigma;
+
+  DericheGaussianCoefficientsKey(float sigma);
+
+  uint64_t hash() const;
+};
+
+bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b);
+
+/* -------------------------------------------------------------------------------------------------
+ * Deriche Gaussian Coefficients.
+ *
+ * A caches resource that computes and caches the coefficients of the fourth order IIR filter
+ * approximating a Gaussian filter computed using Deriche's design method. This is based on the
+ * following paper:
+ *
+ *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+ *   1993.
+ */
+class DericheGaussianCoefficients : public CachedResource {
+ private:
+  /* The d_ii coefficients in Equation (28) and (29). Those are the same for the causal and non
+   * causal filters as can be seen in Equation (31). */
+  double4 feedback_coefficients_;
+  /* The n_ii^+ coefficients in Equation (28). */
+  double4 causal_feedforward_coefficients_;
+  /* The n_ii^- coefficients in Equation (29). */
+  double4 non_causal_feedforward_coefficients_;
+  /* The difference equation in Equation (28) rely on previous outputs to compute the new output,
+   * and those previous outputs need to be properly initialized somehow. To do Neumann boundary
+   * condition, we multiply the boundary value with this coefficient to simulate an infinite stream
+   * of the boundary value. See the implementation for more information. */
+  double causal_boundary_coefficient_;
+  /* Same as causal_boundary_coefficient_ but for the non causal filter. */
+  double non_causal_boundary_coefficient_;
+
+ public:
+  DericheGaussianCoefficients(Context &context, float sigma);
+
+  const double4 &feedback_coefficients() const;
+  const double4 &causal_feedforward_coefficients() const;
+  const double4 &non_causal_feedforward_coefficients() const;
+  double causal_boundary_coefficient() const;
+  double non_causal_boundary_coefficient() const;
+};
+
+/* ------------------------------------------------------------------------------------------------
+ * Deriche Gaussian Coefficients Container.
+ */
+class DericheGaussianCoefficientsContainer : CachedResourceContainer {
+ private:
+  Map<DericheGaussianCoefficientsKey, std::unique_ptr<DericheGaussianCoefficients>> map_;
+
+ public:
+  void reset() override;
+
+  /* Check if there is an available DericheGaussianCoefficients cached resource with the given
+   * parameters in the container, if one exists, return it, otherwise, return a newly created one
+   * and add it to the container. In both cases, tag the cached resource as needed to keep it
+   * cached for the next evaluation. */
+  DericheGaussianCoefficients &get(Context &context, float sigma);
+};
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/cached_resources/COM_van_vliet_gaussian_coefficients.hh
+++ b/source/blender/compositor/realtime_compositor/cached_resources/COM_van_vliet_gaussian_coefficients.hh
@ -0,0 +1,106 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "BLI_map.hh"
+#include "BLI_math_vector_types.hh"
+
+#include "COM_cached_resource.hh"
+
+namespace blender::realtime_compositor {
+
+class Context;
+
+/* ------------------------------------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients Key.
+ */
+class VanVlietGaussianCoefficientsKey {
+ public:
+  float sigma;
+
+  VanVlietGaussianCoefficientsKey(float sigma);
+
+  uint64_t hash() const;
+};
+
+bool operator==(const VanVlietGaussianCoefficientsKey &a,
+                const VanVlietGaussianCoefficientsKey &b);
+
+/* -------------------------------------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients.
+ *
+ * A caches resource that computes and caches the coefficients of the fourth order IIR filter
+ * approximating a Gaussian filter computed using Van Vliet's design method. This is based on the
+ * following paper:
+ *
+ *   Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
+ *   filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
+ *   98EX170). Vol. 1. IEEE, 1998.
+ *
+ * However, to improve the numerical stability of the filter, it is decomposed into a bank of
+ * two parallel second order IIR filters, each having a causal and a non causal filter. */
+class VanVlietGaussianCoefficients : public CachedResource {
+ private:
+  /* The causal and non causal feedforward coefficients for the first second order filter. */
+  double2 first_causal_feedforward_coefficients_;
+  double2 first_non_causal_feedforward_coefficients_;
+  /* The feedback coefficients for the first second order filter. This is the same for both the
+   * causal and non causal filters. */
+  double2 first_feedback_coefficients_;
+
+  /* The causal and non causal feedforward coefficients for the second second order filter. */
+  double2 second_causal_feedforward_coefficients_;
+  double2 second_non_causal_feedforward_coefficients_;
+  /* The feedback coefficients for the second second order filter. This is the same for both the
+   * causal and non causal filters. */
+  double2 second_feedback_coefficients_;
+
+  /* The difference equation of the IIR filter rely on previous outputs to compute the new output,
+   * and those previous outputs need to be properly initialized somehow. To do Neumann boundary
+   * condition, we multiply the boundary value with this coefficient to simulate an infinite stream
+   * of the boundary value. See the implementation for more information. */
+  double first_causal_boundary_coefficient_;
+  double first_non_causal_boundary_coefficient_;
+  double second_causal_boundary_coefficient_;
+  double second_non_causal_boundary_coefficient_;
+
+ public:
+  VanVlietGaussianCoefficients(Context &context, float sigma);
+
+  const double2 &first_causal_feedforward_coefficients() const;
+  const double2 &first_non_causal_feedforward_coefficients() const;
+  const double2 &first_feedback_coefficients() const;
+
+  const double2 &second_causal_feedforward_coefficients() const;
+  const double2 &second_non_causal_feedforward_coefficients() const;
+  const double2 &second_feedback_coefficients() const;
+
+  double first_causal_boundary_coefficient() const;
+  double first_non_causal_boundary_coefficient() const;
+  double second_causal_boundary_coefficient() const;
+  double second_non_causal_boundary_coefficient() const;
+};
+
+/* ------------------------------------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients Container.
+ */
+class VanVlietGaussianCoefficientsContainer : CachedResourceContainer {
+ private:
+  Map<VanVlietGaussianCoefficientsKey, std::unique_ptr<VanVlietGaussianCoefficients>> map_;
+
+ public:
+  void reset() override;
+
+  /* Check if there is an available VanVlietGaussianCoefficients cached resource with the given
+   * parameters in the container, if one exists, return it, otherwise, return a newly created one
+   * and add it to the container. In both cases, tag the cached resource as needed to keep it
+   * cached for the next evaluation. */
+  VanVlietGaussianCoefficients &get(Context &context, float sigma);
+};
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/cached_resources/intern/deriche_gaussian_coefficients.cc
+++ b/source/blender/compositor/realtime_compositor/cached_resources/intern/deriche_gaussian_coefficients.cc
@ -0,0 +1,319 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* -------------------------------------------------------------------------------------------------
+ * Deriche Gaussian Coefficients.
+ *
+ * Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
+ * computed using Deriche's design method. This is based on the following paper:
+ *
+ *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+ *   1993.
+ *
+ * But with corrections in the normalization scale from the following paper, as will be seen in the
+ * implementation:
+ *
+ *   Farneback, Gunnar, and Carl-Fredrik Westin. Improving Deriche-style recursive Gaussian
+ *   filters. Journal of Mathematical Imaging and Vision 26.3 (2006): 293-299.
+ *
+ * The Deriche filter is computed as the sum of a causal and a non causal sequence of second order
+ * difference equations as can be seen in Equation (30) in Deriche's paper, and the target of this
+ * class is to compute the feedback, causal feedforward, and non causal feedforward coefficients of
+ * the filter. */
+
+#include <cstdint>
+#include <memory>
+
+#include "BLI_hash.hh"
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+
+#include "COM_context.hh"
+#include "COM_deriche_gaussian_coefficients.hh"
+
+namespace blender::realtime_compositor {
+
+/* --------------------------------------------------------------------
+ * Deriche Gaussian Coefficients Key.
+ */
+
+DericheGaussianCoefficientsKey::DericheGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
+
+uint64_t DericheGaussianCoefficientsKey::hash() const
+{
+  return get_default_hash(sigma);
+}
+
+bool operator==(const DericheGaussianCoefficientsKey &a, const DericheGaussianCoefficientsKey &b)
+{
+  return a.sigma == b.sigma;
+}
+
+/* -------------------------------------------------------------------------------------------------
+ * Deriche Gaussian Coefficients.
+ */
+
+/* The base constant coefficients computed using Deriche's method with 10 digits of precision.
+ * Those are available in Deriche's paper by comparing Equations (19) and (38). */
+inline constexpr static double a0 = 1.6797292232361107;
+inline constexpr static double a1 = 3.7348298269103580;
+inline constexpr static double b0 = 1.7831906544515104;
+inline constexpr static double b1 = 1.7228297663338028;
+inline constexpr static double c0 = -0.6802783501806897;
+inline constexpr static double c1 = -0.2598300478959625;
+inline constexpr static double w0 = 0.6318113174569493;
+inline constexpr static double w1 = 1.9969276832487770;
+
+/* Computes n00 in Equation (21) in Deriche's paper. */
+static double compute_numerator_0()
+{
+  return a0 + c0;
+}
+
+/* Computes n11 in Equation (21) in Deriche's paper. */
+static double compute_numerator_1(float sigma)
+{
+  const double multiplier1 = math::exp(-b1 / sigma);
+  const double term1 = c1 * math::sin(w1 / sigma) - (c0 + 2.0 * a0) * math::cos(w1 / sigma);
+
+  const double multiplier2 = math::exp(-b0 / sigma);
+  const double term2 = a1 * math::sin(w0 / sigma) - (2.0 * c0 + a0) * math::cos(w0 / sigma);
+
+  return multiplier1 * term1 + multiplier2 * term2;
+}
+
+/* Computes n22 in Equation (21) in Deriche's paper. */
+static double compute_numerator_2(float sigma)
+{
+  const double multiplier1 = 2.0 * math::exp(-(b0 / sigma) - (b1 / sigma));
+  const double term11 = (a0 + c0) * math::cos(w1 / sigma) * math::cos(w0 / sigma);
+  const double term12 = math::cos(w1 / sigma) * a1 * math::sin(w0 / sigma);
+  const double term13 = math::cos(w0 / sigma) * c1 * math::sin(w1 / sigma);
+  const double term1 = term11 - term12 - term13;
+
+  const double term2 = c0 * math::exp(-2.0 * (b0 / sigma));
+  const double term3 = a0 * math::exp(-2.0 * (b1 / sigma));
+
+  return multiplier1 * term1 + term2 + term3;
+}
+
+/* Computes n33 in Equation (21) in Deriche's paper. */
+static double compute_numerator_3(float sigma)
+{
+  const double multiplier1 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
+  const double term1 = c1 * math::sin(w1 / sigma) - math::cos(w1 / sigma) * c0;
+
+  const double multiplier2 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
+  const double term2 = a1 * math::sin(w0 / sigma) - math::cos(w0 / sigma) * a0;
+
+  return multiplier1 * term1 + multiplier2 * term2;
+}
+
+/* Computes and packs the numerators in Equation (21) in Deriche's paper. */
+static double4 compute_numerator(float sigma)
+{
+  const double n0 = compute_numerator_0();
+  const double n1 = compute_numerator_1(sigma);
+  const double n2 = compute_numerator_2(sigma);
+  const double n3 = compute_numerator_3(sigma);
+
+  return double4(n0, n1, n2, n3);
+}
+
+/* Computes d11 in Equation (22) in Deriche's paper. */
+static double compute_denominator_1(float sigma)
+{
+  const double term1 = -2.0 * math::exp(-(b0 / sigma)) * math::cos(w0 / sigma);
+  const double term2 = 2.0 * math::exp(-(b1 / sigma)) * math::cos(w1 / sigma);
+
+  return term1 - term2;
+}
+
+/* Computes d22 in Equation (22) in Deriche's paper. */
+static double compute_denominator_2(float sigma)
+{
+  const double term1 = 4.0 * math::cos(w1 / sigma) * math::cos(w0 / sigma);
+  const double multiplier1 = math::exp(-(b0 / sigma) - (b1 / sigma));
+
+  const double term2 = math::exp(-2.0 * (b1 / sigma));
+  const double term3 = math::exp(-2.0 * (b0 / sigma));
+
+  return term1 * multiplier1 + term2 + term3;
+}
+
+/* Computes d33 in Equation (22) in Deriche's paper. */
+static double compute_denominator_3(float sigma)
+{
+  const double term1 = -2.0 * math::cos(w0 / sigma);
+  const double multiplier1 = math::exp(-(b0 / sigma) - 2.0 * (b1 / sigma));
+
+  const double term2 = 2.0 * math::cos(w1 / sigma);
+  const double multiplier2 = math::exp(-(b1 / sigma) - 2.0 * (b0 / sigma));
+
+  return term1 * multiplier1 - term2 * multiplier2;
+}
+
+/* Computes d44 in Equation (22) in Deriche's paper. */
+static double compute_denominator_4(float sigma)
+{
+  return math::exp(-2.0 * (b0 / sigma) - 2.0 * (b1 / sigma));
+}
+
+/* Computes and packs the denominators in Equation (22) in Deriche's paper. */
+static double4 compute_denominator(float sigma)
+{
+  const double d1 = compute_denominator_1(sigma);
+  const double d2 = compute_denominator_2(sigma);
+  const double d3 = compute_denominator_3(sigma);
+  const double d4 = compute_denominator_4(sigma);
+
+  return double4(d1, d2, d3, d4);
+}
+
+/* Computes the normalization scale that the feedforward coefficients should be divided by to
+ * match the unit integral of the Gaussian. The scaling factor proposed by Deriche's paper in
+ * Equation (50) is wrong due to missing terms. A correct scaling factor is presented in
+ * Farneback's paper in Equation (25), which is implemented in this method. */
+static float compute_normalization_scale(const double4 &causal_feedforward_coefficients,
+                                         const double4 &feedback_coefficients)
+{
+  const double causal_feedforwad_sum = math::reduce_add(causal_feedforward_coefficients);
+  const double feedback_sum = 1.0 + math::reduce_add(feedback_coefficients);
+  return 2.0 * (causal_feedforwad_sum / feedback_sum) - causal_feedforward_coefficients[0];
+}
+
+/* Computes the non causal feedforward coefficients from the feedback and causal feedforward
+ * coefficients based on Equation (31) in Deriche's paper. Notice that the equation is linear, so
+ * the coefficients can be computed after the normalization of the causal feedforward
+ * coefficients. */
+static double4 compute_non_causal_feedforward_coefficients(
+    const double4 &causal_feedforward_coefficients, const double4 &feedback_coefficients)
+{
+  const double n1 = causal_feedforward_coefficients[1] -
+                    feedback_coefficients[0] * causal_feedforward_coefficients[0];
+  const double n2 = causal_feedforward_coefficients[2] -
+                    feedback_coefficients[1] * causal_feedforward_coefficients[0];
+  const double n3 = causal_feedforward_coefficients[3] -
+                    feedback_coefficients[2] * causal_feedforward_coefficients[0];
+  const double n4 = -feedback_coefficients[3] * causal_feedforward_coefficients[0];
+
+  return double4(n1, n2, n3, n4);
+}
+
+/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
+ * previous outputs are not really defined at the start of the filter. To do Neumann boundary
+ * condition, we initialize the previous output with a special value that is a function of the
+ * boundary value. This special value is computed by multiply the boundary value with a coefficient
+ * to simulate an infinite stream of the boundary value.
+ *
+ * The function for the coefficient can be derived by substituting the boundary value for previous
+ * inputs, equating all current and previous outputs to the same value, and finally rearranging to
+ * compute that same output value.
+ *
+ * Start by the difference equation where b_i are the feedforward coefficients and a_i are the
+ * feedback coefficients:
+ *
+ *   y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
+ *
+ * Assume all outputs are y and all inputs are x, which is the boundary value:
+ *
+ *   y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
+ *
+ * Now rearrange to compute y:
+ *
+ *   y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
+ *   y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
+ *   y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
+ *   y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
+ *
+ * So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
+ * that is, we are doing Dirichlet boundary condition, the equations still hold. */
+static double compute_boundary_coefficient(const double4 &feedforward_coefficients,
+                                           const double4 &feedback_coefficients)
+{
+  return math::reduce_add(feedforward_coefficients) /
+         (1.0 + math::reduce_add(feedback_coefficients));
+}
+
+/* Computes the feedback, causal feedforward, and non causal feedforward coefficients given a
+ * target Gaussian sigma value as used in Equations (28) and (29) in Deriche's paper. */
+DericheGaussianCoefficients::DericheGaussianCoefficients(Context & /*context*/, float sigma)
+{
+  /* The numerator coefficients are the causal feedforward coefficients and the denominator
+   * coefficients are the feedback coefficients as can be seen in Equation (28). */
+  causal_feedforward_coefficients_ = compute_numerator(sigma);
+  feedback_coefficients_ = compute_denominator(sigma);
+
+  /* Normalize the feedforward coefficients as discussed in Section "5.4 Normalization" in
+   * Deriche's paper. Feedback coefficients do not need normalization. */
+  causal_feedforward_coefficients_ /= compute_normalization_scale(causal_feedforward_coefficients_,
+                                                                  feedback_coefficients_);
+
+  /* Compute the non causal feedforward coefficients from the feedback and normalized causal
+   * feedforward coefficients based on Equation (31) from Deriche's paper. Since the causal
+   * coefficients are already normalized, this doesn't need normalization. */
+  non_causal_feedforward_coefficients_ = compute_non_causal_feedforward_coefficients(
+      causal_feedforward_coefficients_, feedback_coefficients_);
+
+  /* Compute the boundary coefficient for both the causal and non causal filters. */
+  causal_boundary_coefficient_ = compute_boundary_coefficient(causal_feedforward_coefficients_,
+                                                              feedback_coefficients_);
+  non_causal_boundary_coefficient_ = compute_boundary_coefficient(
+      non_causal_feedforward_coefficients_, feedback_coefficients_);
+}
+
+const double4 &DericheGaussianCoefficients::feedback_coefficients() const
+{
+  return feedback_coefficients_;
+}
+
+const double4 &DericheGaussianCoefficients::causal_feedforward_coefficients() const
+{
+  return causal_feedforward_coefficients_;
+}
+
+const double4 &DericheGaussianCoefficients::non_causal_feedforward_coefficients() const
+{
+  return non_causal_feedforward_coefficients_;
+}
+
+double DericheGaussianCoefficients::causal_boundary_coefficient() const
+{
+  return causal_boundary_coefficient_;
+}
+
+double DericheGaussianCoefficients::non_causal_boundary_coefficient() const
+{
+  return non_causal_boundary_coefficient_;
+}
+
+/* --------------------------------------------------------------------
+ * Deriche Gaussian Coefficients Container.
+ */
+
+void DericheGaussianCoefficientsContainer::reset()
+{
+  /* First, delete all resources that are no longer needed. */
+  map_.remove_if([](auto item) { return !item.value->needed; });
+
+  /* Second, reset the needed status of the remaining resources to false to ready them to track
+   * their needed status for the next evaluation. */
+  for (auto &value : map_.values()) {
+    value->needed = false;
+  }
+}
+
+DericheGaussianCoefficients &DericheGaussianCoefficientsContainer::get(Context &context,
+                                                                       float sigma)
+{
+  const DericheGaussianCoefficientsKey key(sigma);
+
+  auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
+      key, [&]() { return std::make_unique<DericheGaussianCoefficients>(context, sigma); });
+
+  deriche_gaussian_coefficients.needed = true;
+  return deriche_gaussian_coefficients;
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/cached_resources/intern/van_vliet_gaussian_coefficients.cc
+++ b/source/blender/compositor/realtime_compositor/cached_resources/intern/van_vliet_gaussian_coefficients.cc
@ -0,0 +1,536 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* -------------------------------------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients.
+ *
+ * Computes the coefficients of the fourth order IIR filter approximating a Gaussian filter
+ * computed using Van Vliet's design method. This is based on the following paper:
+ *
+ *   Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
+ *   filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
+ *   98EX170). Vol. 1. IEEE, 1998.
+ *
+ *
+ * The filter is computed as the cascade of a causal and a non causal sequences of second order
+ * difference equations as can be seen in Equation (11) in Van Vliet's paper. The coefficients are
+ * the same for both the causal and non causal sequences.
+ *
+ * However, to improve its numerical stability, we decompose the 4th order filter into a parallel
+ * bank of second order filers using the methods of partial fractions as demonstrated in the follow
+ * book:
+ *
+ *   Oppenheim, Alan V. Discrete-time signal processing. Pearson Education India, 1999.
+ *
+ */
+
+#include <array>
+#include <complex>
+#include <cstdint>
+#include <memory>
+
+#include "BLI_assert.h"
+#include "BLI_hash.hh"
+#include "BLI_math_base.hh"
+#include "BLI_math_vector.hh"
+
+#include "COM_context.hh"
+#include "COM_van_vliet_gaussian_coefficients.hh"
+
+namespace blender::realtime_compositor {
+
+/* --------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients Key.
+ */
+
+VanVlietGaussianCoefficientsKey::VanVlietGaussianCoefficientsKey(float sigma) : sigma(sigma) {}
+
+uint64_t VanVlietGaussianCoefficientsKey::hash() const
+{
+  return get_default_hash(sigma);
+}
+
+bool operator==(const VanVlietGaussianCoefficientsKey &a, const VanVlietGaussianCoefficientsKey &b)
+{
+  return a.sigma == b.sigma;
+}
+
+/* -------------------------------------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients.
+ */
+
+/* Computes the variance of the Gaussian filter represented by the given poles scaled by the given
+ * scale factor. This is based on Equation (20) in Van Vliet's paper. */
+static double compute_scaled_poles_variance(const std::array<std::complex<double>, 4> &poles,
+                                            double scale_factor)
+{
+  std::complex<double> variance = std::complex<double>(0.0, 0.0);
+  for (const std::complex<double> &pole : poles) {
+    const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
+    const double phase = std::arg(pole) / scale_factor;
+    const std::complex<double> multiplier1 = std::polar(magnitude, phase);
+    const std::complex<double> multiplier2 = std::pow(magnitude - std::polar(1.0, phase), -2.0);
+    variance += 2.0 * multiplier1 * multiplier2;
+  }
+
+  /* The variance is actually real valued as guaranteed by Equations (10) and (2) since the poles
+   * are complex conjugate pairs. See Section 3.3 of the paper. */
+  return variance.real();
+}
+
+/* Computes the partial derivative with respect to the scale factor at the given scale factor of
+ * the variance of the Gaussian filter represented by the given poles scaled by the given scale
+ * factor. This is based on the partial derivative with respect to the scale factor of Equation
+ * (20) in Van Vliet's paper.
+ *
+ * The derivative is not listed in the paper, but was computed manually as the sum of the following
+ * for each of the poles:
+ *
+ *   \frac{
+ *     2a^\frac{1}{x}e^\frac{ib}{x} (e^\frac{ib}{x}+a^\frac{1}{x}) (\ln(a)-ib)
+ *   }{
+ *     x^2 (a^\frac{1}{x}-e^\frac{ib}{x})^3
+ *   }
+ *
+ * Where "x" is the scale factor, "a" is the magnitude of the pole, and "b" is its phase. */
+static double compute_scaled_poles_variance_derivative(
+    const std::array<std::complex<double>, 4> &poles, double scale_factor)
+{
+  std::complex<double> variance_derivative = std::complex<double>(0.0, 0.0);
+  for (const std::complex<double> &pole : poles) {
+    const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
+    const double phase = std::arg(pole) / scale_factor;
+
+    const std::complex<double> multiplier1 = std::polar(magnitude, phase);
+    const std::complex<double> multiplier2 = magnitude + std::polar(1.0, phase);
+    const std::complex<double> multiplier3 = std::log(std::abs(pole)) -
+                                             std::complex<double>(0.0, std::arg(pole));
+
+    const std::complex<double> divisor1 = std::pow(magnitude - std::polar(1.0, phase), 3.0);
+    const std::complex<double> divisor2 = math::square(scale_factor);
+
+    variance_derivative += 2.0 * multiplier1 * multiplier2 * multiplier3 / (divisor1 * divisor2);
+  }
+
+  /* The variance derivative is actually real valued as guaranteed by Equations (10) and (2) since
+   * the poles are complex conjugate pairs. See Section 3.3 of the paper. */
+  return variance_derivative.real();
+}
+
+/* The poles were computed for a Gaussian filter with a sigma value of 2, in order to generalize
+ * that for any sigma value, we need to scale the poles by a certain scaling factor as described in
+ * Section 4.2 of Van Vliet's paper. To find the scaling factor, we start from an initial guess of
+ * half sigma, then iteratively improve the guess using Newton's method by computing the variance
+ * and its derivative based on Equation (20). */
+static double find_scale_factor(const std::array<std::complex<double>, 4> &poles,
+                                float reference_sigma)
+{
+  const double reference_variance = math::square(reference_sigma);
+
+  /* Note that the poles were computed for a Gaussian filter with a sigma value of 2, so it it
+   * as if we have a base scale of 2, and we start with half sigma as an initial guess. See
+   * Section 4.2 for more information */
+  double scale_factor = reference_sigma / 2.0;
+
+  const int maximum_interations = 10;
+  for (int i = 0; i < maximum_interations; i++) {
+    const double variance = compute_scaled_poles_variance(poles, scale_factor);
+
+    /* Close enough, we have found our scale factor. */
+    if (math::abs(reference_variance - variance) < 1.0e-8) {
+      return scale_factor;
+    }
+
+    /* Improve guess using Newton's method. Notice that Newton's method is a root finding method,
+     * so we supply the difference to the reference variance as our function, since the zero point
+     * will be when the variance is equal to the reference one. The derivative is not affected
+     * since the reference variance is a constant. */
+    const double derivative = compute_scaled_poles_variance_derivative(poles, scale_factor);
+    scale_factor -= (variance - reference_variance) / derivative;
+  }
+
+  /* The paper mentions that only a few iterations are needed, so if we didn't converge after
+   * maximum_interations, something is probably wrong. */
+  BLI_assert_unreachable();
+  return scale_factor;
+}
+
+/* The poles were computed for a Gaussian filter with a sigma value of 2, so scale them using
+ * Equation (19) in Van Vliet's paper to have the given sigma value. This involves finding the
+ * appropriate scale factor based on Equation (20), see Section 4.2 and the find_scale_factor
+ * method for more information. */
+static std::array<std::complex<double>, 4> computed_scaled_poles(
+    const std::array<std::complex<double>, 4> &poles, float sigma)
+{
+  const double scale_factor = find_scale_factor(poles, sigma);
+
+  std::array<std::complex<double>, 4> scaled_poles;
+  for (int i = 0; i < poles.size(); i++) {
+    const std::complex<double> &pole = poles[i];
+    const double magnitude = std::pow(std::abs(pole), 1.0 / scale_factor);
+    const double phase = std::arg(pole) / scale_factor;
+    scaled_poles[i] = std::polar(magnitude, phase);
+  }
+
+  return scaled_poles;
+}
+
+/* Compute the causal poles from the non causal ones. Since the Gaussian is a real even function,
+ * the causal poles are just the inverse of the non causal poles, as noted in Equation (2) in Van
+ * Vliet's paper. */
+static std::array<std::complex<double>, 4> compute_causal_poles(
+    const std::array<std::complex<double>, 4> &non_causal_poles)
+{
+  std::array<std::complex<double>, 4> causal_poles;
+  for (int i = 0; i < non_causal_poles.size(); i++) {
+    causal_poles[i] = 1.0 / non_causal_poles[i];
+  }
+
+  return causal_poles;
+}
+
+/* Computes the feedback coefficients from the given poles based on the equations in Equation (13)
+ * in Van Vliet's paper. See Section 3.2 for more information. */
+static double4 compute_feedback_coefficients(const std::array<std::complex<double>, 4> &poles)
+{
+  /* Compute the gain of the poles, which is the "b" at the end of Equation (13). */
+  std::complex<double> gain = std::complex<double>(1.0, 0.0);
+  for (const std::complex<double> &pole : poles) {
+    gain /= pole;
+  }
+
+  /* Compute the coefficients b4, b3, b2, and b1 based on the expressions b_N, b_N-1, b_N-2, and
+   * b_N-3 respectively in Equation (13). b4 and b3 are trivial, while b2 and b1 can be computed by
+   * drawing the following summation trees, where each path from the root to the leaf is multiplied
+   * and added:
+   *
+   *                  b2
+   *             ____/|\____
+   *            /     |     \
+   *   i -->   2      3      4
+   *           |     / \    /|\
+   *   j -->   1    1   2  1 2 3
+   *
+   *                 b1
+   *             ___/ \___
+   *            /         \
+   *   i -->   3           4
+   *           |          / \
+   *   j -->   2         2   3
+   *           |         |  / \
+   *   k -->   1         1 1   2
+   *
+   * Notice that the values of i, j, and k are 1-index, so we need to subtract one when accessing
+   * the poles. */
+  const std::complex<double> b4 = gain;
+  const std::complex<double> b3 = -gain * (poles[0] + poles[1] + poles[2] + poles[3]);
+  const std::complex<double> b2 = gain * (poles[1] * poles[0] + poles[2] * poles[0] +
+                                          poles[2] * poles[1] + poles[3] * poles[0] +
+                                          poles[3] * poles[1] + poles[3] * poles[2]);
+  const std::complex<double> b1 = -gain * (poles[2] * poles[1] * poles[0] +
+                                           poles[3] * poles[1] * poles[0] +
+                                           poles[3] * poles[2] * poles[0] +
+                                           poles[3] * poles[2] * poles[1]);
+
+  /* The coefficients are actually real valued as guaranteed by Equations (10) and (2) since
+   * the poles are complex conjugate pairs. See Section 3.3 of the paper. */
+  const double4 coefficients = double4(b1.real(), b2.real(), b3.real(), b4.real());
+
+  return coefficients;
+}
+
+/* Computes the feedforward coefficient from the feedback coefficients based on Equation (12) of
+ * Van Vliet's paper. See Section 3.2 for more information. */
+static double compute_feedforward_coefficient(const double4 &feedback_coefficients)
+{
+  return 1.0 + math::reduce_add(feedback_coefficients);
+}
+
+/* Computes the residue of the partial fraction of the transfer function of the given causal poles
+ * and gain for the given target pole. This essentially evaluates Equation (3.41) in Oppenheim's
+ * book, where d_k is the target pole and assuming the transfer function is in the form given in
+ * Equation (3.39), where d_k are the poles. See the following derivation for the gain value.
+ *
+ * For the particular case of the Van Vliet's system, there are no zeros, so the numerator in
+ * Equation (3.39) is one. Further note that Van Vliet's formulation is different from the expected
+ * form, so we need to rearrange Equation (3) in to match the form in Equation (3.39), which is
+ * shown below.
+ *
+ * Start from the causal term of Equation (3):
+ *
+ *   H_+(z) = \prod_{i=1}^N \frac{d_i - 1}{d_i - z^{-1}}
+ *
+ * Divide by d_i:
+ *
+ *   H_+(z) = \prod_{i=1}^N \frac{1 - d_i^{-1}}{1 - d_i^{-1}z^{-1}}
+ *
+ * Move the numerator to its own product:
+ *
+ *   H_+(z) = \prod_{i=1}^N 1 - d_i^{-1} \prod_{i=1}^N \frac{1}{1 - d_i^{-1}z^{-1}}
+ *
+ * And we reach the same form as Equation (3.39). Where the first product term is b0 / a0 and is
+ * also the given gain value, which is also the same as the feedforward coefficient denoted by
+ * the alpha in Equation (12). Further d_i^{-1} in our derivation is the same as d_k in Equation
+ * (3.39), the discrepancy in the inverse operator is the fact that Van Vliet's derivation assume
+ * non causal poles, while Oppenheim's assume causal poles, which are inverse of each other as can
+ * be seen in the compute_causal_poles function. */
+static std::complex<double> compute_partial_fraction_residue(
+    const std::array<std::complex<double>, 4> &poles,
+    const std::complex<double> &target_pole,
+    double gain)
+{
+  /* Evaluating Equation (3.41) actually corresponds to omitting the terms in Equation (3.39) that
+   * corresponds to the target pole or its conjugate, because they get canceled by the first term
+   * in Equation (3.41). That's we are essentially evaluating the limit as the expression tends to
+   * the target pole. */
+  std::complex<double> target_pole_inverse = 1.0 / target_pole;
+  std::complex<double> residue = std::complex<double>(1.0, 0.0);
+  for (const std::complex<double> &pole : poles) {
+    if (pole != target_pole && pole != std::conj(target_pole)) {
+      residue *= 1.0 - pole * target_pole_inverse;
+    }
+  }
+
+  /* Remember that the gain is the b0 / a0 expression in Equation (3.39). */
+  return gain / residue;
+}
+
+/* Evaluates the causal transfer function at the reciprocal of the given pole, which will be the
+ * non causal pole if the given pole is a causal one, as discussed in the compute_causal_poles
+ * function. The causal transfer function is given in Equation (3) in Van Vliet's paper, but we
+ * compute it in the form derived in the description of the compute_partial_fraction_residue
+ * function, also see the aforementioned function for the gain value. */
+static std::complex<double> compute_causal_transfer_function_at_non_causal_pole(
+    const std::array<std::complex<double>, 4> &poles,
+    const std::complex<double> &target_pole,
+    double gain)
+{
+  std::complex<double> result = std::complex<double>(1.0, 0.0);
+  for (const std::complex<double> &pole : poles) {
+    result *= 1.0 - pole * target_pole;
+  }
+
+  return gain / result;
+}
+
+/* Combine each pole and its conjugate counterpart into a second order section and assign its
+ * coefficients to the given coefficients value. The residue of the pole and its transfer value in
+ * the partial fraction of its transfer function are given.
+ *
+ * TODO: Properly document this function and prove its equations. */
+static void compute_second_order_section(const std::complex<double> &pole,
+                                         const std::complex<double> &residue,
+                                         const std::complex<double> &transfer_value,
+                                         double2 &r_feedback_coefficients,
+                                         double2 &r_causal_feedforward_coefficients,
+                                         double2 &r_non_causal_feedforward_coefficients)
+{
+  const std::complex<double> parallel_residue = residue * transfer_value;
+  const std::complex<double> pole_inverse = 1.0 / pole;
+
+  r_feedback_coefficients = double2(-2.0 * pole.real(), std::norm(pole));
+
+  const double causal_feedforward_1 = parallel_residue.imag() / pole_inverse.imag();
+  const double causal_feedforward_0 = parallel_residue.real() -
+                                      causal_feedforward_1 * pole_inverse.real();
+  r_causal_feedforward_coefficients = double2(causal_feedforward_0, causal_feedforward_1);
+
+  const double non_causal_feedforward_1 = causal_feedforward_1 -
+                                          causal_feedforward_0 * r_feedback_coefficients[0];
+  const double non_causal_feedforward_2 = -causal_feedforward_0 * r_feedback_coefficients[1];
+  r_non_causal_feedforward_coefficients = double2(non_causal_feedforward_1,
+                                                  non_causal_feedforward_2);
+}
+
+/* The IIR filter difference equation relies on previous outputs to compute new outputs, those
+ * previous outputs are not really defined at the start of the filter. To do Neumann boundary
+ * condition, we initialize the previous output with a special value that is a function of the
+ * boundary value. This special value is computed by multiply the boundary value with a coefficient
+ * to simulate an infinite stream of the boundary value.
+ *
+ * The function for the coefficient can be derived by substituting the boundary value for previous
+ * inputs, equating all current and previous outputs to the same value, and finally rearranging to
+ * compute that same output value.
+ *
+ * Start by the difference equation where b_i are the feedforward coefficients and a_i are the
+ * feedback coefficients:
+ *
+ *   y[n] = \sum_{i = 0}^3 b_i x[n - i] - \sum_{i = 0}^3 a_i y[n - i]
+ *
+ * Assume all outputs are y and all inputs are x, which is the boundary value:
+ *
+ *   y = \sum_{i = 0}^3 b_i x - \sum_{i = 0}^3 a_i y
+ *
+ * Now rearrange to compute y:
+ *
+ *   y = x \sum_{i = 0}^3 b_i - y \sum_{i = 0}^3 a_i
+ *   y + y \sum_{i = 0}^3 a_i = x \sum_{i = 0}^3 b_i
+ *   y (1 + \sum_{i = 0}^3 a_i) = x \sum_{i = 0}^3 b_i
+ *   y = x \cdot \frac{\sum_{i = 0}^3 b_i}{1 + \sum_{i = 0}^3 a_i}
+ *
+ * So our coefficient is the value that is multiplied by the boundary value x. Had x been zero,
+ * that is, we are doing Dirichlet boundary condition, the equations still hold. */
+static double compute_boundary_coefficient(const double2 &feedback_coefficients,
+                                           const double2 &feedforward_coefficients)
+{
+  return math::reduce_add(feedforward_coefficients) /
+         (1.0 + math::reduce_add(feedback_coefficients));
+}
+
+/* Computes the feedback and feedforward coefficients for the 4th order Van Vliet Gaussian filter
+ * given a target Gaussian sigma value. We first scale the poles of the filter to match the sigma
+ * value based on the method described in Section 4.2 of Van Vliet's paper, then we compute the
+ * coefficients from the scaled poles based on Equations (12) and (13). */
+VanVlietGaussianCoefficients::VanVlietGaussianCoefficients(Context & /*context*/, float sigma)
+{
+
+  /* The 4th order (N=4) poles for the Gaussian filter of a sigma of 2 computed by minimizing the
+   * maximum error (L-infinity) to true Gaussian as provided in Van Vliet's paper Table (1) fourth
+   * column. Notice that the second and fourth poles are the complex conjugates of the first and
+   * third poles respectively as noted in the table description. */
+  const std::array<std::complex<double>, 4> poles = {
+      std::complex<double>(1.12075, 1.27788),
+      std::complex<double>(1.12075, -1.27788),
+      std::complex<double>(1.76952, 0.46611),
+      std::complex<double>(1.76952, -0.46611),
+  };
+
+  const std::array<std::complex<double>, 4> scaled_poles = computed_scaled_poles(poles, sigma);
+
+  /* The given poles are actually the non causal poles, since they are outside of the unit circle,
+   * as demonstrated in Section 3.4 of Van Vliet's paper. And we compute the causal poles from
+   * those. */
+  const std::array<std::complex<double>, 4> non_causal_poles = scaled_poles;
+  const std::array<std::complex<double>, 4> causal_poles = compute_causal_poles(non_causal_poles);
+
+  /* Compute the feedforward and feedback coefficients, noting that those are functions of the non
+   * causal poles. */
+  const double4 feedback_coefficients = compute_feedback_coefficients(non_causal_poles);
+  const double feedforward_coefficient = compute_feedforward_coefficient(feedback_coefficients);
+
+  /* We only compute the residue for two of the causal poles, since the other two are complex
+   * conjugates of those two, and their residues will also be the complex conjugate of their
+   * respective counterpart. The gain is the feedforward coefficient as discussed in the function
+   * description. */
+  const std::complex<double> first_residue = compute_partial_fraction_residue(
+      causal_poles, causal_poles[0], feedforward_coefficient);
+  const std::complex<double> second_residue = compute_partial_fraction_residue(
+      causal_poles, causal_poles[2], feedforward_coefficient);
+
+  /* We only compute the transfer value of for two of the non causal poles, since the other two are
+   * complex conjugates of those two, and their transfer values will also be the complex conjugate
+   * of their respective counterpart. The gain is the feedforward coefficient as discussed in the
+   * function description. */
+  const std::complex<double> first_transfer_value =
+      compute_causal_transfer_function_at_non_causal_pole(
+          causal_poles, causal_poles[0], feedforward_coefficient);
+  const std::complex<double> second_transfer_value =
+      compute_causal_transfer_function_at_non_causal_pole(
+          causal_poles, causal_poles[2], feedforward_coefficient);
+
+  /* Combine each pole and its conjugate counterpart into a second order section and assign its
+   * coefficients. */
+  compute_second_order_section(causal_poles[0],
+                               first_residue,
+                               first_transfer_value,
+                               first_feedback_coefficients_,
+                               first_causal_feedforward_coefficients_,
+                               first_non_causal_feedforward_coefficients_);
+  compute_second_order_section(causal_poles[2],
+                               second_residue,
+                               second_transfer_value,
+                               second_feedback_coefficients_,
+                               second_causal_feedforward_coefficients_,
+                               second_non_causal_feedforward_coefficients_);
+
+  /* Compute the boundary coefficients for all four of second order sections. */
+  first_causal_boundary_coefficient_ = compute_boundary_coefficient(
+      first_feedback_coefficients_, first_causal_feedforward_coefficients_);
+  first_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
+      first_feedback_coefficients_, first_non_causal_feedforward_coefficients_);
+  second_causal_boundary_coefficient_ = compute_boundary_coefficient(
+      second_feedback_coefficients_, second_causal_feedforward_coefficients_);
+  second_non_causal_boundary_coefficient_ = compute_boundary_coefficient(
+      second_feedback_coefficients_, second_non_causal_feedforward_coefficients_);
+}
+
+const double2 &VanVlietGaussianCoefficients::first_causal_feedforward_coefficients() const
+{
+  return first_causal_feedforward_coefficients_;
+}
+
+const double2 &VanVlietGaussianCoefficients::first_non_causal_feedforward_coefficients() const
+{
+  return first_non_causal_feedforward_coefficients_;
+}
+
+const double2 &VanVlietGaussianCoefficients::first_feedback_coefficients() const
+{
+  return first_feedback_coefficients_;
+}
+
+const double2 &VanVlietGaussianCoefficients::second_causal_feedforward_coefficients() const
+{
+  return second_causal_feedforward_coefficients_;
+}
+
+const double2 &VanVlietGaussianCoefficients::second_non_causal_feedforward_coefficients() const
+{
+  return second_non_causal_feedforward_coefficients_;
+}
+
+const double2 &VanVlietGaussianCoefficients::second_feedback_coefficients() const
+{
+  return second_feedback_coefficients_;
+}
+
+double VanVlietGaussianCoefficients::first_causal_boundary_coefficient() const
+{
+  return first_causal_boundary_coefficient_;
+}
+
+double VanVlietGaussianCoefficients::first_non_causal_boundary_coefficient() const
+{
+  return first_non_causal_boundary_coefficient_;
+}
+
+double VanVlietGaussianCoefficients::second_causal_boundary_coefficient() const
+{
+  return second_causal_boundary_coefficient_;
+}
+
+double VanVlietGaussianCoefficients::second_non_causal_boundary_coefficient() const
+{
+  return second_non_causal_boundary_coefficient_;
+}
+
+/* --------------------------------------------------------------------
+ * Van Vliet Gaussian Coefficients Container.
+ */
+
+void VanVlietGaussianCoefficientsContainer::reset()
+{
+  /* First, delete all resources that are no longer needed. */
+  map_.remove_if([](auto item) { return !item.value->needed; });
+
+  /* Second, reset the needed status of the remaining resources to false to ready them to track
+   * their needed status for the next evaluation. */
+  for (auto &value : map_.values()) {
+    value->needed = false;
+  }
+}
+
+VanVlietGaussianCoefficients &VanVlietGaussianCoefficientsContainer::get(Context &context,
+                                                                         float sigma)
+{
+  const VanVlietGaussianCoefficientsKey key(sigma);
+
+  auto &deriche_gaussian_coefficients = *map_.lookup_or_add_cb(
+      key, [&]() { return std::make_unique<VanVlietGaussianCoefficients>(context, sigma); });
+
+  deriche_gaussian_coefficients.needed = true;
+  return deriche_gaussian_coefficients;
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/intern/static_cache_manager.cc
+++ b/source/blender/compositor/realtime_compositor/intern/static_cache_manager.cc
@ -20,6 +20,8 @@ void StaticCacheManager::reset()
  cached_shaders.reset();
  bokeh_kernels.reset();
  cached_images.reset();
+  deriche_gaussian_coefficients.reset();
+  van_vliet_gaussian_coefficients.reset();
 }

 }  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl
@ -0,0 +1,89 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
+ * filter using Deriche's design method. This is based on the following paper:
+ *
+ *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+ *   1993.
+ *
+ * We run two filters per row in parallel, one for the causal filter and one for the non causal
+ * filter, storing the result of each separately. See the DericheGaussianCoefficients class and the
+ * implementation for more information. */
+
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+#define FILTER_ORDER 4
+
+void main()
+{
+  /* The shader runs parallel across rows but serially across columns. */
+  int y = int(gl_GlobalInvocationID.x);
+  int width = texture_size(input_tx).x;
+
+  /* The second dispatch dimension is two dispatches, one for the causal filter and one for the non
+   * causal one. */
+  bool is_causal = gl_GlobalInvocationID.y == 0;
+  vec4 feedforward_coefficients = is_causal ? causal_feedforward_coefficients :
+                                              non_causal_feedforward_coefficients;
+  float boundary_coefficient = is_causal ? causal_boundary_coefficient :
+                                           non_causal_boundary_coefficient;
+
+  /* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
+   * current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
+   * boundary condition, so we initialize all inputs by the boundary pixel. */
+  ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
+  vec4 input_boundary = texture_load(input_tx, boundary_texel);
+  vec4 inputs[FILTER_ORDER + 1] = vec4[](
+      input_boundary, input_boundary, input_boundary, input_boundary, input_boundary);
+
+  /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
+   * current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
+   * boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
+   * boundary coefficient. See the DericheGaussianCoefficients class for more information on the
+   * boundary handing. */
+  vec4 output_boundary = input_boundary * boundary_coefficient;
+  vec4 outputs[FILTER_ORDER + 1] = vec4[](
+      output_boundary, output_boundary, output_boundary, output_boundary, output_boundary);
+
+  for (int x = 0; x < width; x++) {
+    /* Run forward across rows for the causal filter and backward for the non causal filter. */
+    ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
+    inputs[0] = texture_load(input_tx, texel);
+
+    /* Compute Equation (28) for the causal filter or Equation (29) for the non causal filter. The
+     * only difference is that the non causal filter ignores the current value and starts from the
+     * previous input, as can be seen in the subscript of the first input term in both equations.
+     * So add one while indexing the non causal inputs. */
+    outputs[0] = vec4(0.0);
+    int first_input_index = is_causal ? 0 : 1;
+    for (int i = 0; i < FILTER_ORDER; i++) {
+      outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
+      outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
+    }
+
+    /* Store the causal and non causal outputs independently, then sum them in a separate shader
+     * dispatch for better parallelism. */
+    if (is_causal) {
+      imageStore(causal_output_img, texel, outputs[0]);
+    }
+    else {
+      imageStore(non_causal_output_img, texel, outputs[0]);
+    }
+
+    /* Shift the inputs temporally by one. The oldest input is discarded, while the current input
+     * will retain its value but will be overwritten with the new current value in the next
+     * iteration. */
+    for (int i = FILTER_ORDER; i >= 1; i--) {
+      inputs[i] = inputs[i - 1];
+    }
+
+    /* Shift the outputs temporally by one. The oldest output is discarded, while the current
+     * output will retain its value but will be overwritten with the new current value in the next
+     * iteration. */
+    for (int i = FILTER_ORDER; i >= 1; i--) {
+      outputs[i] = outputs[i - 1];
+    }
+  }
+}
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur_sum.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur_sum.glsl
@ -0,0 +1,19 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* The Deriche filter is a parallel interconnection filter, meaning its output is the sum of its
+   * causal and non causal filters. */
+  vec4 filter_output = texture_load(causal_input_tx, texel) +
+                       texture_load(non_causal_input_tx, texel);
+
+  /* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
+   * in the deriche_gaussian_blur.cc file for more information on the rational behind this. */
+  imageStore(output_img, texel.yx, filter_output);
+}
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl
@ -0,0 +1,125 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* Blur the input horizontally by applying a fourth order IIR filter approximating a Gaussian
+ * filter using Van Vliet's design method. This is based on the following paper:
+ *
+ *   Van Vliet, Lucas J., Ian T. Young, and Piet W. Verbeek. "Recursive Gaussian derivative
+ *   filters." Proceedings. Fourteenth International Conference on Pattern Recognition (Cat. No.
+ *   98EX170). Vol. 1. IEEE, 1998.
+ *
+ * We decomposed the filter into two second order filters, so we actually run four filters per row
+ * in parallel, one for the first causal filter, one for the first non causal filter, one for the
+ * second causal filter, and one for the second non causal filter, storing the result of each
+ * separately. See the VanVlietGaussianCoefficients class and the implementation for more
+ * information. */
+
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+#define FILTER_ORDER 2
+
+void main()
+{
+  /* The shader runs parallel across rows but serially across columns. */
+  int y = int(gl_GlobalInvocationID.x);
+  int width = texture_size(input_tx).x;
+
+  /* The second dispatch dimension is four dispatches:
+   *
+   *   0 -> First causal filter.
+   *   1 -> First non causal filter.
+   *   2 -> Second causal filter.
+   *   3 -> Second non causal filter.
+   *
+   * We detect causality by even numbers. */
+  bool is_causal = gl_GlobalInvocationID.y % 2 == 0;
+  vec2 first_feedforward_coefficients = is_causal ? first_causal_feedforward_coefficients :
+                                                    first_non_causal_feedforward_coefficients;
+  float first_boundary_coefficient = is_causal ? first_causal_boundary_coefficient :
+                                                 first_non_causal_boundary_coefficient;
+  vec2 second_feedforward_coefficients = is_causal ? second_causal_feedforward_coefficients :
+                                                     second_non_causal_feedforward_coefficients;
+  float second_boundary_coefficient = is_causal ? second_causal_boundary_coefficient :
+                                                  second_non_causal_boundary_coefficient;
+  /* And we detect the filter by order. */
+  bool is_first_filter = gl_GlobalInvocationID.y < 2;
+  vec2 feedforward_coefficients = is_first_filter ? first_feedforward_coefficients :
+                                                    second_feedforward_coefficients;
+  vec2 feedback_coefficients = is_first_filter ? first_feedback_coefficients :
+                                                 second_feedback_coefficients;
+  float boundary_coefficient = is_first_filter ? first_boundary_coefficient :
+                                                 second_boundary_coefficient;
+
+  /* Create an array that holds the last FILTER_ORDER inputs along with the current input. The
+   * current input is at index 0 and the oldest input is at index FILTER_ORDER. We assume Neumann
+   * boundary condition, so we initialize all inputs by the boundary pixel. */
+  ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
+  vec4 input_boundary = texture_load(input_tx, boundary_texel);
+  vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary);
+
+  /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
+   * current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
+   * boundary condition, so we initialize all outputs by the boundary pixel multiplied by the
+   * boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the
+   * boundary handing. */
+  vec4 output_boundary = input_boundary * boundary_coefficient;
+  vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary);
+
+  for (int x = 0; x < width; x++) {
+    /* Run forward across rows for the causal filter and backward for the non causal filter. */
+    ivec2 texel = is_causal ? ivec2(x, y) : ivec2(width - 1 - x, y);
+    inputs[0] = texture_load(input_tx, texel);
+
+    /* Compute the filter based on its difference equation, this is not in the Van Vliet paper
+     * because the filter was decomposed, but it is essentially similar to Equation (28) for the
+     * causal filter or Equation (29) for the non causal filter in Deriche's paper, except it is
+     * second order, not fourth order.
+     *
+     *   Deriche, Rachid. Recursively implementating the Gaussian and its derivatives. Diss. INRIA,
+     *   1993.
+     *
+     * The only difference is that the non causal filter ignores the current value and starts from
+     * the previous input, as can be seen in the subscript of the first input term in both
+     * equations. So add one while indexing the non causal inputs. */
+    outputs[0] = vec4(0.0);
+    int first_input_index = is_causal ? 0 : 1;
+    for (int i = 0; i < FILTER_ORDER; i++) {
+      outputs[0] += feedforward_coefficients[i] * inputs[first_input_index + i];
+      outputs[0] -= feedback_coefficients[i] * outputs[i + 1];
+    }
+
+    /* Store the causal and non causal outputs of each of the two filters independently, then sum
+     * them in a separate shader dispatch for better parallelism. */
+    if (is_causal) {
+      if (is_first_filter) {
+        imageStore(first_causal_output_img, texel, outputs[0]);
+      }
+      else {
+        imageStore(second_causal_output_img, texel, outputs[0]);
+      }
+    }
+    else {
+      if (is_first_filter) {
+        imageStore(first_non_causal_output_img, texel, outputs[0]);
+      }
+      else {
+        imageStore(second_non_causal_output_img, texel, outputs[0]);
+      }
+    }
+
+    /* Shift the inputs temporally by one. The oldest input is discarded, while the current input
+     * will retain its value but will be overwritten with the new current value in the next
+     * iteration. */
+    for (int i = FILTER_ORDER; i >= 1; i--) {
+      inputs[i] = inputs[i - 1];
+    }
+
+    /* Shift the outputs temporally by one. The oldest output is discarded, while the current
+     * output will retain its value but will be overwritten with the new current value in the next
+     * iteration. */
+    for (int i = FILTER_ORDER; i >= 1; i--) {
+      outputs[i] = outputs[i - 1];
+    }
+  }
+}
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur_sum.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur_sum.glsl
@ -0,0 +1,21 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* The Van Vliet filter is a parallel interconnection filter, meaning its output is the sum of
+   * all of its causal and non causal filters. */
+  vec4 filter_output = texture_load(first_causal_input_tx, texel) +
+                       texture_load(first_non_causal_input_tx, texel) +
+                       texture_load(second_causal_input_tx, texel) +
+                       texture_load(second_non_causal_input_tx, texel);
+
+  /* Write the color using the transposed texel. See the sum_causal_and_non_causal_results method
+   * in the van_vliet_gaussian_blur.cc file for more information on the rational behind this. */
+  imageStore(output_img, texel.yx, filter_output);
+}
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_deriche_gaussian_blur_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_deriche_gaussian_blur_info.hh
@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur)
+    .local_group_size(128, 2)
+    .push_constant(Type::VEC4, "causal_feedforward_coefficients")
+    .push_constant(Type::VEC4, "non_causal_feedforward_coefficients")
+    .push_constant(Type::VEC4, "feedback_coefficients")
+    .push_constant(Type::FLOAT, "causal_boundary_coefficient")
+    .push_constant(Type::FLOAT, "non_causal_boundary_coefficient")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "causal_output_img")
+    .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "non_causal_output_img")
+    .compute_source("compositor_deriche_gaussian_blur.glsl")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_deriche_gaussian_blur_sum)
+    .local_group_size(16, 16)
+    .sampler(0, ImageType::FLOAT_2D, "causal_input_tx")
+    .sampler(1, ImageType::FLOAT_2D, "non_causal_input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_deriche_gaussian_blur_sum.glsl")
+    .do_static_compilation(true);
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_van_vliet_gaussian_blur_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_van_vliet_gaussian_blur_info.hh
@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur)
+    .local_group_size(64, 4)
+    .push_constant(Type::VEC2, "first_feedback_coefficients")
+    .push_constant(Type::VEC2, "first_causal_feedforward_coefficients")
+    .push_constant(Type::VEC2, "first_non_causal_feedforward_coefficients")
+    .push_constant(Type::VEC2, "second_feedback_coefficients")
+    .push_constant(Type::VEC2, "second_causal_feedforward_coefficients")
+    .push_constant(Type::VEC2, "second_non_causal_feedforward_coefficients")
+    .push_constant(Type::FLOAT, "first_causal_boundary_coefficient")
+    .push_constant(Type::FLOAT, "first_non_causal_boundary_coefficient")
+    .push_constant(Type::FLOAT, "second_causal_boundary_coefficient")
+    .push_constant(Type::FLOAT, "second_non_causal_boundary_coefficient")
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_causal_output_img")
+    .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "first_non_causal_output_img")
+    .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_causal_output_img")
+    .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "second_non_causal_output_img")
+    .compute_source("compositor_van_vliet_gaussian_blur.glsl")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_van_vliet_gaussian_blur_sum)
+    .local_group_size(16, 16)
+    .sampler(0, ImageType::FLOAT_2D, "first_causal_input_tx")
+    .sampler(1, ImageType::FLOAT_2D, "first_non_causal_input_tx")
+    .sampler(2, ImageType::FLOAT_2D, "second_causal_input_tx")
+    .sampler(3, ImageType::FLOAT_2D, "second_non_causal_input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_van_vliet_gaussian_blur_sum.glsl")
+    .do_static_compilation(true);
--- a/source/blender/nodes/composite/nodes/node_composite_blur.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_blur.cc
@ -19,6 +19,7 @@
 #include "GPU_shader.hh"
 #include "GPU_texture.hh"

+#include "COM_algorithm_recursive_gaussian_blur.hh"
 #include "COM_algorithm_symmetric_separable_blur.hh"
 #include "COM_node_operation.hh"
 #include "COM_symmetric_blur_weights.hh"
@ -106,7 +107,11 @@ class BlurOperation : public NodeOperation {
      return;
    }

-    if (use_variable_size()) {
+    if (node_storage(bnode()).filtertype == R_FILTER_FAST_GAUSS) {
+      recursive_gaussian_blur(
+          context(), get_input("Image"), get_result("Image"), compute_blur_radius());
+    }
+    else if (use_variable_size()) {
      execute_variable_size();
    }
    else if (use_separable_filter()) {
--- a/tests/data
+++ b/tests/data
@ -1 +1 @@
-Subproject commit d9c72c9c34a205f5012b5dbd84bb17116775df49
+Subproject commit 3a1d189a550119157aa4246de27d9d661b6f6548