Realtime Compositor: Implement Fog Glow Glare node #106042

Merged
Omar Emara merged 5 commits from OmarEmaraDev/blender:fog-glow-glare-node into main 2023-04-09 15:42:31 +02:00
5 changed files with 295 additions and 9 deletions

View File

@ -106,6 +106,8 @@ set(GLSL_SRC
shaders/compositor_ellipse_mask.glsl
shaders/compositor_filter.glsl
shaders/compositor_flip.glsl
shaders/compositor_glare_fog_glow_downsample.glsl
shaders/compositor_glare_fog_glow_upsample.glsl
shaders/compositor_glare_ghost_accumulate.glsl
shaders/compositor_glare_ghost_base.glsl
shaders/compositor_glare_highlights.glsl

View File

@ -0,0 +1,102 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#if defined(KARIS_AVERAGE)
/* Returns a weighted average of the given four colors, which are assumed to be the colors of
 * spatially neighbouring pixels. Each color is weighted by the reciprocal of its brightness plus
 * one, a form of local tone mapping that limits the contribution of fireflies to the result, as
 * described by Brian Karis in the article "Graphic Rants: Tone Mapping".
 *
 * https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
vec4 karis_brightness_weighted_sum(vec4 color1, vec4 color2, vec4 color3, vec4 color4)
{
  float weight1 = 1.0 / (max_v3(color1) + 1.0);
  float weight2 = 1.0 / (max_v3(color2) + 1.0);
  float weight3 = 1.0 / (max_v3(color3) + 1.0);
  float weight4 = 1.0 / (max_v3(color4) + 1.0);
  return weighted_sum(color1, color2, color3, color4, vec4(weight1, weight2, weight3, weight4));
}
#endif
/* Downsamples the input texture into the half-size output image. The shader is compiled in one of
 * two variants selected at compile time: SIMPLE_AVERAGE uses plain tent-filter weights, while
 * KARIS_AVERAGE additionally applies a local tone mapping per 4-pixel group to suppress fireflies
 * (see karis_brightness_weighted_sum above). */
void main()
{
  /* Each invocation corresponds to one output pixel, where the output has half the size of the
   * input. */
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
   * the coordinates into the sampler's expected [0, 1] range. */
  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));

  /* All the offsets in the following code section are in the normalized pixel space of the input
   * texture, so compute its normalized pixel size. */
  vec2 pixel_size = 1.0 / vec2(texture_size(input_tx));

  /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding output
   * pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13 positions
   * using bilinear fetches at the center of a number of overlapping square 4-pixel groups. This
   * downsampling strategy is described in the talk:
   *
   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
   *
   * In particular, the downsampling strategy is described and illustrated in slide 153 titled
   * "Downsampling - Our Solution". This is employed as it significantly improves the stability of
   * the glare as can be seen in the videos in the talk.
   *
   * The "near" samples are offset by one input pixel from the center and the "far" samples by
   * two, so the bilinear fetches cover the 6x6 area. */
  vec4 center = texture(input_tx, coordinates);
  vec4 upper_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
  vec4 upper_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
  vec4 lower_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
  vec4 lower_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
  vec4 left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 0.0));
  vec4 right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 0.0));
  vec4 upper_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, 2.0));
  vec4 lower_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, -2.0));
  vec4 upper_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 2.0));
  vec4 upper_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 2.0));
  vec4 lower_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, -2.0));
  vec4 lower_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, -2.0));

#if defined(SIMPLE_AVERAGE)
  /* The original weights equation mentioned in slide 153 is:
   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
   * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
   * groups of pixels. The center is sampled 4 times, the far non corner pixels are sampled 2
   * times, the near corner pixels are sampled only once; but their weight is quadruple the weights
   * of other groups; so they count as sampled 4 times, finally the far corner pixels are sampled
   * only once, essentially totalling 32 samples. So the weights are as used in the following code
   * section. */
  vec4 result = (4.0 / 32.0) * center +
                (4.0 / 32.0) *
                    (upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
                (2.0 / 32.0) * (left_far + right_far + upper_far + lower_far) +
                (1.0 / 32.0) *
                    (upper_left_far + upper_right_far + lower_left_far + lower_right_far);
#elif defined(KARIS_AVERAGE)
  /* Reduce the contributions of fireflies on the result by reducing each group of pixels using a
   * Karis brightness weighted sum. This is described in slide 168 titled "Fireflies - Partial
   * Karis Average".
   *
   * This needn't be done on all downsampling passes, but only the first one, since fireflies
   * will not survive the first pass, later passes can use the weighted average. */
  vec4 center_weighted_sum = karis_brightness_weighted_sum(
      upper_left_near, upper_right_near, lower_right_near, lower_left_near);
  vec4 upper_left_weighted_sum = karis_brightness_weighted_sum(
      upper_left_far, upper_far, center, left_far);
  vec4 upper_right_weighted_sum = karis_brightness_weighted_sum(
      upper_far, upper_right_far, right_far, center);
  vec4 lower_right_weighted_sum = karis_brightness_weighted_sum(
      center, right_far, lower_right_far, lower_far);
  vec4 lower_left_weighted_sum = karis_brightness_weighted_sum(
      left_far, center, lower_far, lower_left_far);

  /* The original weights equation mentioned in slide 153 is:
   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
   * Multiply both sides by 8 and you get:
   *   4 + 1 + 1 + 1 + 1 = 8
   * So the weights are as used in the following code section. */
  vec4 result = (4.0 / 8.0) * center_weighted_sum +
                (1.0 / 8.0) * (upper_left_weighted_sum + upper_right_weighted_sum +
                               lower_left_weighted_sum + lower_right_weighted_sum);
#endif

  imageStore(output_img, texel, result);
}

View File

@ -0,0 +1,37 @@
void main()
{
  /* Each invocation computes one pixel of the output image, which is double the size of the
   * input texture. */
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to
   * get the coordinates into the sampler's expected [0, 1] range. */
  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));

  /* All the offsets below are expressed in the normalized pixel space of the output image, so
   * compute its normalized pixel size. */
  vec2 pixel_size = 1.0 / vec2(imageSize(output_img));

  /* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at
   * the centers of neighbouring output pixels. As more tent filter upsampling passes are applied,
   * the result approximates a large sized Gaussian filter. This upsampling strategy is described
   * and illustrated in slide 162 titled "Upsampling - Our Solution" of the talk:
   *
   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
   *
   * The 9 taps and their tent filter weights are listed with the center tap first, followed by
   * the axis aligned taps, followed by the diagonal taps, giving a total weight of 16. */
  vec2 offsets[9] = vec2[](vec2(0.0, 0.0),
                           vec2(-1.0, 0.0),
                           vec2(0.0, 1.0),
                           vec2(1.0, 0.0),
                           vec2(0.0, -1.0),
                           vec2(-1.0, -1.0),
                           vec2(-1.0, 1.0),
                           vec2(1.0, -1.0),
                           vec2(1.0, 1.0));
  float weights[9] = float[](4.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0);

  vec4 upsampled = vec4(0.0);
  for (int i = 0; i < 9; i++) {
    vec4 tap = texture(input_tx, coordinates + pixel_size * offsets[i]);
    upsampled += (weights[i] / 16.0) * tap;
  }

  /* Accumulate on top of the existing contents of the output image. */
  imageStore(output_img, texel, imageLoad(output_img, texel) + upsampled);
}

View File

@ -104,3 +104,30 @@ GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate)
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
.compute_source("compositor_glare_streaks_accumulate.glsl")
.do_static_compilation(true);
/* --------
 * Fog Glow
 * -------- */

/* Shared base for the two downsampling variants below: samples the input through input_tx and
 * writes the half-size downsampled result into output_img. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_shared)
    .local_group_size(16, 16)
    .sampler(0, ImageType::FLOAT_2D, "input_tx")
    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .compute_source("compositor_glare_fog_glow_downsample.glsl");

/* Downsampling using a simple weighted average of the sampled pixel groups. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_simple_average)
    .define("SIMPLE_AVERAGE")
    .additional_info("compositor_glare_fog_glow_downsample_shared")
    .do_static_compilation(true);

/* Downsampling using a Karis brightness weighted average of the sampled pixel groups, which
 * reduces the contribution of fireflies, see the shader for more information. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_karis_average)
    .define("KARIS_AVERAGE")
    .additional_info("compositor_glare_fog_glow_downsample_shared")
    .do_static_compilation(true);
/* Upsamples the input texture into the double-size output image, additively accumulating on top
 * of the existing contents of output_img, hence the READ_WRITE qualifier. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_upsample)
    .local_group_size(16, 16)
    .sampler(0, ImageType::FLOAT_2D, "input_tx")
    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
    .compute_source("compositor_glare_fog_glow_upsample.glsl")
    .do_static_compilation(true);

View File

@ -7,6 +7,7 @@
#include <array>
#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.h"
@ -33,6 +34,7 @@
#include "node_composite_util.hh"
#define MAX_GLARE_ITERATIONS 5
#define MAX_GLARE_SIZE 9
namespace blender::nodes::node_composite_glare_cc {
@ -131,11 +133,6 @@ class GlareOperation : public NodeOperation {
return true;
}
/* The fog glow mode is currently unsupported. */
if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
return true;
}
return false;
}
@ -693,11 +690,132 @@ class GlareOperation : public NodeOperation {
* Fog Glow Glare.
* --------------- */
/* Fog glow is computed by first progressively half-downsampling the highlights down to a certain
 * size, then progressively double-upsampling the last downsampled result up to the original size
 * of the highlights, adding the downsampled result of the same size in each upsampling step.
 * This can be illustrated as follows:
 *
 *             Highlights   ---+--->  Fog Glare
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            ^
 *                ...           |
 *             Downsampled  ----------'
 *
 * The smooth downsampling followed by smooth upsampling can be thought of as a cheap way to
 * approximate a large radius blur, and adding the corresponding downsampled result while
 * upsampling is done to counter the attenuation that happens during downsampling.
 *
 * Smaller downsampled results contribute to larger glare size, so controlling the size can be
 * done by stopping downsampling down to a certain size, where the maximum possible size is
 * achieved when downsampling happens down to the smallest size of 2.
 *
 * Returns the first result of the chain, which by the end of the upsampling accumulation holds
 * the final fog glow at the size of the given highlights. */
Result execute_fog_glow(Result &highlights_result)
{
  /* The maximum possible glare size is achieved when we downsampled down to the smallest size of
   * 2, which would result in a downsampling chain length of the binary logarithm of the smaller
   * dimension of the size of the highlights.
   *
   * However, as users might want a smaller glare size, we reduce the chain length by the halving
   * count supplied by the user. */
  const int2 glare_size = get_glare_size();
  const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
  const int chain_length = int(std::log2(smaller_glare_dimension)) -
                           compute_fog_glare_size_halving_count();

  Array<Result> downsample_chain = compute_fog_glow_downsample_chain(highlights_result,
                                                                     chain_length);

  /* Notice that for a chain length of n, we need (n - 1) upsampling passes. */
  const IndexRange upsample_passes_range(chain_length - 1);
  GPUShader *shader = shader_manager().get("compositor_glare_fog_glow_upsample");
  GPU_shader_bind(shader);

  /* Each pass upsamples the smaller result onto the next larger one in the chain, traversing the
   * chain from the smallest result back up to the first. The shader accumulates additively onto
   * the output, which is why the output is bound as a read-write image. */
  for (const int i : upsample_passes_range) {
    Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
    input.bind_as_texture(shader, "input_tx");
    GPU_texture_filter_mode(input.texture(), true);

    const Result &output = downsample_chain[upsample_passes_range.last() - i];
    output.bind_as_image(shader, "output_img", true);

    compute_dispatch_threads_at_least(shader, output.domain().size);

    input.unbind_as_texture();
    output.unbind_as_image();
    input.release();
  }

  GPU_shader_unbind();

  return downsample_chain[0];
}
/* Progressively downsample the given result into a result with half the size for the given chain
 * length, returning an array containing the chain of downsampled results. The first result of
 * the chain is the given result itself for easier handling. The chain length is expected not
 * to exceed the binary logarithm of the smaller dimension of the given result, because that
 * would result in downsampling passes that produce useless textures with just one pixel. */
Array<Result> compute_fog_glow_downsample_chain(Result &highlights_result, int chain_length)
{
  const Result downsampled_result = Result::Temporary(ResultType::Color, texture_pool());
  Array<Result> downsample_chain(chain_length, downsampled_result);

  /* We assign the original highlights result to the first result of the chain to make the code
   * easier. In turn, the number of passes is one less than the chain length, because the first
   * result needn't be computed. */
  downsample_chain[0] = highlights_result;
  const IndexRange downsample_passes_range(chain_length - 1);

  for (const int i : downsample_passes_range) {
    /* For the first downsample pass, we use a special "Karis" downsample pass that applies a
     * form of local tone mapping to reduce the contributions of fireflies, see the shader for
     * more information. Later passes use a simple average downsampling filter because fireflies
     * do not survive the first pass. */
    const bool use_karis_average = i == downsample_passes_range.first();
    const char *shader_name = use_karis_average ?
                                  "compositor_glare_fog_glow_downsample_karis_average" :
                                  "compositor_glare_fog_glow_downsample_simple_average";
    GPUShader *shader = shader_manager().get(shader_name);
    GPU_shader_bind(shader);

    const Result &input = downsample_chain[i];
    input.bind_as_texture(shader, "input_tx");
    GPU_texture_filter_mode(input.texture(), true);

    /* Each pass writes a result with half the size of its input. */
    Result &output = downsample_chain[i + 1];
    output.allocate_texture(input.domain().size / 2);
    output.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, output.domain().size);

    input.unbind_as_texture();
    output.unbind_as_image();
    GPU_shader_unbind();
  }

  return downsample_chain;
}
/* The fog glow has a maximum possible size when the fog glow size is equal to MAX_GLARE_SIZE and
* halves for every unit decrement of the fog glow size. This method computes the number of
* halving that should take place, which is simply the difference to MAX_GLARE_SIZE. */
int compute_fog_glare_size_halving_count()
{
return MAX_GLARE_SIZE - get_fog_glow_size();
}
/* Returns the user-supplied fog glow size stored in the node, which expresses the size of the
 * fog glow relative to its maximum possible size, see the
 * compute_fog_glare_size_halving_count() method for more information. */
int get_fog_glow_size()
{
  const auto &storage = node_storage(bnode());
  return storage.size;
}
/* ----------