Realtime Compositor: Implement Fog Glow Glare node #106042

Merged
Omar Emara merged 5 commits from OmarEmaraDev/blender:fog-glow-glare-node into main 2023-04-09 15:42:31 +02:00
5 changed files with 295 additions and 9 deletions

View File

@ -106,6 +106,8 @@ set(GLSL_SRC
shaders/compositor_ellipse_mask.glsl
shaders/compositor_filter.glsl
shaders/compositor_flip.glsl
shaders/compositor_glare_fog_glow_downsample.glsl
shaders/compositor_glare_fog_glow_upsample.glsl
shaders/compositor_glare_ghost_accumulate.glsl
shaders/compositor_glare_ghost_base.glsl
shaders/compositor_glare_highlights.glsl

View File

@ -0,0 +1,102 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#if defined(KARIS_AVERAGE)
/* Returns a weighted average of the given four colors, which are assumed to be the colors of
 * spatially neighbouring pixels. Each color is weighted by the reciprocal of its brightness plus
 * one, a form of local tone mapping that limits the contribution of fireflies to the result, as
 * described by Brian Karis in the article "Graphic Rants: Tone Mapping".
 *
 * https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
vec4 karis_brightness_weighted_sum(vec4 color1, vec4 color2, vec4 color3, vec4 color4)
{
  float weight1 = 1.0 / (max_v3(color1) + 1.0);
  float weight2 = 1.0 / (max_v3(color2) + 1.0);
  float weight3 = 1.0 / (max_v3(color3) + 1.0);
  float weight4 = 1.0 / (max_v3(color4) + 1.0);
  return weighted_sum(color1, color2, color3, color4, vec4(weight1, weight2, weight3, weight4));
}
#endif
/* Downsamples the input texture into the half-size output image. The shader is compiled in one of
 * two variants selected at compile time: SIMPLE_AVERAGE uses plain tent-filter weights, while
 * KARIS_AVERAGE additionally applies a local tone mapping per 4-pixel group to suppress fireflies
 * (see karis_brightness_weighted_sum above). */
void main()
{
  /* Each invocation corresponds to one output pixel, where the output has half the size of the
   * input. */
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
   * the coordinates into the sampler's expected [0, 1] range. */
  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));

  /* All the offsets in the following code section are in the normalized pixel space of the input
   * texture, so compute its normalized pixel size. */
  vec2 pixel_size = 1.0 / vec2(texture_size(input_tx));

  /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding output
   * pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13 positions
   * using bilinear fetches at the center of a number of overlapping square 4-pixel groups. This
   * downsampling strategy is described in the talk:
   *
   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
   *
   * In particular, the downsampling strategy is described and illustrated in slide 153 titled
   * "Downsampling - Our Solution". This is employed as it significantly improves the stability of
   * the glare as can be seen in the videos in the talk.
   *
   * The "near" samples are offset by one input pixel from the center and the "far" samples by
   * two, so the bilinear fetches cover the 6x6 area. */
  vec4 center = texture(input_tx, coordinates);
  vec4 upper_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
  vec4 upper_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
  vec4 lower_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
  vec4 lower_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
  vec4 left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 0.0));
  vec4 right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 0.0));
  vec4 upper_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, 2.0));
  vec4 lower_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, -2.0));
  vec4 upper_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 2.0));
  vec4 upper_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 2.0));
  vec4 lower_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, -2.0));
  vec4 lower_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, -2.0));

#if defined(SIMPLE_AVERAGE)
  /* The original weights equation mentioned in slide 153 is:
   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
   * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
   * groups of pixels. The center is sampled 4 times, the far non corner pixels are sampled 2
   * times, the near corner pixels are sampled only once; but their weight is quadruple the weights
   * of other groups; so they count as sampled 4 times, finally the far corner pixels are sampled
   * only once, essentially totalling 32 samples. So the weights are as used in the following code
   * section. */
  vec4 result = (4.0 / 32.0) * center +
                (4.0 / 32.0) *
                    (upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
                (2.0 / 32.0) * (left_far + right_far + upper_far + lower_far) +
                (1.0 / 32.0) *
                    (upper_left_far + upper_right_far + lower_left_far + lower_right_far);
#elif defined(KARIS_AVERAGE)
  /* Reduce the contributions of fireflies on the result by reducing each group of pixels using a
   * Karis brightness weighted sum. This is described in slide 168 titled "Fireflies - Partial
   * Karis Average".
   *
   * This needn't be done on all downsampling passes, but only the first one, since fireflies
   * will not survive the first pass, later passes can use the weighted average. */
  vec4 center_weighted_sum = karis_brightness_weighted_sum(
      upper_left_near, upper_right_near, lower_right_near, lower_left_near);
  vec4 upper_left_weighted_sum = karis_brightness_weighted_sum(
      upper_left_far, upper_far, center, left_far);
  vec4 upper_right_weighted_sum = karis_brightness_weighted_sum(
      upper_far, upper_right_far, right_far, center);
  vec4 lower_right_weighted_sum = karis_brightness_weighted_sum(
      center, right_far, lower_right_far, lower_far);
  vec4 lower_left_weighted_sum = karis_brightness_weighted_sum(
      left_far, center, lower_far, lower_left_far);

  /* The original weights equation mentioned in slide 153 is:
   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
   * Multiply both sides by 8 and you get:
   *   4 + 1 + 1 + 1 + 1 = 8
   * So the weights are as used in the following code section. */
  vec4 result = (4.0 / 8.0) * center_weighted_sum +
                (1.0 / 8.0) * (upper_left_weighted_sum + upper_right_weighted_sum +
                               lower_left_weighted_sum + lower_right_weighted_sum);
#endif

  imageStore(output_img, texel, result);
}

View File

@ -0,0 +1,37 @@
void main()
{
  /* Each invocation computes one pixel of the output image, which is double the size of the
   * input texture. */
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to
   * get the coordinates into the sampler's expected [0, 1] range. */
  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));

  /* All the offsets below are expressed in the normalized pixel space of the output image, so
   * compute its normalized pixel size. */
  vec2 pixel_size = 1.0 / vec2(imageSize(output_img));

  /* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at
   * the centers of neighbouring output pixels. As more tent filter upsampling passes are applied,
   * the result approximates a large sized Gaussian filter. This upsampling strategy is described
   * and illustrated in slide 162 titled "Upsampling - Our Solution" of the talk:
   *
   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
   *
   * The 9 taps and their tent filter weights are listed with the center tap first, followed by
   * the axis aligned taps, followed by the diagonal taps, giving a total weight of 16. */
  vec2 offsets[9] = vec2[](vec2(0.0, 0.0),
                           vec2(-1.0, 0.0),
                           vec2(0.0, 1.0),
                           vec2(1.0, 0.0),
                           vec2(0.0, -1.0),
                           vec2(-1.0, -1.0),
                           vec2(-1.0, 1.0),
                           vec2(1.0, -1.0),
                           vec2(1.0, 1.0));
  float weights[9] = float[](4.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0);

  vec4 upsampled = vec4(0.0);
  for (int i = 0; i < 9; i++) {
    vec4 tap = texture(input_tx, coordinates + pixel_size * offsets[i]);
    upsampled += (weights[i] / 16.0) * tap;
  }

  /* Accumulate on top of the existing contents of the output image. */
  imageStore(output_img, texel, imageLoad(output_img, texel) + upsampled);
}

View File

@ -104,3 +104,30 @@ GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate)
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
.compute_source("compositor_glare_streaks_accumulate.glsl")
.do_static_compilation(true);
/* --------
 * Fog Glow
 * -------- */

/* Shared base for the two downsampling variants below: samples the input through input_tx and
 * writes the half-size downsampled result into output_img. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_shared)
    .local_group_size(16, 16)
    .sampler(0, ImageType::FLOAT_2D, "input_tx")
    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .compute_source("compositor_glare_fog_glow_downsample.glsl");

/* Downsampling using a simple weighted average of the sampled pixel groups. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_simple_average)
    .define("SIMPLE_AVERAGE")
    .additional_info("compositor_glare_fog_glow_downsample_shared")
    .do_static_compilation(true);

/* Downsampling using a Karis brightness weighted average of the sampled pixel groups, which
 * reduces the contribution of fireflies, see the shader for more information. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_karis_average)
    .define("KARIS_AVERAGE")
    .additional_info("compositor_glare_fog_glow_downsample_shared")
    .do_static_compilation(true);
/* Upsamples the input texture into the double-size output image, additively accumulating on top
 * of the existing contents of output_img, hence the READ_WRITE qualifier. */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_upsample)
    .local_group_size(16, 16)
    .sampler(0, ImageType::FLOAT_2D, "input_tx")
    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
    .compute_source("compositor_glare_fog_glow_upsample.glsl")
    .do_static_compilation(true);

View File

@ -7,6 +7,7 @@
#include <array>
#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.h"
@ -33,6 +34,7 @@
#include "node_composite_util.hh"
#define MAX_GLARE_ITERATIONS 5
#define MAX_GLARE_SIZE 9
namespace blender::nodes::node_composite_glare_cc {
@ -131,11 +133,6 @@ class GlareOperation : public NodeOperation {
return true;
}
/* The fog glow mode is currently unsupported. */
if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
return true;
}
return false;
}
@ -693,11 +690,132 @@ class GlareOperation : public NodeOperation {
* Fog Glow Glare.
* --------------- */
/* Fog glow is computed by first progressively half-downsampling the highlights down to a certain
 * size, then progressively double-upsampling the last downsampled result up to the original size
 * of the highlights, adding the downsampled result of the same size in each upsampling step.
 * This can be illustrated as follows:
 *
 *             Highlights   ---+--->  Fog Glare
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            |
 *             Downsampled  ---+--->  Upsampled
 *                 |            ^
 *                ...           |
 *             Downsampled  ----------'
 *
 * The smooth downsampling followed by smooth upsampling can be thought of as a cheap way to
 * approximate a large radius blur, and adding the corresponding downsampled result while
 * upsampling is done to counter the attenuation that happens during downsampling.
 *
 * Smaller downsampled results contribute to larger glare size, so controlling the size can be
 * done by stopping downsampling down to a certain size, where the maximum possible size is
 * achieved when downsampling happens down to the smallest size of 2.
 *
 * Returns the first result of the chain, which by the end of the upsampling accumulation holds
 * the final fog glow at the size of the given highlights. */
Result execute_fog_glow(Result &highlights_result)
{
  /* The maximum possible glare size is achieved when we downsampled down to the smallest size of
   * 2, which would result in a downsampling chain length of the binary logarithm of the smaller
   * dimension of the size of the highlights.
   *
   * However, as users might want a smaller glare size, we reduce the chain length by the halving
   * count supplied by the user. */
  const int2 glare_size = get_glare_size();
  const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
  const int chain_length = int(std::log2(smaller_glare_dimension)) -
                           compute_fog_glare_size_halving_count();

  Array<Result> downsample_chain = compute_fog_glow_downsample_chain(highlights_result,
                                                                     chain_length);

  /* Notice that for a chain length of n, we need (n - 1) upsampling passes. */
  const IndexRange upsample_passes_range(chain_length - 1);
  GPUShader *shader = shader_manager().get("compositor_glare_fog_glow_upsample");
  GPU_shader_bind(shader);

  /* Each pass upsamples the smaller result onto the next larger one in the chain, traversing the
   * chain from the smallest result back up to the first. The shader accumulates additively onto
   * the output, which is why the output is bound as a read-write image. */
  for (const int i : upsample_passes_range) {
    Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
    input.bind_as_texture(shader, "input_tx");
    GPU_texture_filter_mode(input.texture(), true);

    const Result &output = downsample_chain[upsample_passes_range.last() - i];
    output.bind_as_image(shader, "output_img", true);

    compute_dispatch_threads_at_least(shader, output.domain().size);

    input.unbind_as_texture();
    output.unbind_as_image();
    input.release();
  }

  GPU_shader_unbind();

  return downsample_chain[0];
}
/* Progressively downsample the given result into a result with half the size for the given chain
 * length, returning an array containing the chain of downsampled results. The first result of
 * the chain is the given result itself for easier handling. The chain length is expected not
 * to exceed the binary logarithm of the smaller dimension of the given result, because that
 * would result in downsampling passes that produce useless textures with just one pixel. */
Array<Result> compute_fog_glow_downsample_chain(Result &highlights_result, int chain_length)
{
  const Result downsampled_result = Result::Temporary(ResultType::Color, texture_pool());
  Array<Result> downsample_chain(chain_length, downsampled_result);

  /* We assign the original highlights result to the first result of the chain to make the code
   * easier. In turn, the number of passes is one less than the chain length, because the first
   * result needn't be computed. */
  downsample_chain[0] = highlights_result;
  const IndexRange downsample_passes_range(chain_length - 1);

  for (const int i : downsample_passes_range) {
    /* For the first downsample pass, we use a special "Karis" downsample pass that applies a
     * form of local tone mapping to reduce the contributions of fireflies, see the shader for
     * more information. Later passes use a simple average downsampling filter because fireflies
     * do not survive the first pass. */
    const bool use_karis_average = i == downsample_passes_range.first();
    const char *shader_name = use_karis_average ?
                                  "compositor_glare_fog_glow_downsample_karis_average" :
                                  "compositor_glare_fog_glow_downsample_simple_average";
    GPUShader *shader = shader_manager().get(shader_name);
    GPU_shader_bind(shader);

    const Result &input = downsample_chain[i];
    input.bind_as_texture(shader, "input_tx");
    GPU_texture_filter_mode(input.texture(), true);

    /* Each pass writes a result with half the size of its input. */
    Result &output = downsample_chain[i + 1];
    output.allocate_texture(input.domain().size / 2);
    output.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, output.domain().size);

    input.unbind_as_texture();
    output.unbind_as_image();
    GPU_shader_unbind();
  }

  return downsample_chain;
}
/* The fog glow has a maximum possible size when the fog glow size is equal to MAX_GLARE_SIZE and
* halves for every unit decrement of the fog glow size. This method computes the number of
* halving that should take place, which is simply the difference to MAX_GLARE_SIZE. */
int compute_fog_glare_size_halving_count()
{
return MAX_GLARE_SIZE - get_fog_glow_size();
}
/* Returns the user-supplied fog glow size stored in the node, which expresses the size of the
 * fog glow relative to its maximum possible size, see the
 * compute_fog_glare_size_halving_count() method for more information. */
int get_fog_glow_size()
{
  const auto &storage = node_storage(bnode());
  return storage.size;
}
/* ----------