diff --git a/source/blender/compositor/realtime_compositor/CMakeLists.txt b/source/blender/compositor/realtime_compositor/CMakeLists.txt
index 539e3fce191..e550c42eb72 100644
--- a/source/blender/compositor/realtime_compositor/CMakeLists.txt
+++ b/source/blender/compositor/realtime_compositor/CMakeLists.txt
@@ -106,6 +106,8 @@ set(GLSL_SRC
   shaders/compositor_ellipse_mask.glsl
   shaders/compositor_filter.glsl
   shaders/compositor_flip.glsl
+  shaders/compositor_glare_fog_glow_downsample.glsl
+  shaders/compositor_glare_fog_glow_upsample.glsl
   shaders/compositor_glare_ghost_accumulate.glsl
   shaders/compositor_glare_ghost_base.glsl
   shaders/compositor_glare_highlights.glsl
diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_downsample.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_downsample.glsl
new file mode 100644
index 00000000000..fae71ae3cb2
--- /dev/null
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_downsample.glsl
@@ -0,0 +1,102 @@
+#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
+
+#if defined(KARIS_AVERAGE)
+/* Computes the weighted average of the given four colors, which are assumed to be the colors of
+ * spatially neighbouring pixels. The weights are computed so as to reduce the contributions of
+ * fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
+ * the article "Graphic Rants: Tone Mapping".
+ *
+ * https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
+vec4 karis_brightness_weighted_sum(vec4 color1, vec4 color2, vec4 color3, vec4 color4)
+{
+  vec4 brightness = vec4(max_v3(color1), max_v3(color2), max_v3(color3), max_v3(color4));
+  vec4 weights = 1.0 / (brightness + 1.0);
+  return weighted_sum(color1, color2, color3, color4, weights);
+}
+#endif
+
+void main()
+{
+  /* Each invocation corresponds to one output pixel, where the output has half the size of the
+   * input. */
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
+   * the coordinates into the sampler's expected [0, 1] range. */
+  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
+
+  /* All the offsets in the following code section are in the normalized pixel space of the input
+   * texture, so compute its normalized pixel size. */
+  vec2 pixel_size = 1.0 / vec2(texture_size(input_tx));
+
+  /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding output
+   * pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13 positions
+   * using bilinear fetches at the center of a number of overlapping square 4-pixel groups. This
+   * downsampling strategy is described in the talk:
+   *
+   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
+   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
+   *
+   * In particular, the downsampling strategy is described and illustrated in slide 153 titled
+   * "Downsampling - Our Solution". This is employed as it significantly improves the stability of
+   * the glare as can be seen in the videos in the talk. */
+  vec4 center = texture(input_tx, coordinates);
+  vec4 upper_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
+  vec4 upper_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
+  vec4 lower_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
+  vec4 lower_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
+  vec4 left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 0.0));
+  vec4 right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 0.0));
+  vec4 upper_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, 2.0));
+  vec4 lower_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, -2.0));
+  vec4 upper_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 2.0));
+  vec4 upper_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 2.0));
+  vec4 lower_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, -2.0));
+  vec4 lower_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, -2.0));
+
+#if defined(SIMPLE_AVERAGE)
+  /* The original weights equation mentioned in slide 153 is:
+   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
+   * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
+   * groups of pixels. The center is sampled 4 times, the far non-corner pixels are sampled 2
+   * times, and the near corner pixels are sampled only once, but their weight is quadruple the
+   * weights of the other groups, so they count as sampled 4 times. Finally, the far corner pixels
+   * are sampled only once, essentially totalling 32 samples. So the weights are as used in the
+   * following code section. */
+  vec4 result = (4.0 / 32.0) * center +
+                (4.0 / 32.0) *
+                    (upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
+                (2.0 / 32.0) * (left_far + right_far + upper_far + lower_far) +
+                (1.0 / 32.0) *
+                    (upper_left_far + upper_right_far + lower_left_far + lower_right_far);
+#elif defined(KARIS_AVERAGE)
+  /* Reduce the contributions of fireflies on the result by reducing each group of pixels using a
+   * Karis brightness weighted sum. This is described in slide 168 titled "Fireflies - Partial
+   * Karis Average".
+   *
+   * This needn't be done on all downsampling passes, but only on the first one, since fireflies
+   * will not survive the first pass; later passes can use the weighted average. */
+  vec4 center_weighted_sum = karis_brightness_weighted_sum(
+      upper_left_near, upper_right_near, lower_right_near, lower_left_near);
+  vec4 upper_left_weighted_sum = karis_brightness_weighted_sum(
+      upper_left_far, upper_far, center, left_far);
+  vec4 upper_right_weighted_sum = karis_brightness_weighted_sum(
+      upper_far, upper_right_far, right_far, center);
+  vec4 lower_right_weighted_sum = karis_brightness_weighted_sum(
+      center, right_far, lower_right_far, lower_far);
+  vec4 lower_left_weighted_sum = karis_brightness_weighted_sum(
+      left_far, center, lower_far, lower_left_far);
+
+  /* The original weights equation mentioned in slide 153 is:
+   *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
+   * Multiply both sides by 8 and you get:
+   *   4 + 1 + 1 + 1 + 1 = 8
+   * So the weights are as used in the following code section. */
+  vec4 result = (4.0 / 8.0) * center_weighted_sum +
+                (1.0 / 8.0) * (upper_left_weighted_sum + upper_right_weighted_sum +
+                               lower_left_weighted_sum + lower_right_weighted_sum);
+#endif
+
+  imageStore(output_img, texel, result);
+}
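For readers who want to poke at the Karis average outside the GPU, the following standalone C++ sketch (not part of the patch) mirrors karis_brightness_weighted_sum() above. The float4 type and max_v3() helper are local stand-ins rather than Blender API, and the sketch assumes the weighted sum is normalized by the total weight; if weighted_sum() in common_math_lib.glsl is a plain un-normalized sum, the firefly-damping idea is unchanged.

// Standalone sketch, not part of the patch: a CPU mirror of
// karis_brightness_weighted_sum() above, with local stand-in types.
#include <algorithm>
#include <cstdio>

struct float4 {
  float x, y, z, w;
};

static float max_v3(const float4 &color)
{
  return std::max(color.x, std::max(color.y, color.z));
}

static float4 karis_brightness_weighted_sum(const float4 colors[4])
{
  float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};
  float weights_sum = 0.0f;
  for (int i = 0; i < 4; i++) {
    /* The weight is the reciprocal of the brightness plus one, so the brighter
     * the pixel, the smaller its contribution, which damps fireflies. */
    const float weight = 1.0f / (max_v3(colors[i]) + 1.0f);
    sum.x += colors[i].x * weight;
    sum.y += colors[i].y * weight;
    sum.z += colors[i].z * weight;
    sum.w += colors[i].w * weight;
    weights_sum += weight;
  }
  /* Normalize by the total weight so the result is an average. */
  sum.x /= weights_sum;
  sum.y /= weights_sum;
  sum.z /= weights_sum;
  sum.w /= weights_sum;
  return sum;
}

int main()
{
  /* Three ordinary pixels and one firefly. A plain average would be 25.75,
   * while the brightness weighted average stays around 1.65. */
  const float4 colors[4] = {{1.0f, 1.0f, 1.0f, 1.0f},
                            {1.0f, 1.0f, 1.0f, 1.0f},
                            {1.0f, 1.0f, 1.0f, 1.0f},
                            {100.0f, 100.0f, 100.0f, 1.0f}};
  const float4 result = karis_brightness_weighted_sum(colors);
  std::printf("%f %f %f %f\n", result.x, result.y, result.z, result.w);
  return 0;
}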
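And a tiny check of the sample accounting behind the 4/32, 2/32 and 1/32 weights above (again a standalone sketch, not part of the patch): the one center tap stands for 4 samples, the four near-corner taps for 4 samples each, the four far-edge taps for 2 samples each and the four far-corner taps for 1 sample each, giving the 32 effective samples, so the weights sum to one.

// Standalone check that the SIMPLE_AVERAGE tap weights above sum to one:
// 1 * 4/32 + 4 * 4/32 + 4 * 2/32 + 4 * 1/32 = 32/32.
#include <cassert>

int main()
{
  const float total = (1 * 4.0f + 4 * 4.0f + 4 * 2.0f + 4 * 1.0f) / 32.0f;
  assert(total == 1.0f);
  return 0;
}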
diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_upsample.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_upsample.glsl
new file mode 100644
index 00000000000..517a97e3d3a
--- /dev/null
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_glare_fog_glow_upsample.glsl
@@ -0,0 +1,37 @@
+void main()
+{
+  /* Each invocation corresponds to one output pixel, where the output has twice the size of the
+   * input. */
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+
+  /* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
+   * the coordinates into the sampler's expected [0, 1] range. */
+  vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
+
+  /* All the offsets in the following code section are in the normalized pixel space of the output
+   * image, so compute its normalized pixel size. */
+  vec2 pixel_size = 1.0 / vec2(imageSize(output_img));
+
+  /* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at the
+   * center of neighbouring output pixels. As more tent filter upsampling passes are applied, the
+   * result approximates a large sized Gaussian filter. This upsampling strategy is described in
+   * the talk:
+   *
+   *   Next Generation Post Processing in Call of Duty: Advanced Warfare
+   *   https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
+   *
+   * In particular, the upsampling strategy is described and illustrated in slide 162 titled
+   * "Upsampling - Our Solution". */
+  vec4 upsampled = vec4(0.0);
+  upsampled += (4.0 / 16.0) * texture(input_tx, coordinates);
+  upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 0.0));
+  upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, 1.0));
+  upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 0.0));
+  upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, -1.0));
+  upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
+  upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
+  upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
+  upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
+
+  imageStore(output_img, texel, imageLoad(output_img, texel) + upsampled);
+}
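The nine weights in the upsample shader above form the 3x3 tent kernel, i.e. the outer product of the 1D tent (1, 2, 1) / 4 with itself, which is why repeated upsampling passes tend toward a Gaussian-looking response. A standalone C++ sketch (not part of the patch) that builds the kernel and checks that it is normalized:

// Standalone sketch: build the 3x3 tent kernel used by the upsample shader
// above as the outer product of (1, 2, 1) / 4 with itself and verify that the
// weights are the 1/16, 2/16 and 4/16 values and sum to one.
#include <cassert>
#include <cstdio>

int main()
{
  const float tent_1d[3] = {1.0f / 4.0f, 2.0f / 4.0f, 1.0f / 4.0f};
  float sum = 0.0f;
  for (int y = 0; y < 3; y++) {
    for (int x = 0; x < 3; x++) {
      /* Separable kernel: weight(x, y) = tent(x) * tent(y). */
      const float weight = tent_1d[x] * tent_1d[y];
      std::printf("%6.4f ", weight);
      sum += weight;
    }
    std::printf("\n");
  }
  assert(sum > 0.9999f && sum < 1.0001f);
  return 0;
}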
diff --git a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh
index 029bb027d5b..ec92321e062 100644
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_glare_info.hh
@@ -104,3 +104,30 @@ GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate)
     .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
     .compute_source("compositor_glare_streaks_accumulate.glsl")
     .do_static_compilation(true);
+
+/* --------
+ * Fog Glow
+ * -------- */
+
+GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_shared)
+    .local_group_size(16, 16)
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_glare_fog_glow_downsample.glsl");
+
+GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_simple_average)
+    .define("SIMPLE_AVERAGE")
+    .additional_info("compositor_glare_fog_glow_downsample_shared")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_karis_average)
+    .define("KARIS_AVERAGE")
+    .additional_info("compositor_glare_fog_glow_downsample_shared")
+    .do_static_compilation(true);
+
+GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_upsample)
+    .local_group_size(16, 16)
+    .sampler(0, ImageType::FLOAT_2D, "input_tx")
+    .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
+    .compute_source("compositor_glare_fog_glow_upsample.glsl")
+    .do_static_compilation(true);
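A small note on the local_group_size(16, 16) declarations above: the node code that follows dispatches these kernels with compute_dispatch_threads_at_least(), whose job is to cover every output pixel with at least one thread, which amounts to a ceiling division of the output size by the group size. The standalone C++ sketch below is not part of the patch and only illustrates that arithmetic, not the helper's actual implementation; the 1920x1080 output is a made-up example.

// Standalone illustration of how many 16x16 work groups are needed to cover an
// output of a given size: a ceiling division per dimension, so that every
// output pixel is covered by at least one thread.
#include <cassert>

struct int2 {
  int x, y;
};

static int2 group_count(const int2 size, const int2 group_size)
{
  return {(size.x + group_size.x - 1) / group_size.x,
          (size.y + group_size.y - 1) / group_size.y};
}

int main()
{
  /* A made-up 1920x1080 output needs 120x68 groups; the threads in the last
   * row of groups that fall outside the image have nothing valid to store. */
  const int2 groups = group_count({1920, 1080}, {16, 16});
  assert(groups.x == 120 && groups.y == 68);
  return 0;
}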
diff --git a/source/blender/nodes/composite/nodes/node_composite_glare.cc b/source/blender/nodes/composite/nodes/node_composite_glare.cc
index 54cc6cb81ce..2b303957e73 100644
--- a/source/blender/nodes/composite/nodes/node_composite_glare.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_glare.cc
@@ -7,6 +7,7 @@
 #include
+#include "BLI_array.hh"
 #include "BLI_assert.h"
 #include "BLI_index_range.hh"
 #include "BLI_math_base.h"
@@ -33,6 +34,7 @@
 #include "node_composite_util.hh"
 
 #define MAX_GLARE_ITERATIONS 5
+#define MAX_GLARE_SIZE 9
 
 namespace blender::nodes::node_composite_glare_cc {
@@ -131,11 +133,6 @@ class GlareOperation : public NodeOperation {
       return true;
     }
 
-    /* The fog glow mode is currently unsupported. */
-    if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
-      return true;
-    }
-
     return false;
   }
 
@@ -693,11 +690,132 @@ class GlareOperation : public NodeOperation {
    * Fog Glow Glare.
    * --------------- */
 
-  /* Not yet implemented. Unreachable code due to the is_identity method. */
-  Result execute_fog_glow(Result & /*highlights_result*/)
+  /* Fog glow is computed by first progressively half-downsampling the highlights down to a certain
+   * size, then progressively double-upsampling the last downsampled result up to the original size
+   * of the highlights, adding the downsampled result of the same size in each upsampling step.
+   * This can be illustrated as follows:
+   *
+   *   Highlights  ---+---> Fog Glare
+   *        |                  |
+   *   Downsampled ---+---> Upsampled
+   *        |                  |
+   *   Downsampled ---+---> Upsampled
+   *        |                  |
+   *   Downsampled ---+---> Upsampled
+   *        |                  ^
+   *       ...                 |
+   *   Downsampled ------------'
+   *
+   * The smooth downsampling followed by smooth upsampling can be thought of as a cheap way to
+   * approximate a large radius blur, and adding the corresponding downsampled result while
+   * upsampling is done to counter the attenuation that happens during downsampling.
+   *
+   * Smaller downsampled results contribute to larger glare size, so controlling the size can be
+   * done by stopping downsampling down to a certain size, where the maximum possible size is
+   * achieved when downsampling happens down to the smallest size of 2. */
+  Result execute_fog_glow(Result &highlights_result)
   {
-    BLI_assert_unreachable();
-    return Result(ResultType::Color, texture_pool());
+    /* The maximum possible glare size is achieved when we downsample down to the smallest size of
+     * 2, which would result in a downsampling chain length of the binary logarithm of the smaller
+     * dimension of the size of the highlights.
+     *
+     * However, as users might want a smaller glare size, we reduce the chain length by the halving
+     * count supplied by the user. */
+    const int2 glare_size = get_glare_size();
+    const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
+    const int chain_length = int(std::log2(smaller_glare_dimension)) -
+                             compute_fog_glare_size_halving_count();
+
+    Array<Result> downsample_chain = compute_fog_glow_downsample_chain(highlights_result,
+                                                                       chain_length);
+
+    /* Notice that for a chain length of n, we need (n - 1) upsampling passes. */
+    const IndexRange upsample_passes_range(chain_length - 1);
+    GPUShader *shader = shader_manager().get("compositor_glare_fog_glow_upsample");
+    GPU_shader_bind(shader);
+
+    for (const int i : upsample_passes_range) {
+      Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
+      input.bind_as_texture(shader, "input_tx");
+      GPU_texture_filter_mode(input.texture(), true);
+
+      const Result &output = downsample_chain[upsample_passes_range.last() - i];
+      output.bind_as_image(shader, "output_img", true);
+
+      compute_dispatch_threads_at_least(shader, output.domain().size);
+
+      input.unbind_as_texture();
+      output.unbind_as_image();
+      input.release();
+    }
+
+    GPU_shader_unbind();
+
+    return downsample_chain[0];
+  }
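To make the chain length arithmetic in execute_fog_glow() above concrete, here is a standalone C++ sketch (not part of the patch; the 1920x1080 highlights size is a made-up example) that evaluates the same expression for the maximum glare size of 9 and for a smaller size of 6:

// Standalone evaluation of the chain length used above: the integer binary
// logarithm of the smaller highlights dimension, minus one halving for every
// unit the glare size is below MAX_GLARE_SIZE.
#include <algorithm>
#include <cassert>
#include <cmath>

constexpr int MAX_GLARE_SIZE = 9;

static int chain_length(const int width, const int height, const int size)
{
  const int smaller_dimension = std::min(width, height);
  const int halving_count = MAX_GLARE_SIZE - size;
  return static_cast<int>(std::log2(smaller_dimension)) - halving_count;
}

int main()
{
  /* At the maximum size of 9 nothing is halved: int(log2(1080)) = 10. */
  assert(chain_length(1920, 1080, 9) == 10);
  /* A size of 6 drops three downsampling levels, giving a smaller glare. */
  assert(chain_length(1920, 1080, 6) == 7);
  return 0;
}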
+
+  /* Progressively downsample the given result into a result with half the size for the given chain
+   * length, returning an array containing the chain of downsampled results. The first result of
+   * the chain is the given result itself for easier handling. The chain length is expected not
+   * to exceed the binary logarithm of the smaller dimension of the given result, because that
+   * would result in downsampling passes that produce useless textures with just one pixel. */
+  Array<Result> compute_fog_glow_downsample_chain(Result &highlights_result, int chain_length)
+  {
+    const Result downsampled_result = Result::Temporary(ResultType::Color, texture_pool());
+    Array<Result> downsample_chain(chain_length, downsampled_result);
+
+    /* We assign the original highlights result to the first result of the chain to make the code
+     * easier. In turn, the number of passes is one less than the chain length, because the first
+     * result needn't be computed. */
+    downsample_chain[0] = highlights_result;
+    const IndexRange downsample_passes_range(chain_length - 1);
+
+    GPUShader *shader;
+    for (const int i : downsample_passes_range) {
+      /* For the first downsample pass, we use a special "Karis" downsample pass that applies a
+       * form of local tone mapping to reduce the contributions of fireflies, see the shader for
+       * more information. Later passes use a simple average downsampling filter because fireflies
+       * do not survive the first pass. */
+      if (i == downsample_passes_range.first()) {
+        shader = shader_manager().get("compositor_glare_fog_glow_downsample_karis_average");
+        GPU_shader_bind(shader);
+      }
+      else {
+        shader = shader_manager().get("compositor_glare_fog_glow_downsample_simple_average");
+        GPU_shader_bind(shader);
+      }
+
+      const Result &input = downsample_chain[i];
+      input.bind_as_texture(shader, "input_tx");
+      GPU_texture_filter_mode(input.texture(), true);
+
+      Result &output = downsample_chain[i + 1];
+      output.allocate_texture(input.domain().size / 2);
+      output.bind_as_image(shader, "output_img");
+
+      compute_dispatch_threads_at_least(shader, output.domain().size);
+
+      input.unbind_as_texture();
+      output.unbind_as_image();
+      GPU_shader_unbind();
+    }
+
+    return downsample_chain;
+  }
+
+  /* The fog glow has a maximum possible size when the fog glow size is equal to MAX_GLARE_SIZE and
+   * halves for every unit decrement of the fog glow size. This method computes the number of
+   * halvings that should take place, which is simply the difference from MAX_GLARE_SIZE. */
+  int compute_fog_glare_size_halving_count()
+  {
+    return MAX_GLARE_SIZE - get_fog_glow_size();
+  }
+
+  /* The size of the fog glow relative to its maximum possible size; see the
+   * compute_fog_glare_size_halving_count() method for more information. */
+  int get_fog_glow_size()
+  {
+    return node_storage(bnode()).size;
+  }
   }
 
   /* ----------