Realtime Compositor: Implement Keying node #108393

Merged
Omar Emara merged 4 commits from OmarEmaraDev/blender:keying-node into main 2023-06-24 13:02:41 +02:00
20 changed files with 781 additions and 129 deletions

View File

@ -68,10 +68,14 @@ set(SRC
COM_texture_pool.hh
COM_utilities.hh
algorithms/intern/algorithm_parallel_reduction.cc
algorithms/intern/morphological_distance.cc
algorithms/intern/morphological_distance_feather.cc
algorithms/intern/parallel_reduction.cc
algorithms/intern/smaa.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/COM_algorithm_morphological_distance.hh
algorithms/COM_algorithm_morphological_distance_feather.hh
algorithms/COM_algorithm_parallel_reduction.hh
algorithms/COM_algorithm_smaa.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
@ -135,6 +139,11 @@ set(GLSL_SRC
shaders/compositor_glare_streaks_filter.glsl
shaders/compositor_id_mask.glsl
shaders/compositor_image_crop.glsl
shaders/compositor_keying_compute_image.glsl
shaders/compositor_keying_compute_matte.glsl
shaders/compositor_keying_extract_chroma.glsl
shaders/compositor_keying_replace_chroma.glsl
shaders/compositor_keying_tweak_matte.glsl
shaders/compositor_map_uv.glsl
shaders/compositor_morphological_distance.glsl
shaders/compositor_morphological_distance_feather.glsl
@ -239,6 +248,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_glare_info.hh
shaders/infos/compositor_id_mask_info.hh
shaders/infos/compositor_image_crop_info.hh
shaders/infos/compositor_keying_info.hh
shaders/infos/compositor_map_uv_info.hh
shaders/infos/compositor_morphological_distance_feather_info.hh
shaders/infos/compositor_morphological_distance_info.hh

View File

@ -164,6 +164,20 @@ class Result {
* the discussion above for more information. */
void pass_through(Result &target);
/* Steal the allocated data from the given source result and assign it to this result, then
* remove any references to the data from the source result. It is assumed that:
*
* - Both results are of the same type.
* - This result is not allocated but the source result is allocated.
* - Neither of the results is a proxy one, that is, has a master result.
*
* This is different from proxy results and the pass_through mechanism in that it can be used on
* temporary results. This is most useful in multi-step compositor operations where some steps
* can be optional; in that case, intermediate results can be temporary results whose data is
* eventually stolen by the actual output of the operation. See the uses of this method for
* practical examples. */
void steal_data(Result &source);
/* Transform the result by the given transformation. This effectively pre-multiplies the given
* transformation by the current transformation of the domain of the result. */
void transform(const float3x3 &transformation);
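
A minimal sketch of the intended steal_data usage pattern in a multi-step operation. The helpers compute_first_step, compute_second_step and is_second_step_needed are hypothetical; only the Result API shown in this diff is assumed:

void execute()
{
  /* Intermediate result that may or may not end up being the final output. */
  Result intermediate = Result::Temporary(ResultType::Color, context().texture_pool());
  compute_first_step(intermediate);

  Result &output = get_result("Image");
  if (is_second_step_needed()) {
    /* The optional step writes into the real output, so the temporary is released. */
    compute_second_step(intermediate, output);
    intermediate.release();
  }
  else {
    /* No second step, so the output simply takes over the temporary's allocated data. */
    output.steal_data(intermediate);
  }
}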

View File

@ -0,0 +1,18 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Dilate or erode the given input using a morphological operator with a circular structuring
* element of radius equivalent to the absolute value of the given distance parameter. A positive
* distance corresponds to a dilate operator, while a negative distance corresponds to an erode
* operator. */
void morphological_distance(Context &context, Result &input, Result &output, int distance);
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,22 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Dilate or erode the given input using a morphological inverse distance operation evaluated at
* the given falloff. The radius of the structuring element is equivalent to the absolute value of
* the given distance parameter. A positive distance corresponds to a dilate operator, while a
* negative distance corresponds to an erode operator. See the implementation and shader for more
* information. */
void morphological_distance_feather(
Context &context, Result &input, Result &output, int distance, int falloff_type = PROP_SMOOTH);
} // namespace blender::realtime_compositor
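
A short sketch of how these algorithm entry points are called from inside a node operation, mirroring the Dilate/Erode node changes later in this diff; distance and falloff are placeholder values:

/* Positive distance dilates, negative distance erodes. */
morphological_distance(context(), get_input("Mask"), get_result("Mask"), distance);

/* The feathered variant additionally takes a falloff type, defaulting to PROP_SMOOTH. */
morphological_distance_feather(context(), get_input("Mask"), get_result("Mask"), distance, falloff);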

View File

@ -6,6 +6,8 @@
#include "BLI_math_vector_types.hh"
#include "DNA_scene_types.h"
#include "COM_context.hh"
#include "COM_result.hh"
@ -22,8 +24,8 @@ void symmetric_separable_blur(Context &context,
Result &input,
Result &output,
float2 radius,
int filter_type,
bool extend_bounds,
bool gamma_correct);
int filter_type = R_FILTER_GAUSS,
bool extend_bounds = false,
bool gamma_correct = false);
} // namespace blender::realtime_compositor
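
With the new default arguments, callers that only need a plain Gaussian blur without extended bounds or gamma correction can omit the trailing parameters. A minimal sketch, assuming a Context reference named context, an allocated float Result named input, and a placeholder radius:

Result blurred = Result::Temporary(ResultType::Float, context.texture_pool());
symmetric_separable_blur(context, input, blurred, float2(radius));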

View File

@ -0,0 +1,46 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_morphological_distance.hh"
namespace blender::realtime_compositor {
static const char *get_shader_name(int distance)
{
if (distance > 0) {
return "compositor_morphological_distance_dilate";
}
return "compositor_morphological_distance_erode";
}
void morphological_distance(Context &context, Result &input, Result &output, int distance)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
/* Pass the absolute value of the distance. We have specialized shaders for each sign. */
GPU_shader_uniform_1i(shader, "radius", math::abs(distance));
input.bind_as_texture(shader, "input_tx");
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
output.unbind_as_image();
input.unbind_as_texture();
}
} // namespace blender::realtime_compositor

View File

@ -0,0 +1,107 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_context.hh"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
static const char *get_shader_name(int distance)
{
if (distance > 0) {
return "compositor_morphological_distance_feather_dilate";
}
return "compositor_morphological_distance_feather_erode";
}
static Result horizontal_pass(Context &context, Result &input, int distance, int falloff_type)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
input.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context.cache_manager().morphological_distance_feather_weights.get(falloff_type,
math::abs(distance));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will process the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal pass shader, but since its input is
* transposed, it will effectively do a vertical pass and write the output transposed, undoing
* the transposition done in the horizontal pass. This is done to improve spatial cache locality
* in the shader and to avoid having a separate shader for each of the passes. */
const Domain domain = input.domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = Result::Temporary(ResultType::Float, context.texture_pool());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
output.unbind_as_image();
return output;
}
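
To make the transposition concrete (illustrative sizes only): for a 1920x1080 input, this horizontal pass dispatches over 1920x1080 threads but allocates and writes a 1080x1920 intermediate, while the vertical pass below dispatches over the transposed size 1080x1920 and allocates the final output at the original 1920x1080 size.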
static void vertical_pass(Context &context,
Result &original_input,
Result &horizontal_pass_result,
Result &output,
int distance,
int falloff_type)
{
GPUShader *shader = context.shader_manager().get(get_shader_name(distance));
GPU_shader_bind(shader);
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context.cache_manager().morphological_distance_feather_weights.get(falloff_type,
math::abs(distance));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
const Domain domain = original_input.domain();
output.allocate_texture(domain);
output.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed; see the note on the horizontal pass function for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
horizontal_pass_result.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
output.unbind_as_image();
}
void morphological_distance_feather(
Context &context, Result &input, Result &output, int distance, int falloff_type)
{
Result horizontal_pass_result = horizontal_pass(context, input, distance, falloff_type);
vertical_pass(context, input, horizontal_pass_result, output, distance, falloff_type);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@ -10,6 +10,7 @@
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
@ -18,6 +19,15 @@
namespace blender::realtime_compositor {
static const char *get_blur_shader(ResultType type)
{
if (type == ResultType::Float) {
return "compositor_symmetric_separable_blur_float";
}
return "compositor_symmetric_separable_blur_color";
}
static Result horizontal_pass(Context &context,
Result &input,
float radius,
@ -25,7 +35,7 @@ static Result horizontal_pass(Context &context,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPUShader *shader = context.shader_manager().get(get_blur_shader(input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
@ -53,7 +63,7 @@ static Result horizontal_pass(Context &context,
* pass. */
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = Result::Temporary(ResultType::Color, context.texture_pool());
Result output = Result::Temporary(input.type(), context.texture_pool());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
@ -76,7 +86,7 @@ static void vertical_pass(Context &context,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPUShader *shader = context.shader_manager().get(get_blur_shader(original_input.type()));
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);

View File

@ -2,6 +2,7 @@
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_math_matrix_types.hh"
#include "BLI_math_vector_types.hh"
@ -133,6 +134,33 @@ void Result::pass_through(Result &target)
target.master_ = this;
}
void Result::steal_data(Result &source)
{
BLI_assert(type_ == source.type_);
BLI_assert(!is_allocated() && source.is_allocated());
BLI_assert(master_ == nullptr && source.master_ == nullptr);
is_single_value_ = source.is_single_value_;
texture_ = source.texture_;
texture_pool_ = source.texture_pool_;
domain_ = source.domain_;
switch (type_) {
case ResultType::Float:
float_value_ = source.float_value_;
break;
case ResultType::Vector:
vector_value_ = source.vector_value_;
break;
case ResultType::Color:
color_value_ = source.color_value_;
break;
}
source.texture_ = nullptr;
source.texture_pool_ = nullptr;
}
void Result::transform(const float3x3 &transformation)
{
domain_.transform(transformation);
@ -235,6 +263,7 @@ void Result::release()
reference_count_--;
if (reference_count_ == 0) {
texture_pool_->release(texture_);
texture_ = nullptr;
}
}

View File

@ -0,0 +1,21 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 key = texture_load(key_tx, texel);
vec4 color = texture_load(input_tx, texel);
float matte = texture_load(matte_tx, texel).x;
/* Alpha multiply the matte to the image. */
color *= matte;
/* Color despill. */
ivec3 key_argmax = argmax(key.rgb);
float weighted_average = mix(color[key_argmax.y], color[key_argmax.z], despill_balance);
color[key_argmax.x] -= (color[key_argmax.x] - weighted_average) * despill_factor;
imageStore(output_img, texel, color);
}

View File

@ -0,0 +1,32 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
float compute_saturation(vec4 color, ivec3 argmax)
{
float weighted_average = mix(color[argmax.y], color[argmax.z], key_balance);
return (color[argmax.x] - weighted_average) * abs(1.0 - weighted_average);
}
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 input_color = texture_load(input_tx, texel);
/* We assume that the keying screen is not overexposed in the input image, so if all channels
* of the input are brighter than 1, the pixel is assumed to be opaque. */
if (min_v3(input_color) > 1.0f) {
imageStore(output_img, texel, vec4(1.0));
return;
}
vec4 key_color = texture_load(key_tx, texel);
ivec3 key_argmax = argmax(key_color.rgb);
float input_saturation = compute_saturation(input_color, key_argmax);
float key_saturation = compute_saturation(key_color, key_argmax);
float matte = 1.0f - clamp(input_saturation / key_saturation, 0.0, 1.0);
imageStore(output_img, texel, vec4(matte));
}
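
As a rough worked example of the formula above (illustrative numbers only, not from the patch): with key_balance = 0.5 and a pure green key of (0.1, 0.8, 0.1), the key saturation is (0.8 - 0.1) * |1.0 - 0.1| = 0.63. A background pixel close to the key color yields a similar input saturation, so the ratio approaches 1 and the matte approaches 0 (transparent), while a foreground pixel whose green channel is not dominant yields a small or negative input saturation, so the clamped ratio approaches 0 and the matte approaches 1 (opaque).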

View File

@ -0,0 +1,12 @@
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color_ycca;
rgba_to_ycca_itu_709(texture_load(input_tx, texel), color_ycca);
imageStore(output_img, texel, color_ycca);
}

View File

@ -0,0 +1,17 @@
#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 color_ycca;
rgba_to_ycca_itu_709(texture_load(input_tx, texel), color_ycca);
color_ycca.yz = texture_load(new_chroma_tx, texel).yz;
vec4 color_rgba;
ycca_to_rgba_itu_709(color_ycca, color_rgba);
imageStore(output_img, texel, color_rgba);
}

View File

@ -0,0 +1,54 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
float matte = texture_load(input_matte_tx, texel).x;
/* Search the neighbourhood around the current matte value and identify if it lies along the
* edges of the matte. This needs to be computed only when the edges output is needed or the
* levels of the matte will be tweaked. */
bool is_edge = false;
if (compute_edges || black_level != 0.0 || white_level != 1.0) {
/* Count the number of neighbours whose matte is sufficiently similar to the current matte,
* as controlled by the edge_tolerance factor. */
int count = 0;
for (int j = -edge_search_radius; j <= edge_search_radius; j++) {
for (int i = -edge_search_radius; i <= edge_search_radius; i++) {
float neighbour_matte = texture_load(input_matte_tx, texel + ivec2(i, j)).x;
count += int(distance(matte, neighbour_matte) < edge_tolerance);
}
}
/* If the number of neighbours that are sufficiently similar to the center matte is less than
* 90% of the total number of neighbours, the variance is high in that area and it is
* considered an edge. */
is_edge = count < ((edge_search_radius * 2 + 1) * (edge_search_radius * 2 + 1)) * 0.9;
}
float tweaked_matte = matte;
/* Remap the matte using the black and white levels, but only for areas that are not on the edge
* of the matte, to preserve details. Also check that the levels are not equal to avoid division
* by zero. */
if (!is_edge && white_level != black_level) {
tweaked_matte = clamp((matte - black_level) / (white_level - black_level), 0.0, 1.0);
}
/* Exclude unwanted areas using the provided garbage matte; a value of 1 means unwanted, so
* invert the garbage matte and take the minimum. */
if (apply_garbage_matte) {
float garbage_matte = texture_load(garbage_matte_tx, texel).x;
tweaked_matte = min(tweaked_matte, 1.0 - garbage_matte);
}
/* Include wanted areas that were incorrectly keyed using the provided core matte. */
if (apply_core_matte) {
float core_matte = texture_load(core_matte_tx, texel).x;
tweaked_matte = max(tweaked_matte, core_matte);
}
imageStore(output_matte_img, texel, vec4(tweaked_matte));
imageStore(output_edges_img, texel, vec4(is_edge ? 1.0 : 0.0));
}
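
For a sense of scale of the edge heuristic above: with edge_search_radius = 3, the window holds (2 * 3 + 1)^2 = 49 samples, so a pixel is flagged as an edge when 44 or fewer of those samples (49 * 0.9 = 44.1, including the center itself) fall within edge_tolerance of the center matte value.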

View File

@ -0,0 +1,57 @@
/* SPDX-FileCopyrightText: 2023 Blender Foundation
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_keying_extract_chroma)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_extract_chroma.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_replace_chroma)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "new_chroma_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_replace_chroma.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_compute_matte)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "key_balance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "key_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_compute_matte.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_tweak_matte)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "compute_edges")
.push_constant(Type::BOOL, "apply_core_matte")
.push_constant(Type::BOOL, "apply_garbage_matte")
.push_constant(Type::INT, "edge_search_radius")
.push_constant(Type::FLOAT, "edge_tolerance")
.push_constant(Type::FLOAT, "black_level")
.push_constant(Type::FLOAT, "white_level")
.sampler(0, ImageType::FLOAT_2D, "input_matte_tx")
.sampler(1, ImageType::FLOAT_2D, "garbage_matte_tx")
.sampler(2, ImageType::FLOAT_2D, "core_matte_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_matte_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_edges_img")
.compute_source("compositor_keying_tweak_matte.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_keying_compute_image)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "despill_factor")
.push_constant(Type::FLOAT, "despill_balance")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "matte_tx")
.sampler(2, ImageType::FLOAT_2D, "key_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_keying_compute_image.glsl")
.do_static_compilation(true);

View File

@ -4,13 +4,21 @@
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur)
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_shared)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "extend_bounds")
.push_constant(Type::BOOL, "gamma_correct_input")
.push_constant(Type::BOOL, "gamma_uncorrect_output")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_1D, "weights_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_symmetric_separable_blur.glsl")
.compute_source("compositor_symmetric_separable_blur.glsl");
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_float)
.additional_info("compositor_symmetric_separable_blur_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_color)
.additional_info("compositor_symmetric_separable_blur_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@ -54,6 +54,20 @@ mat2 rot2_from_angle(float a)
return mat2(c, -s, s, c);
}
OmarEmaraDev marked this conversation as resolved (Outdated)

Add documentation. At least add what implementation it follows (i.e. Python numpy).

/* Computes the full argmax of the given vector, that is, the index of the greatest component will
* be in the returned x component, the index of the smallest component will be in the returned z
* component, and the index of the middle component will be in the returned y component.
*
* This is computed by utilizing the fact that booleans are converted to the integers 0 and 1 for
* false and true respectively. So if we compare every component to all other components using the
* greaterThan comparator, we get 0 for the greatest component, because no other component is
* greater, 1 for the middle component, and 2 for the smallest component. */
ivec3 argmax(vec3 v)
{
return ivec3(greaterThan(v, v.xxx)) + ivec3(greaterThan(v, v.yyy)) +
ivec3(greaterThan(v, v.zzz));
}
#define min3(a, b, c) min(a, min(b, c))
#define min4(a, b, c, d) min(a, min3(b, c, d))
#define min5(a, b, c, d, e) min(a, min4(b, c, d, e))

View File

@ -19,7 +19,8 @@
#include "GPU_state.h"
#include "GPU_texture.h"
#include "COM_morphological_distance_feather_weights.hh"
#include "COM_algorithm_morphological_distance.hh"
#include "COM_algorithm_morphological_distance_feather.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
@ -175,33 +176,7 @@ class DilateErodeOperation : public NodeOperation {
void execute_distance()
{
GPUShader *shader = shader_manager().get(get_morphological_distance_shader_name());
GPU_shader_bind(shader);
/* Pass the absolute value of the distance. We have specialized shaders for each sign. */
GPU_shader_uniform_1i(shader, "radius", math::abs(get_distance()));
const Result &input_mask = get_input("Mask");
input_mask.bind_as_texture(shader, "input_tx");
const Domain domain = compute_domain();
Result &output_mask = get_result("Mask");
output_mask.allocate_texture(domain);
output_mask.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
output_mask.unbind_as_image();
input_mask.unbind_as_texture();
}
const char *get_morphological_distance_shader_name()
{
if (get_distance() > 0) {
return "compositor_morphological_distance_dilate";
}
return "compositor_morphological_distance_erode";
morphological_distance(context(), get_input("Mask"), get_result("Mask"), get_distance());
}
/* ------------------------------------------
@ -244,87 +219,11 @@ class DilateErodeOperation : public NodeOperation {
void execute_distance_feather()
{
GPUTexture *horizontal_pass_result = execute_distance_feather_horizontal_pass();
execute_distance_feather_vertical_pass(horizontal_pass_result);
}
GPUTexture *execute_distance_feather_horizontal_pass()
{
GPUShader *shader = shader_manager().get(get_morphological_distance_feather_shader_name());
GPU_shader_bind(shader);
const Result &input_image = get_input("Mask");
input_image.bind_as_texture(shader, "input_tx");
const MorphologicalDistanceFeatherWeights &weights =
context().cache_manager().morphological_distance_feather_weights.get(
node_storage(bnode()).falloff, math::abs(get_distance()));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will process the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal pass shader, but since its input is
* transposed, it will effectively do a vertical pass and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each of
* the passes. */
const Domain domain = compute_domain();
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
GPUTexture *horizontal_pass_result = texture_pool().acquire_color(transposed_domain);
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(horizontal_pass_result, image_unit);
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input_image.unbind_as_texture();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
GPU_texture_image_unbind(horizontal_pass_result);
return horizontal_pass_result;
}
void execute_distance_feather_vertical_pass(GPUTexture *horizontal_pass_result)
{
GPUShader *shader = shader_manager().get(get_morphological_distance_feather_shader_name());
GPU_shader_bind(shader);
GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
const int texture_image_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
GPU_texture_bind(horizontal_pass_result, texture_image_unit);
const MorphologicalDistanceFeatherWeights &weights =
context().cache_manager().morphological_distance_feather_weights.get(
node_storage(bnode()).falloff, math::abs(get_distance()));
weights.bind_weights_as_texture(shader, "weights_tx");
weights.bind_distance_falloffs_as_texture(shader, "falloffs_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Mask");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass method for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
output_image.unbind_as_image();
weights.unbind_weights_as_texture();
weights.unbind_distance_falloffs_as_texture();
GPU_texture_unbind(horizontal_pass_result);
}
const char *get_morphological_distance_feather_shader_name()
{
if (get_distance() > 0) {
return "compositor_morphological_distance_feather_dilate";
}
return "compositor_morphological_distance_feather_erode";
morphological_distance_feather(context(),
get_input("Mask"),
get_result("Mask"),
get_distance(),
node_storage(bnode()).falloff);
}
/* ---------------

View File

@ -7,13 +7,22 @@
*/
#include "BLI_math_base.h"
#include "BLI_math_vector_types.hh"
#include "DNA_movieclip_types.h"
#include "DNA_scene_types.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_algorithm_morphological_distance.hh"
#include "COM_algorithm_morphological_distance_feather.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -21,12 +30,18 @@
namespace blender::nodes::node_composite_keying_cc {
NODE_STORAGE_FUNCS(NodeKeyingData)
static void cmp_node_keying_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>("Image").default_value({0.8f, 0.8f, 0.8f, 1.0f});
b.add_input<decl::Color>("Key Color").default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>("Garbage Matte").hide_value();
b.add_input<decl::Float>("Core Matte").hide_value();
b.add_input<decl::Color>("Image")
.default_value({0.8f, 0.8f, 0.8f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Color>("Key Color")
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(1);
b.add_input<decl::Float>("Garbage Matte").hide_value().compositor_domain_priority(2);
b.add_input<decl::Float>("Core Matte").hide_value().compositor_domain_priority(3);
b.add_output<decl::Color>("Image");
b.add_output<decl::Float>("Matte");
b.add_output<decl::Float>("Edges");
@ -72,10 +87,277 @@ class KeyingOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
get_result("Matte").allocate_invalid();
get_result("Edges").allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
Result blurred_input = compute_blurred_input();
Result matte = compute_matte(blurred_input);
blurred_input.release();
/* This also computes the edges output if needed. */
Result tweaked_matte = compute_tweaked_matte(matte);
matte.release();
Result &output_image = get_result("Image");
Result &output_matte = get_result("Matte");
if (output_image.should_compute() || output_matte.should_compute()) {
Result blurred_matte = compute_blurred_matte(tweaked_matte);
tweaked_matte.release();
Result morphed_matte = compute_morphed_matte(blurred_matte);
blurred_matte.release();
Result feathered_matte = compute_feathered_matte(morphed_matte);
morphed_matte.release();
if (output_image.should_compute()) {
compute_image(feathered_matte);
}
if (output_matte.should_compute()) {
output_matte.steal_data(feathered_matte);
}
else {
feathered_matte.release();
}
}
}
Result compute_blurred_input()
{
/* No blur needed, return the original input. We also increment the reference count of the
* input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
const float blur_size = node_storage(bnode()).blur_pre;
if (blur_size == 0.0f) {
Result output = get_input("Image");
output.increment_reference_count();
return output;
}
Result chroma = extract_input_chroma();
Result blurred_chroma = Result::Temporary(ResultType::Color, context().texture_pool());
symmetric_separable_blur(context(), chroma, blurred_chroma, float2(blur_size), R_FILTER_BOX);
chroma.release();
Result blurred_input = replace_input_chroma(blurred_chroma);
blurred_chroma.release();
return blurred_input;
}
Result extract_input_chroma()
{
GPUShader *shader = context().shader_manager().get("compositor_keying_extract_chroma");
GPU_shader_bind(shader);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
Result output = Result::Temporary(ResultType::Color, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result replace_input_chroma(Result &new_chroma)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_replace_chroma");
GPU_shader_bind(shader);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
new_chroma.bind_as_texture(shader, "new_chroma_tx");
Result output = Result::Temporary(ResultType::Color, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
new_chroma.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result compute_matte(Result &input)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_compute_matte");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(shader, "key_balance", node_storage(bnode()).screen_balance);
input.bind_as_texture(shader, "input_tx");
Result &key_color = get_input("Key Color");
key_color.bind_as_texture(shader, "key_tx");
Result output = Result::Temporary(ResultType::Float, context().texture_pool());
output.allocate_texture(input.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
key_color.unbind_as_texture();
output.unbind_as_image();
return output;
}
Result compute_tweaked_matte(Result &input_matte)
{
Result &output_edges = get_result("Edges");
const float black_level = node_storage(bnode()).clip_black;
const float white_level = node_storage(bnode()).clip_white;
const bool core_matte_exists = node().input_by_identifier("Core Matte")->is_logically_linked();
const bool garbage_matte_exists =
node().input_by_identifier("Garbage Matte")->is_logically_linked();
/* The edges output is not needed and the matte is not tweaked, so return the original matte.
* We also increment the reference count of the input because the caller will release it after
* the call, and we want to extend its life since it is now returned as the output. */
if (!output_edges.should_compute() && (black_level == 0.0f && white_level == 1.0f) &&
!core_matte_exists && !garbage_matte_exists)
{
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
GPUShader *shader = context().shader_manager().get("compositor_keying_tweak_matte");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "compute_edges", output_edges.should_compute());
GPU_shader_uniform_1b(shader, "apply_core_matte", core_matte_exists);
GPU_shader_uniform_1b(shader, "apply_garbage_matte", garbage_matte_exists);
GPU_shader_uniform_1i(shader, "edge_search_radius", node_storage(bnode()).edge_kernel_radius);
GPU_shader_uniform_1f(shader, "edge_tolerance", node_storage(bnode()).edge_kernel_tolerance);
GPU_shader_uniform_1f(shader, "black_level", black_level);
GPU_shader_uniform_1f(shader, "white_level", white_level);
input_matte.bind_as_texture(shader, "input_matte_tx");
Result &garbage_matte = get_input("Garbage Matte");
garbage_matte.bind_as_texture(shader, "garbage_matte_tx");
Result &core_matte = get_input("Core Matte");
core_matte.bind_as_texture(shader, "core_matte_tx");
Result output_matte = Result::Temporary(ResultType::Float, context().texture_pool());
output_matte.allocate_texture(input_matte.domain());
output_matte.bind_as_image(shader, "output_matte_img");
output_edges.allocate_texture(input_matte.domain());
output_edges.bind_as_image(shader, "output_edges_img");
compute_dispatch_threads_at_least(shader, input_matte.domain().size);
GPU_shader_unbind();
input_matte.unbind_as_texture();
garbage_matte.unbind_as_texture();
core_matte.unbind_as_texture();
output_matte.unbind_as_image();
output_edges.unbind_as_image();
return output_matte;
}
Result compute_blurred_matte(Result &input_matte)
{
const float blur_size = node_storage(bnode()).blur_post;
/* No blur needed, return the original matte. We also increment the reference count of the
* input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (blur_size == 0.0f) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result blurred_matte = Result::Temporary(ResultType::Float, context().texture_pool());
symmetric_separable_blur(context(), input_matte, blurred_matte, float2(blur_size));
return blurred_matte;
}
Result compute_morphed_matte(Result &input_matte)
{
const int distance = node_storage(bnode()).dilate_distance;
/* No morphology needed, return the original matte. We also increment the reference count of
* the input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (distance == 0) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result morphed_matte = Result::Temporary(ResultType::Float, context().texture_pool());
morphological_distance(context(), input_matte, morphed_matte, distance);
return morphed_matte;
}
Result compute_feathered_matte(Result &input_matte)
{
const int distance = node_storage(bnode()).feather_distance;
/* No feathering needed, return the original matte. We also increment the reference count of
* the input because the caller will release it after the call, and we want to extend its life
* since it is now returned as the output. */
if (distance == 0) {
Result output_matte = input_matte;
input_matte.increment_reference_count();
return output_matte;
}
Result feathered_matte = Result::Temporary(ResultType::Float, context().texture_pool());
morphological_distance_feather(
context(), input_matte, feathered_matte, distance, node_storage(bnode()).feather_falloff);
return feathered_matte;
}
void compute_image(Result &matte)
{
GPUShader *shader = context().shader_manager().get("compositor_keying_compute_image");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(shader, "despill_factor", node_storage(bnode()).despill_factor);
GPU_shader_uniform_1f(shader, "despill_balance", node_storage(bnode()).despill_balance);
Result &input = get_input("Image");
input.bind_as_texture(shader, "input_tx");
Result &key = get_input("Key Color");
key.bind_as_texture(shader, "key_tx");
matte.bind_as_texture(shader, "matte_tx");
Result &output = get_result("Image");
output.allocate_texture(matte.domain());
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, input.domain().size);
GPU_shader_unbind();
input.unbind_as_texture();
key.unbind_as_texture();
matte.unbind_as_texture();
output.unbind_as_image();
}
};
@ -99,8 +381,6 @@ void register_node_type_cmp_keying()
node_type_storage(
&ntype, "NodeKeyingData", node_free_standard_storage, node_copy_standard_storage);
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}