diff --git a/release/scripts/startup/bl_ui/properties_data_camera.py b/release/scripts/startup/bl_ui/properties_data_camera.py index 0f839eac126..963ffc60806 100644 --- a/release/scripts/startup/bl_ui/properties_data_camera.py +++ b/release/scripts/startup/bl_ui/properties_data_camera.py @@ -201,7 +201,7 @@ class DATA_PT_camera(CameraButtonsPanel, Panel): class DATA_PT_camera_dof(CameraButtonsPanel, Panel): bl_label = "Depth of Field" bl_options = {'DEFAULT_CLOSED'} - COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} + COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_WORKBENCH'} def draw_header(self, context): cam = context.camera @@ -228,7 +228,7 @@ class DATA_PT_camera_dof(CameraButtonsPanel, Panel): class DATA_PT_camera_dof_aperture(CameraButtonsPanel, Panel): bl_label = "Aperture" bl_parent_id = "DATA_PT_camera_dof" - COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_WORKBENCH'} + COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT', 'BLENDER_WORKBENCH'} def draw(self, context): layout = self.layout diff --git a/release/scripts/startup/bl_ui/properties_render.py b/release/scripts/startup/bl_ui/properties_render.py index 630364f01f4..8a637b00362 100644 --- a/release/scripts/startup/bl_ui/properties_render.py +++ b/release/scripts/startup/bl_ui/properties_render.py @@ -223,7 +223,7 @@ class RENDER_PT_motion_blur_curve(RenderButtonsPanel, Panel): class RENDER_PT_eevee_depth_of_field(RenderButtonsPanel, Panel): bl_label = "Depth of Field" bl_options = {'DEFAULT_CLOSED'} - COMPAT_ENGINES = {'BLENDER_EEVEE'} + COMPAT_ENGINES = {'BLENDER_EEVEE', 'BLENDER_EEVEE_NEXT'} @classmethod def poll(cls, context): diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index cf673e9d4a7..e907c17e9d1 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -134,6 +134,7 @@ set(SRC engines/eevee/eevee_temporal_sampling.c engines/eevee/eevee_volumes.c engines/eevee_next/eevee_camera.cc + engines/eevee_next/eevee_depth_of_field.cc engines/eevee_next/eevee_engine.cc engines/eevee_next/eevee_film.cc engines/eevee_next/eevee_instance.cc @@ -362,6 +363,21 @@ set(GLSL_SRC engines/eevee_next/shaders/eevee_attributes_lib.glsl engines/eevee_next/shaders/eevee_camera_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl + engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl + engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl + engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl + engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl engines/eevee_next/shaders/eevee_film_comp.glsl engines/eevee_next/shaders/eevee_film_frag.glsl engines/eevee_next/shaders/eevee_film_lib.glsl diff --git a/source/blender/draw/engines/eevee_next/eevee_camera.hh b/source/blender/draw/engines/eevee_next/eevee_camera.hh index 8bf64199246..49f9b14e11b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_camera.hh +++ b/source/blender/draw/engines/eevee_next/eevee_camera.hh @@ -82,7 +82,6 @@ class Camera { private: Instance &inst_; - /** Double buffered to detect changes and have history for re-projection. */ CameraDataBuf data_; public: @@ -112,6 +111,10 @@ class Camera { { return data_.type == CAMERA_ORTHO; } + bool is_perspective() const + { + return data_.type == CAMERA_PERSP; + } const float3 &position() const { return *reinterpret_cast(data_.viewinv[3]); diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index cb02689f34a..8240af14203 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -44,8 +44,22 @@ /* Minimum visibility size. */ #define LIGHTPROBE_FILTER_VIS_GROUP_SIZE 16 +/* Film. */ #define FILM_GROUP_SIZE 16 +/* Motion Blur. */ #define MOTION_BLUR_GROUP_SIZE 32 - #define MOTION_BLUR_DILATE_GROUP_SIZE 512 + +/* Depth Of Field. */ +#define DOF_TILES_SIZE 8 +#define DOF_TILES_FLATTEN_GROUP_SIZE DOF_TILES_SIZE +#define DOF_TILES_DILATE_GROUP_SIZE 8 +#define DOF_BOKEH_LUT_SIZE 32 +#define DOF_MAX_SLIGHT_FOCUS_RADIUS 5 +#define DOF_REDUCE_GROUP_SIZE 8 +#define DOF_DEFAULT_GROUP_SIZE 32 +#define DOF_FILTER_GROUP_SIZE 8 +#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE +#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2) +#define DOF_MIP_MAX 4 diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc new file mode 100644 index 00000000000..26129192d9e --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.cc @@ -0,0 +1,720 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#include "DRW_render.h" + +#include "BKE_camera.h" +#include "DNA_camera_types.h" + +#include "GPU_platform.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" + +#include "eevee_camera.hh" +#include "eevee_instance.hh" +#include "eevee_sampling.hh" +#include "eevee_shader.hh" +#include "eevee_shader_shared.hh" + +#include "eevee_depth_of_field.hh" + +namespace blender::eevee { + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +void DepthOfField::init() +{ + const SceneEEVEE &sce_eevee = inst_.scene->eevee; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera = (camera_object_eval) ? + reinterpret_cast(camera_object_eval->data) : + nullptr; + if (camera == nullptr) { + /* Set to invalid value for update detection */ + data_.scatter_color_threshold = -1.0f; + return; + } + /* Reminder: These are parameters not interpolated by motion blur. */ + int update = 0; + int sce_flag = sce_eevee.flag; + update += assign_if_different(do_hq_slight_focus_, + (sce_flag & SCE_EEVEE_DOF_HQ_SLIGHT_FOCUS) != 0); + update += assign_if_different(do_jitter_, (sce_flag & SCE_EEVEE_DOF_JITTER) != 0); + update += assign_if_different(user_overblur_, sce_eevee.bokeh_overblur / 100.0f); + update += assign_if_different(fx_max_coc_, sce_eevee.bokeh_max_size); + update += assign_if_different(data_.scatter_color_threshold, sce_eevee.bokeh_threshold); + update += assign_if_different(data_.scatter_neighbor_max_color, sce_eevee.bokeh_neighbor_max); + update += assign_if_different(data_.denoise_factor, sce_eevee.bokeh_denoise_fac); + update += assign_if_different(data_.bokeh_blades, float(camera->dof.aperture_blades)); + if (update > 0) { + inst_.sampling.reset(); + } +} + +void DepthOfField::sync() +{ + const Camera &camera = inst_.camera; + const Object *camera_object_eval = inst_.camera_eval_object; + const ::Camera *camera_data = (camera_object_eval) ? + reinterpret_cast(camera_object_eval->data) : + nullptr; + + int update = 0; + + if (camera_data == nullptr || (camera_data->dof.flag & CAM_DOF_ENABLED) == 0) { + update += assign_if_different(jitter_radius_, 0.0f); + update += assign_if_different(fx_radius_, 0.0f); + if (update > 0) { + inst_.sampling.reset(); + } + return; + } + + float2 anisotropic_scale = {clamp_f(1.0f / camera_data->dof.aperture_ratio, 1e-5f, 1.0f), + clamp_f(camera_data->dof.aperture_ratio, 1e-5f, 1.0f)}; + update += assign_if_different(data_.bokeh_anisotropic_scale, anisotropic_scale); + update += assign_if_different(data_.bokeh_rotation, camera_data->dof.aperture_rotation); + update += assign_if_different(focus_distance_, + BKE_camera_object_dof_distance(camera_object_eval)); + data_.bokeh_anisotropic_scale_inv = 1.0f / data_.bokeh_anisotropic_scale; + + float fstop = max_ff(camera_data->dof.aperture_fstop, 1e-5f); + + if (update) { + inst_.sampling.reset(); + } + + float aperture = 1.0f / (2.0f * fstop); + if (camera.is_perspective()) { + aperture *= camera_data->lens * 1e-3f; + } + + if (camera.is_orthographic()) { + /* FIXME: Why is this needed? Some kind of implicit unit conversion? */ + aperture *= 0.04f; + /* Really strange behavior from Cycles but replicating. */ + focus_distance_ += camera.data_get().clip_near; + } + + if (camera.is_panoramic()) { + /* FIXME: Eyeballed. */ + aperture *= 0.185f; + } + + if (camera_data->dof.aperture_ratio < 1.0) { + /* If ratio is scaling the bokeh outwards, we scale the aperture so that + * the gather kernel size will encompass the maximum axis. */ + aperture /= max_ff(camera_data->dof.aperture_ratio, 1e-5f); + } + + float jitter_radius, fx_radius; + + /* Balance blur radius between fx dof and jitter dof. */ + if (do_jitter_ && (inst_.sampling.dof_ring_count_get() > 0) && !camera.is_panoramic() && + !inst_.is_viewport()) { + /* Compute a minimal overblur radius to fill the gaps between the samples. + * This is just the simplified form of dividing the area of the bokeh by + * the number of samples. */ + float minimal_overblur = 1.0f / sqrtf(inst_.sampling.dof_sample_count_get()); + + fx_radius = (minimal_overblur + user_overblur_) * aperture; + /* Avoid dilating the shape. Over-blur only soften. */ + jitter_radius = max_ff(0.0f, aperture - fx_radius); + } + else { + jitter_radius = 0.0f; + fx_radius = aperture; + } + + /* Disable post fx if result wouldn't be noticeable. */ + if (fx_max_coc_ < 0.5f) { + fx_radius = 0.0f; + } + + update += assign_if_different(jitter_radius_, jitter_radius); + update += assign_if_different(fx_radius_, fx_radius); + if (update > 0) { + inst_.sampling.reset(); + } + + if (fx_radius_ == 0.0f) { + return; + } + + /* TODO(fclem): Once we render into multiple view, we will need to use the maximum resolution. */ + int2 max_render_res = inst_.film.render_extent_get(); + int2 half_res = math::divide_ceil(max_render_res, int2(2)); + int2 reduce_size = math::ceil_to_multiple(half_res, int2(1 < (DOF_MIP_MAX - 1))); + + data_.gather_uv_fac = 1.0f / float2(reduce_size); + + /* Now that we know the maximum render resolution of every view, using depth of field, allocate + * the reduced buffers. Color needs to be signed format here. See note in shader for + * explanation. Do not use texture pool because of needs mipmaps. */ + reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_MAX); + reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_MAX); + GPU_texture_wrap_mode(reduced_color_tx_, false, false); + GPU_texture_wrap_mode(reduced_coc_tx_, false, false); + + reduced_color_tx_.ensure_mip_views(); + reduced_coc_tx_.ensure_mip_views(); + + /* Resize the scatter list to contain enough entry to cover half the screen with sprites (which + * is unlikely due to local contrast test). */ + data_.scatter_max_rect = (reduced_color_tx_.pixel_count() / 4) / 2; + scatter_fg_list_buf_.resize(data_.scatter_max_rect); + scatter_bg_list_buf_.resize(data_.scatter_max_rect); + + bokeh_lut_pass_sync(); + setup_pass_sync(); + stabilize_pass_sync(); + downsample_pass_sync(); + reduce_pass_sync(); + tiles_flatten_pass_sync(); + tiles_dilate_pass_sync(); + gather_pass_sync(); + filter_pass_sync(); + scatter_pass_sync(); + hole_fill_pass_sync(); + resolve_pass_sync(); +} + +void DepthOfField::jitter_apply(float4x4 &winmat, float4x4 &viewmat) +{ + if (jitter_radius_ == 0.0f) { + return; + } + + float radius, theta; + inst_.sampling.dof_disk_sample_get(&radius, &theta); + + if (data_.bokeh_blades >= 3.0f) { + theta = circle_to_polygon_angle(data_.bokeh_blades, theta); + radius *= circle_to_polygon_radius(data_.bokeh_blades, theta); + } + radius *= jitter_radius_; + theta += data_.bokeh_rotation; + + /* Sample in View Space. */ + float2 sample = float2(radius * cosf(theta), radius * sinf(theta)); + sample *= data_.bokeh_anisotropic_scale; + /* Convert to NDC Space. */ + float3 jitter = float3(UNPACK2(sample), -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(winmat.ptr(), jitter); + mul_project_m4_v3(winmat.ptr(), center); + + const bool is_ortho = (winmat[2][3] != -1.0f); + if (is_ortho) { + sample *= focus_distance_; + } + /* Translate origin. */ + sub_v2_v2(viewmat[3], sample); + /* Skew winmat Z axis. */ + add_v2_v2(winmat[2], center - jitter); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Passes setup. + * \{ */ + +void DepthOfField::bokeh_lut_pass_sync() +{ + const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f); + if (!has_anisotropy && (data_.bokeh_blades == 0.0)) { + /* No need for LUTs in these cases. */ + bokeh_lut_ps_ = nullptr; + return; + } + + /* Precompute bokeh texture. */ + bokeh_lut_ps_ = DRW_pass_create("Dof.bokeh_lut_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_BOKEH_LUT); + DRWShadingGroup *grp = DRW_shgroup_create(sh, bokeh_lut_ps_); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_image_ref(grp, "out_gather_lut_img", &bokeh_gather_lut_tx_); + DRW_shgroup_uniform_image_ref(grp, "out_scatter_lut_img", &bokeh_scatter_lut_tx_); + DRW_shgroup_uniform_image_ref(grp, "out_resolve_lut_img", &bokeh_resolve_lut_tx_); + DRW_shgroup_call_compute(grp, 1, 1, 1); +} + +void DepthOfField::setup_pass_sync() +{ + RenderBuffers &render_buffers = inst_.render_buffers; + + setup_ps_ = DRW_pass_create("Dof.setup_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_SETUP); + DRWShadingGroup *grp = DRW_shgroup_create(sh, setup_ps_); + DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter); + DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &setup_color_tx_); + DRW_shgroup_uniform_image_ref(grp, "out_coc_img", &setup_coc_tx_); + DRW_shgroup_call_compute_ref(grp, dispatch_setup_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::stabilize_pass_sync() +{ + stabilize_ps_ = DRW_pass_create("Dof.stabilize_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_STABILIZE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, stabilize_ps_); + DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter); + DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &setup_color_tx_, no_filter); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_image(grp, "out_coc_img", reduced_coc_tx_.mip_view(0)); + DRW_shgroup_uniform_image(grp, "out_color_img", reduced_color_tx_.mip_view(0)); + DRW_shgroup_call_compute_ref(grp, dispatch_stabilize_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::downsample_pass_sync() +{ + downsample_ps_ = DRW_pass_create("Dof.downsample_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_DOWNSAMPLE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, downsample_ps_); + DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_.mip_view(0), no_filter); + DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_.mip_view(0), no_filter); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &downsample_tx_); + DRW_shgroup_call_compute_ref(grp, dispatch_downsample_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::reduce_pass_sync() +{ + reduce_ps_ = DRW_pass_create("Dof.reduce_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_REDUCE); + DRWShadingGroup *grp = DRW_shgroup_create(sh, reduce_ps_); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_texture_ref_ex(grp, "downsample_tx", &downsample_tx_, no_filter); + DRW_shgroup_storage_block(grp, "scatter_fg_list_buf", scatter_fg_list_buf_); + DRW_shgroup_storage_block(grp, "scatter_bg_list_buf", scatter_bg_list_buf_); + DRW_shgroup_storage_block(grp, "scatter_fg_indirect_buf", scatter_fg_indirect_buf_); + DRW_shgroup_storage_block(grp, "scatter_bg_indirect_buf", scatter_bg_indirect_buf_); + DRW_shgroup_uniform_image(grp, "inout_color_lod0_img", reduced_color_tx_.mip_view(0)); + DRW_shgroup_uniform_image(grp, "out_color_lod1_img", reduced_color_tx_.mip_view(1)); + DRW_shgroup_uniform_image(grp, "out_color_lod2_img", reduced_color_tx_.mip_view(2)); + DRW_shgroup_uniform_image(grp, "out_color_lod3_img", reduced_color_tx_.mip_view(3)); + DRW_shgroup_uniform_image(grp, "in_coc_lod0_img", reduced_coc_tx_.mip_view(0)); + DRW_shgroup_uniform_image(grp, "out_coc_lod1_img", reduced_coc_tx_.mip_view(1)); + DRW_shgroup_uniform_image(grp, "out_coc_lod2_img", reduced_coc_tx_.mip_view(2)); + DRW_shgroup_uniform_image(grp, "out_coc_lod3_img", reduced_coc_tx_.mip_view(3)); + /* Sync writes to inout_color_lod0_img from stabilize_ps_. */ + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS); + DRW_shgroup_call_compute_ref(grp, dispatch_reduce_size_); + /* NOTE: Command buffer barrier is done automatically by the GPU backend. */ + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE); +} + +void DepthOfField::tiles_flatten_pass_sync() +{ + tiles_flatten_ps_ = DRW_pass_create("Dof.tiles_flatten_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_TILES_FLATTEN); + DRWShadingGroup *grp = DRW_shgroup_create(sh, tiles_flatten_ps_); + /* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus + * flag from the setup pass. A better way would be to do the brute-force in focus gather without + * this. */ + DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter); + DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current()); + DRW_shgroup_call_compute_ref(grp, dispatch_tiles_flatten_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS); +} + +void DepthOfField::tiles_dilate_pass_sync() +{ + tiles_dilate_minmax_ps_ = DRW_pass_create("Dof.tiles_dilate_minmax_ps_", DRW_STATE_NO_DRAW); + tiles_dilate_minabs_ps_ = DRW_pass_create("Dof.tiles_dilate_minabs_ps_", DRW_STATE_NO_DRAW); + for (int pass = 0; pass < 2; pass++) { + DRWPass *drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_; + GPUShader *sh = inst_.shaders.static_shader_get((pass == 0) ? DOF_TILES_DILATE_MINMAX : + DOF_TILES_DILATE_MINABS); + DRWShadingGroup *grp = DRW_shgroup_create(sh, drw_pass); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.previous()); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.previous()); + DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current()); + DRW_shgroup_uniform_bool(grp, "dilate_slight_focus", &tiles_dilate_slight_focus_, 1); + DRW_shgroup_uniform_int(grp, "ring_count", &tiles_dilate_ring_count_, 1); + DRW_shgroup_uniform_int(grp, "ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1); + DRW_shgroup_call_compute_ref(grp, dispatch_tiles_dilate_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS); + } +} + +void DepthOfField::gather_pass_sync() +{ + gather_fg_ps_ = DRW_pass_create("Dof.gather_fg_ps_", DRW_STATE_NO_DRAW); + gather_bg_ps_ = DRW_pass_create("Dof.gather_bg_ps_", DRW_STATE_NO_DRAW); + for (int pass = 0; pass < 2; pass++) { + SwapChain &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + bool use_lut = bokeh_lut_ps_ != nullptr; + eShaderType sh_type = (pass == 0) ? + (use_lut ? DOF_GATHER_FOREGROUND_LUT : DOF_GATHER_FOREGROUND) : + (use_lut ? DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND); + GPUShader *sh = inst_.shaders.static_shader_get(sh_type); + DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? gather_fg_ps_ : gather_bg_ps_); + inst_.sampling.bind_resources(grp); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear); + DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest); + DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current()); + DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current()); + DRW_shgroup_uniform_image_ref(grp, "out_occlusion_img", &occlusion_tx_); + DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_gather_lut_tx_); + DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::filter_pass_sync() +{ + filter_fg_ps_ = DRW_pass_create("Dof.filter_fg_ps_", DRW_STATE_NO_DRAW); + filter_bg_ps_ = DRW_pass_create("Dof.filter_bg_ps_", DRW_STATE_NO_DRAW); + for (int pass = 0; pass < 2; pass++) { + SwapChain &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_; + SwapChain &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_; + GPUShader *sh = inst_.shaders.static_shader_get(DOF_FILTER); + DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? filter_fg_ps_ : filter_bg_ps_); + DRW_shgroup_uniform_texture_ref(grp, "color_tx", &color_chain.previous()); + DRW_shgroup_uniform_texture_ref(grp, "weight_tx", &weight_chain.previous()); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current()); + DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current()); + DRW_shgroup_call_compute_ref(grp, dispatch_filter_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); + } +} + +void DepthOfField::scatter_pass_sync() +{ + DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL; + scatter_fg_ps_ = DRW_pass_create("Dof.scatter_fg_ps_", state); + scatter_bg_ps_ = DRW_pass_create("Dof.scatter_bg_ps_", state); + for (int pass = 0; pass < 2; pass++) { + GPUStorageBuf *scatter_buf = (pass == 0) ? scatter_fg_indirect_buf_ : scatter_bg_indirect_buf_; + GPUStorageBuf *rect_list_buf = (pass == 0) ? scatter_fg_list_buf_ : scatter_bg_list_buf_; + + GPUShader *sh = inst_.shaders.static_shader_get(DOF_SCATTER); + DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? scatter_fg_ps_ : scatter_bg_ps_); + DRW_shgroup_uniform_bool_copy(grp, "use_bokeh_lut", bokeh_lut_ps_ != nullptr); + DRW_shgroup_storage_block(grp, "scatter_list_buf", rect_list_buf); + DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_scatter_lut_tx_); + DRW_shgroup_uniform_texture_ref(grp, "occlusion_tx", &occlusion_tx_); + DRW_shgroup_call_procedural_indirect(grp, GPU_PRIM_TRI_STRIP, nullptr, scatter_buf); + } +} + +void DepthOfField::hole_fill_pass_sync() +{ + hole_fill_ps_ = DRW_pass_create("Dof.hole_fill_ps_", DRW_STATE_NO_DRAW); + GPUShader *sh = inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL); + DRWShadingGroup *grp = DRW_shgroup_create(sh, hole_fill_ps_); + inst_.sampling.bind_resources(grp); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear); + DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest); + DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &hole_fill_color_tx_); + DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &hole_fill_weight_tx_); + DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); +} + +void DepthOfField::resolve_pass_sync() +{ + eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; + RenderBuffers &render_buffers = inst_.render_buffers; + + resolve_ps_ = DRW_pass_create("Dof.resolve_ps_", DRW_STATE_NO_DRAW); + bool use_lut = bokeh_lut_ps_ != nullptr; + eShaderType sh_type = do_hq_slight_focus_ ? (use_lut ? DOF_RESOLVE_LUT_HQ : DOF_RESOLVE_HQ) : + (use_lut ? DOF_RESOLVE_LUT : DOF_RESOLVE); + GPUShader *sh = inst_.shaders.static_shader_get(sh_type); + DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_); + inst_.sampling.bind_resources(grp); + DRW_shgroup_uniform_block(grp, "dof_buf", data_); + DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter); + DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter); + DRW_shgroup_uniform_texture_ref_ex(grp, "color_bg_tx", &color_bg_tx_.current(), with_filter); + DRW_shgroup_uniform_texture_ref_ex(grp, "color_fg_tx", &color_fg_tx_.current(), with_filter); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current()); + DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current()); + DRW_shgroup_uniform_texture_ref(grp, "weight_bg_tx", &weight_bg_tx_.current()); + DRW_shgroup_uniform_texture_ref(grp, "weight_fg_tx", &weight_fg_tx_.current()); + DRW_shgroup_uniform_texture_ref(grp, "color_hole_fill_tx", &hole_fill_color_tx_); + DRW_shgroup_uniform_texture_ref(grp, "weight_hole_fill_tx", &hole_fill_weight_tx_); + DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_resolve_lut_tx_); + DRW_shgroup_uniform_image_ref(grp, "out_color_img", &output_color_tx_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); + DRW_shgroup_call_compute_ref(grp, dispatch_resolve_size_); + DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Post-FX Rendering. + * \{ */ + +void DepthOfField::render(GPUTexture **input_tx, GPUTexture **output_tx) +{ + if (fx_radius_ == 0.0f) { + return; + } + + input_color_tx_ = *input_tx; + output_color_tx_ = *output_tx; + extent_ = {GPU_texture_width(input_color_tx_), GPU_texture_height(input_color_tx_)}; + + { + const CameraData &cam_data = inst_.camera.data_get(); + data_.camera_type = cam_data.type; + /* OPTI(fclem) Could be optimized. */ + float3 jitter = float3(fx_radius_, 0.0f, -focus_distance_); + float3 center = float3(0.0f, 0.0f, -focus_distance_); + mul_project_m4_v3(cam_data.winmat.ptr(), jitter); + mul_project_m4_v3(cam_data.winmat.ptr(), center); + /* Simplify CoC calculation to a simple MADD. */ + if (inst_.camera.is_orthographic()) { + data_.coc_mul = (center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_bias = focus_distance_ * data_.coc_mul; + } + else { + data_.coc_bias = -(center[0] - jitter[0]) * 0.5f * extent_[0]; + data_.coc_mul = focus_distance_ * data_.coc_bias; + } + + float min_fg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_near); + float max_bg_coc = coc_radius_from_camera_depth(data_, -cam_data.clip_far); + if (data_.camera_type != CAMERA_ORTHO) { + /* Background is at infinity so maximum CoC is the limit of coc_radius_from_camera_depth + * at -inf. We only do this for perspective camera since orthographic coc limit is inf. */ + max_bg_coc = data_.coc_bias; + } + /* Clamp with user defined max. */ + data_.coc_abs_max = min_ff(max_ff(fabsf(min_fg_coc), fabsf(max_bg_coc)), fx_max_coc_); + /* TODO(fclem): Make this dependent of the quality of the gather pass. */ + data_.scatter_coc_threshold = 4.0f; + + data_.push_update(); + } + + int2 half_res = math::divide_ceil(extent_, int2(2)); + int2 quarter_res = math::divide_ceil(extent_, int2(4)); + int2 tile_res = math::divide_ceil(half_res, int2(DOF_TILES_SIZE)); + + dispatch_setup_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1); + dispatch_stabilize_size_ = int3(math::divide_ceil(half_res, int2(DOF_DEFAULT_GROUP_SIZE)), 1); + dispatch_downsample_size_ = int3(math::divide_ceil(quarter_res, int2(DOF_DEFAULT_GROUP_SIZE)), + 1); + dispatch_reduce_size_ = int3(math::divide_ceil(half_res, int2(DOF_REDUCE_GROUP_SIZE)), 1); + dispatch_tiles_flatten_size_ = int3(math::divide_ceil(half_res, int2(DOF_TILES_SIZE)), 1); + dispatch_tiles_dilate_size_ = int3( + math::divide_ceil(tile_res, int2(DOF_TILES_DILATE_GROUP_SIZE)), 1); + dispatch_gather_size_ = int3(math::divide_ceil(half_res, int2(DOF_GATHER_GROUP_SIZE)), 1); + dispatch_filter_size_ = int3(math::divide_ceil(half_res, int2(DOF_FILTER_GROUP_SIZE)), 1); + dispatch_resolve_size_ = int3(math::divide_ceil(extent_, int2(DOF_RESOLVE_GROUP_SIZE)), 1); + + if (GPU_type_matches_ex(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { + /* On Mesa, there is a sync bug which can make a portion of the main pass (usually one shader) + * leave blocks of un-initialized memory. Doing a flush seems to alleviate the issue. */ + GPU_flush(); + } + + DRW_stats_group_start("Depth of Field"); + + { + DRW_stats_group_start("Setup"); + + bokeh_gather_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_RG16F); + bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F); + bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F); + + DRW_draw_pass(bokeh_lut_ps_); + + setup_color_tx_.acquire(half_res, GPU_RGBA16F); + setup_coc_tx_.acquire(half_res, GPU_RG16F); + + DRW_draw_pass(setup_ps_); + + /* Outputs to reduced_*_tx_ mip 0. */ + DRW_draw_pass(stabilize_ps_); + + /* Used by stabilize pass. */ + setup_color_tx_.release(); + + { + DRW_stats_group_start("Tile Prepare"); + + /* WARNING: If format changes, make sure dof_tile_* GLSL constants are properly encoded. */ + tiles_fg_tx_.previous().acquire(tile_res, GPU_RGBA16F); + tiles_bg_tx_.previous().acquire(tile_res, GPU_R11F_G11F_B10F); + tiles_fg_tx_.current().acquire(tile_res, GPU_RGBA16F); + tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F); + + DRW_draw_pass(tiles_flatten_ps_); + + /* Used by tile_flatten and stabilize_ps pass. */ + setup_coc_tx_.release(); + + /* Error introduced by gather center jittering. */ + const float error_multiplier = 1.0f + 1.0f / (DOF_GATHER_RING_COUNT + 0.5f); + int dilation_end_radius = ceilf((fx_max_coc_ * error_multiplier) / (DOF_TILES_SIZE * 2)); + + /* Run dilation twice. One for minmax and one for minabs. */ + for (int pass = 0; pass < 2; pass++) { + /* This algorithm produce the exact dilation radius by dividing it in multiple passes. */ + int dilation_radius = 0; + while (dilation_radius < dilation_end_radius) { + /* Dilate slight focus only on first iteration. */ + tiles_dilate_slight_focus_ = (dilation_radius == 0) ? 1 : 0; + + int remainder = dilation_end_radius - dilation_radius; + /* Do not step over any unvisited tile. */ + int max_multiplier = dilation_radius + 1; + + int ring_count = min_ii(DOF_DILATE_RING_COUNT, ceilf(remainder / (float)max_multiplier)); + int multiplier = min_ii(max_multiplier, floorf(remainder / (float)ring_count)); + + dilation_radius += ring_count * multiplier; + + tiles_dilate_ring_count_ = ring_count; + tiles_dilate_ring_width_mul_ = multiplier; + + tiles_fg_tx_.swap(); + tiles_bg_tx_.swap(); + + DRW_draw_pass((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_); + } + } + + tiles_fg_tx_.previous().release(); + tiles_bg_tx_.previous().release(); + + DRW_stats_group_end(); + } + + downsample_tx_.acquire(quarter_res, GPU_RGBA16F); + + DRW_draw_pass(downsample_ps_); + + scatter_fg_indirect_buf_.clear_to_zero(); + scatter_bg_indirect_buf_.clear_to_zero(); + + DRW_draw_pass(reduce_ps_); + + /* Used by reduce pass. */ + downsample_tx_.release(); + + DRW_stats_group_end(); + } + + for (int is_background = 0; is_background < 2; is_background++) { + DRW_stats_group_start(is_background ? "Background Convolution" : "Foreground Convolution"); + + SwapChain &color_tx = is_background ? color_bg_tx_ : color_fg_tx_; + SwapChain &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_; + DRWPass *gather_ps = is_background ? gather_bg_ps_ : gather_fg_ps_; + DRWPass *filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_; + DRWPass *scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_; + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + occlusion_tx_.acquire(half_res, GPU_RG16F); + + DRW_draw_pass(gather_ps); + + { + /* Filtering pass. */ + color_tx.swap(); + weight_tx.swap(); + + color_tx.current().acquire(half_res, GPU_RGBA16F); + weight_tx.current().acquire(half_res, GPU_R16F); + + DRW_draw_pass(filter_ps); + + color_tx.previous().release(); + weight_tx.previous().release(); + } + + GPU_memory_barrier(GPU_BARRIER_FRAMEBUFFER); + + scatter_fb_.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current())); + + GPU_framebuffer_bind(scatter_fb_); + DRW_draw_pass(scatter_ps); + + /* Used by scatter pass. */ + occlusion_tx_.release(); + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Hole Fill"); + + bokeh_gather_lut_tx_.release(); + bokeh_scatter_lut_tx_.release(); + + hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F); + hole_fill_weight_tx_.acquire(half_res, GPU_R16F); + + DRW_draw_pass(hole_fill_ps_); + + /* NOTE: We do not filter the hole-fill pass as effect is likely to not be noticeable. */ + + DRW_stats_group_end(); + } + { + DRW_stats_group_start("Resolve"); + + DRW_draw_pass(resolve_ps_); + + color_bg_tx_.current().release(); + color_fg_tx_.current().release(); + weight_bg_tx_.current().release(); + weight_fg_tx_.current().release(); + tiles_fg_tx_.current().release(); + tiles_bg_tx_.current().release(); + hole_fill_color_tx_.release(); + hole_fill_weight_tx_.release(); + bokeh_resolve_lut_tx_.release(); + + DRW_stats_group_end(); + } + + DRW_stats_group_end(); + + /* Swap buffers so that next effect has the right input. */ + SWAP(GPUTexture *, *input_tx, *output_tx); +} + +/** \} */ + +} // namespace blender::eevee \ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh new file mode 100644 index 00000000000..12d9e7ebd9f --- /dev/null +++ b/source/blender/draw/engines/eevee_next/eevee_depth_of_field.hh @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 Blender Foundation. + */ + +/** \file + * \ingroup eevee + * + * Depth of field post process effect. + * + * There are 2 methods to achieve this effect. + * - The first uses projection matrix offsetting and sample accumulation to give + * reference quality depth of field. But this needs many samples to hide the + * under-sampling. + * - The second one is a post-processing based one. It follows the + * implementation described in the presentation + * "Life of a Bokeh - Siggraph 2018" from Guillaume Abadie. + * There are some difference with our actual implementation that prioritize quality. + */ + +#pragma once + +#include "eevee_shader_shared.hh" + +namespace blender::eevee { + +class Instance; + +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +class DepthOfField { + private: + class Instance &inst_; + + /** Samplers */ + static constexpr eGPUSamplerState gather_bilinear = GPU_SAMPLER_MIPMAP | GPU_SAMPLER_FILTER; + static constexpr eGPUSamplerState gather_nearest = GPU_SAMPLER_MIPMAP; + + /** Input/Output texture references. */ + GPUTexture *input_color_tx_ = nullptr; + GPUTexture *output_color_tx_ = nullptr; + + /** Bokeh LUT precompute pass. */ + TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"}; + TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"}; + TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"}; + DRWPass *bokeh_lut_ps_ = nullptr; + + /** Outputs half-resolution color and Circle Of Confusion. */ + TextureFromPool setup_coc_tx_ = {"dof_setup_coc"}; + TextureFromPool setup_color_tx_ = {"dof_setup_color"}; + int3 dispatch_setup_size_ = int3(-1); + DRWPass *setup_ps_ = nullptr; + + /** Allocated because we need mip chain. Which isn't supported by TextureFromPool. */ + Texture reduced_coc_tx_ = {"dof_reduced_coc"}; + Texture reduced_color_tx_ = {"dof_reduced_color"}; + + /** Stabilization (flicker attenuation) of Color and CoC output of the setup pass. */ + int3 dispatch_stabilize_size_ = int3(-1); + DRWPass *stabilize_ps_ = nullptr; + + /** 1/4th res color buffer used to speedup the local contrast test in the first reduce pass. */ + TextureFromPool downsample_tx_ = {"dof_downsample"}; + int3 dispatch_downsample_size_ = int3(-1); + DRWPass *downsample_ps_ = nullptr; + + /** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */ + DepthOfFieldScatterListBuf scatter_fg_list_buf_; + DepthOfFieldScatterListBuf scatter_bg_list_buf_; + DrawIndirectBuf scatter_fg_indirect_buf_; + DrawIndirectBuf scatter_bg_indirect_buf_; + int3 dispatch_reduce_size_ = int3(-1); + DRWPass *reduce_ps_ = nullptr; + + /** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). */ + SwapChain tiles_fg_tx_; + SwapChain tiles_bg_tx_; + int3 dispatch_tiles_flatten_size_ = int3(-1); + DRWPass *tiles_flatten_ps_ = nullptr; + + /** Dilates the min & max CoCs to cover maximum COC values. */ + bool1 tiles_dilate_slight_focus_ = false; + int tiles_dilate_ring_count_ = -1; + int tiles_dilate_ring_width_mul_ = -1; + int3 dispatch_tiles_dilate_size_ = int3(-1); + DRWPass *tiles_dilate_minmax_ps_ = nullptr; + DRWPass *tiles_dilate_minabs_ps_ = nullptr; + + /** Gather convolution for low intensity pixels and low contrast areas. */ + SwapChain color_bg_tx_; + SwapChain color_fg_tx_; + SwapChain weight_bg_tx_; + SwapChain weight_fg_tx_; + TextureFromPool occlusion_tx_ = {"dof_occlusion"}; + int3 dispatch_gather_size_ = int3(-1); + DRWPass *gather_fg_ps_ = nullptr; + DRWPass *gather_bg_ps_ = nullptr; + + /** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */ + TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"}; + TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"}; + DRWPass *hole_fill_ps_ = nullptr; + + /** Small Filter pass to reduce noise out of gather passes. */ + int3 dispatch_filter_size_ = int3(-1); + DRWPass *filter_fg_ps_ = nullptr; + DRWPass *filter_bg_ps_ = nullptr; + + /** Scatter convolution: A quad is emitted for every 4 bright enough half pixels. */ + Framebuffer scatter_fb_ = {"dof_scatter"}; + DRWPass *scatter_fg_ps_ = nullptr; + DRWPass *scatter_bg_ps_ = nullptr; + + /** Recombine the results and also perform a slight out of focus gather. */ + int3 dispatch_resolve_size_ = int3(-1); + DRWPass *resolve_ps_ = nullptr; + + DepthOfFieldDataBuf data_; + + /** Scene settings that are immutable. */ + float user_overblur_; + float fx_max_coc_; + /** Use Hiqh Quality (expensive) in-focus gather pass. */ + bool do_hq_slight_focus_; + /** Use jittered depth of field where we randomize camera location. */ + bool do_jitter_; + + /** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */ + float fx_radius_; + /** Circle of Confusion radius for jittered DoF. Is in view X direction in [0..1] range. */ + float jitter_radius_; + /** Focus distance in view space. */ + float focus_distance_; + /** Extent of the input buffer. */ + int2 extent_; + + /** Reduce pass info. */ + int reduce_steps_; + + public: + DepthOfField(Instance &inst) : inst_(inst){}; + ~DepthOfField(){}; + + void init(); + + void sync(); + + /** + * Apply Depth Of Field jittering to the view and projection matrices.. + */ + void jitter_apply(float4x4 &winmat, float4x4 &viewmat); + + /** + * Will swap input and output texture if rendering happens. The actual output of this function + * is in input_tx. + */ + void render(GPUTexture **input_tx, GPUTexture **output_tx); + + bool postfx_enabled() const + { + return fx_radius_ > 0.0f; + } + + private: + void bokeh_lut_pass_sync(); + void setup_pass_sync(); + void stabilize_pass_sync(); + void downsample_pass_sync(); + void reduce_pass_sync(); + void tiles_flatten_pass_sync(); + void tiles_dilate_pass_sync(); + void gather_pass_sync(); + void filter_pass_sync(); + void scatter_pass_sync(); + void hole_fill_pass_sync(); + void resolve_pass_sync(); +}; + +/** \} */ + +} // namespace blender::eevee \ No newline at end of file diff --git a/source/blender/draw/engines/eevee_next/eevee_film.hh b/source/blender/draw/engines/eevee_next/eevee_film.hh index c488b912215..0d443876d03 100644 --- a/source/blender/draw/engines/eevee_next/eevee_film.hh +++ b/source/blender/draw/engines/eevee_next/eevee_film.hh @@ -79,6 +79,7 @@ class Film { float *read_pass(eViewLayerEEVEEPassType pass_type); float *read_aov(ViewLayerAOV *aov); + /** Returns shading views internal resolution. */ int2 render_extent_get() const { return data_.render_extent; diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.cc b/source/blender/draw/engines/eevee_next/eevee_instance.cc index 70b5cb7d98f..df7a9ba7702 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.cc +++ b/source/blender/draw/engines/eevee_next/eevee_instance.cc @@ -61,6 +61,7 @@ void Instance::init(const int2 &output_res, camera.init(); film.init(output_res, output_rect); velocity.init(); + depth_of_field.init(); motion_blur.init(); main_view.init(); } @@ -98,6 +99,7 @@ void Instance::begin_sync() gpencil_engine_enabled = false; + depth_of_field.sync(); motion_blur.sync(); pipelines.sync(); main_view.sync(); diff --git a/source/blender/draw/engines/eevee_next/eevee_instance.hh b/source/blender/draw/engines/eevee_next/eevee_instance.hh index d714111a3c6..60dffd7c5ec 100644 --- a/source/blender/draw/engines/eevee_next/eevee_instance.hh +++ b/source/blender/draw/engines/eevee_next/eevee_instance.hh @@ -16,6 +16,7 @@ #include "DRW_render.h" #include "eevee_camera.hh" +#include "eevee_depth_of_field.hh" #include "eevee_film.hh" #include "eevee_material.hh" #include "eevee_motion_blur.hh" @@ -44,6 +45,7 @@ class Instance { PipelineModule pipelines; VelocityModule velocity; MotionBlurModule motion_blur; + DepthOfField depth_of_field; Sampling sampling; Camera camera; Film film; @@ -80,6 +82,7 @@ class Instance { pipelines(*this), velocity(*this), motion_blur(*this), + depth_of_field(*this), sampling(*this), camera(*this), film(*this), diff --git a/source/blender/draw/engines/eevee_next/eevee_sampling.hh b/source/blender/draw/engines/eevee_next/eevee_sampling.hh index c604ecef40b..be87ee74886 100644 --- a/source/blender/draw/engines/eevee_next/eevee_sampling.hh +++ b/source/blender/draw/engines/eevee_next/eevee_sampling.hh @@ -27,11 +27,11 @@ class Sampling { Instance &inst_; /* Number of samples in the first ring of jittered depth of field. */ - constexpr static uint64_t dof_web_density_ = 6; + static constexpr uint64_t dof_web_density_ = 6; /* High number of sample for viewport infinite rendering. */ - constexpr static uint64_t infinite_sample_count_ = 0xFFFFFFu; + static constexpr uint64_t infinite_sample_count_ = 0xFFFFFFu; /* During interactive rendering, loop over the first few samples. */ - constexpr static uint64_t interactive_sample_max_ = 8; + static constexpr uint64_t interactive_sample_max_ = 8; /** 0 based current sample. Might not increase sequentially in viewport. */ uint64_t sample_ = 0; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 782e73f6dd0..209aff9c168 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -90,6 +90,44 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_ return "eevee_motion_blur_tiles_flatten_render"; case MOTION_BLUR_TILE_FLATTEN_VIEWPORT: return "eevee_motion_blur_tiles_flatten_viewport"; + case DOF_BOKEH_LUT: + return "eevee_depth_of_field_bokeh_lut"; + case DOF_DOWNSAMPLE: + return "eevee_depth_of_field_downsample"; + case DOF_FILTER: + return "eevee_depth_of_field_filter"; + case DOF_GATHER_FOREGROUND_LUT: + return "eevee_depth_of_field_gather_foreground_lut"; + case DOF_GATHER_FOREGROUND: + return "eevee_depth_of_field_gather_foreground"; + case DOF_GATHER_BACKGROUND_LUT: + return "eevee_depth_of_field_gather_background_lut"; + case DOF_GATHER_BACKGROUND: + return "eevee_depth_of_field_gather_background"; + case DOF_GATHER_HOLE_FILL: + return "eevee_depth_of_field_hole_fill"; + case DOF_REDUCE: + return "eevee_depth_of_field_reduce"; + case DOF_RESOLVE: + return "eevee_depth_of_field_resolve_lq"; + case DOF_RESOLVE_HQ: + return "eevee_depth_of_field_resolve_hq"; + case DOF_RESOLVE_LUT: + return "eevee_depth_of_field_resolve_lq_lut"; + case DOF_RESOLVE_LUT_HQ: + return "eevee_depth_of_field_resolve_hq_lut"; + case DOF_SETUP: + return "eevee_depth_of_field_setup"; + case DOF_SCATTER: + return "eevee_depth_of_field_scatter"; + case DOF_STABILIZE: + return "eevee_depth_of_field_stabilize"; + case DOF_TILES_DILATE_MINABS: + return "eevee_depth_of_field_tiles_dilate_minabs"; + case DOF_TILES_DILATE_MINMAX: + return "eevee_depth_of_field_tiles_dilate_minmax"; + case DOF_TILES_FLATTEN: + return "eevee_depth_of_field_tiles_flatten"; /* To avoid compiler warning about missing case. */ case MAX_SHADER_TYPE: return ""; diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.hh b/source/blender/draw/engines/eevee_next/eevee_shader.hh index 8dc61fbae0b..13f2022e0d7 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader.hh @@ -29,6 +29,26 @@ enum eShaderType { FILM_FRAG = 0, FILM_COMP, + DOF_BOKEH_LUT, + DOF_DOWNSAMPLE, + DOF_FILTER, + DOF_GATHER_BACKGROUND_LUT, + DOF_GATHER_BACKGROUND, + DOF_GATHER_FOREGROUND_LUT, + DOF_GATHER_FOREGROUND, + DOF_GATHER_HOLE_FILL, + DOF_REDUCE, + DOF_RESOLVE_HQ, + DOF_RESOLVE_LUT_HQ, + DOF_RESOLVE_LUT, + DOF_RESOLVE, + DOF_SCATTER, + DOF_SETUP, + DOF_STABILIZE, + DOF_TILES_DILATE_MINABS, + DOF_TILES_DILATE_MINMAX, + DOF_TILES_FLATTEN, + MOTION_BLUR_GATHER, MOTION_BLUR_TILE_DILATE, MOTION_BLUR_TILE_FLATTEN_RENDER, diff --git a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh index 70de4101bb9..07957cd2c8c 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh +++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh @@ -23,6 +23,9 @@ using draw::SwapChain; using draw::Texture; using draw::TextureFromPool; +constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT; +constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER; + #endif #define UBO_MIN_MAX_SUPPORTED_SIZE 1 << 14 @@ -345,6 +348,116 @@ BLI_STATIC_ASSERT_ALIGN(MotionBlurTileIndirection, 16) /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Depth of field + * \{ */ + +/* 5% error threshold. */ +#define DOF_FAST_GATHER_COC_ERROR 0.05 +#define DOF_GATHER_RING_COUNT 5 +#define DOF_DILATE_RING_COUNT 3 + +struct DepthOfFieldData { + /** Size of the render targets for gather & scatter passes. */ + int2 extent; + /** Size of a pixel in uv space (1.0 / extent). */ + float2 texel_size; + /** Scale factor for anisotropic bokeh. */ + float2 bokeh_anisotropic_scale; + float2 bokeh_anisotropic_scale_inv; + /* Correction factor to align main target pixels with the filtered mipmap chain texture. */ + float2 gather_uv_fac; + /** Scatter parameters. */ + float scatter_coc_threshold; + float scatter_color_threshold; + float scatter_neighbor_max_color; + int scatter_sprite_per_row; + /** Firefly removing factor. */ + float denoise_factor; + /** Number of side the bokeh shape has. */ + float bokeh_blades; + /** Rotation of the bokeh shape. */ + float bokeh_rotation; + /** Multiplier and bias to apply to linear depth to Circle of confusion (CoC). */ + float coc_mul, coc_bias; + /** Maximum absolute allowed Circle of confusion (CoC). Min of computed max and user max. */ + float coc_abs_max; + /** Copy of camera type. */ + eCameraType camera_type; + /** Max number of sprite in the scatter pass for each ground. */ + int scatter_max_rect; + + int _pad0, _pad1; +}; +BLI_STATIC_ASSERT_ALIGN(DepthOfFieldData, 16) + +struct ScatterRect { + /** Color and CoC of the 4 pixels the scatter sprite represents. */ + float4 color_and_coc[4]; + /** Rect center position in half pixel space. */ + float2 offset; + /** Rect half extent in half pixel space. */ + float2 half_extent; +}; +BLI_STATIC_ASSERT_ALIGN(ScatterRect, 16) + +/** WORKAROUND(@fclem): This is because this file is included before common_math_lib.glsl. */ +#ifndef M_PI +# define EEVEE_PI +# define M_PI 3.14159265358979323846 /* pi */ +#endif + +static inline float coc_radius_from_camera_depth(DepthOfFieldData dof, float depth) +{ + depth = (dof.camera_type != CAMERA_ORTHO) ? 1.0f / depth : depth; + return dof.coc_mul * depth + dof.coc_bias; +} + +static inline float regular_polygon_side_length(float sides_count) +{ + return 2.0f * sinf(M_PI / sides_count); +} + +/* Returns intersection ratio between the radius edge at theta and the regular polygon edge. + * Start first corners at theta == 0. */ +static inline float circle_to_polygon_radius(float sides_count, float theta) +{ + /* From Graphics Gems from CryENGINE 3 (Siggraph 2013) by Tiago Sousa (slide + * 36). */ + float side_angle = (2.0f * M_PI) / sides_count; + return cosf(side_angle * 0.5f) / + cosf(theta - side_angle * floorf((sides_count * theta + M_PI) / (2.0f * M_PI))); +} + +/* Remap input angle to have homogenous spacing of points along a polygon edge. + * Expects theta to be in [0..2pi] range. */ +static inline float circle_to_polygon_angle(float sides_count, float theta) +{ + float side_angle = (2.0f * M_PI) / sides_count; + float halfside_angle = side_angle * 0.5f; + float side = floorf(theta / side_angle); + /* Length of segment from center to the middle of polygon side. */ + float adjacent = circle_to_polygon_radius(sides_count, 0.0f); + + /* This is the relative position of the sample on the polygon half side. */ + float local_theta = theta - side * side_angle; + float ratio = (local_theta - halfside_angle) / halfside_angle; + + float halfside_len = regular_polygon_side_length(sides_count) * 0.5f; + float opposite = ratio * halfside_len; + + /* NOTE: atan(y_over_x) has output range [-M_PI_2..M_PI_2]. */ + float final_local_theta = atanf(opposite / adjacent); + + return side * side_angle + final_local_theta; +} + +#ifdef EEVEE_PI +# undef M_PI +#endif + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Ray-Tracing * \{ */ @@ -404,13 +517,16 @@ float4 utility_tx_sample(sampler2DArray util_tx, float2 uv, float layer) using AOVsInfoDataBuf = draw::StorageBuffer; using CameraDataBuf = draw::UniformBuffer; +using DepthOfFieldDataBuf = draw::UniformBuffer; +using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer; +using DrawIndirectBuf = draw::StorageBuffer; using FilmDataBuf = draw::UniformBuffer; +using MotionBlurDataBuf = draw::UniformBuffer; +using MotionBlurTileIndirectionBuf = draw::StorageBuffer; using SamplingDataBuf = draw::StorageBuffer; using VelocityGeometryBuf = draw::StorageArrayBuffer; using VelocityIndexBuf = draw::StorageArrayBuffer; using VelocityObjectBuf = draw::StorageArrayBuffer; -using MotionBlurDataBuf = draw::UniformBuffer; -using MotionBlurTileIndirectionBuf = draw::StorageBuffer; } // namespace blender::eevee #endif diff --git a/source/blender/draw/engines/eevee_next/eevee_view.cc b/source/blender/draw/engines/eevee_next/eevee_view.cc index c7434a662a2..68c855b9bc5 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.cc +++ b/source/blender/draw/engines/eevee_next/eevee_view.cc @@ -143,7 +143,7 @@ void ShadingView::render() GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx) { - if (/*!dof_.postfx_enabled() &&*/ !inst_.motion_blur.postfx_enabled()) { + if (!inst_.depth_of_field.postfx_enabled() && !inst_.motion_blur.postfx_enabled()) { return input_tx; } postfx_tx_.acquire(extent_, GPU_RGBA16F); @@ -151,7 +151,7 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx) GPUTexture *output_tx = postfx_tx_; /* Swapping is done internally. Actual output is set to the next input. */ - // dof_.render(depth_tx_, &input_tx, &output_tx); + inst_.depth_of_field.render(&input_tx, &output_tx); inst_.motion_blur.render(&input_tx, &output_tx); return input_tx; @@ -178,7 +178,7 @@ void ShadingView::update_view() /* FIXME(fclem): The offset may be is noticeably large and the culling might make object pop * out of the blurring radius. To fix this, use custom enlarged culling matrix. */ - // dof_.jitter_apply(winmat, viewmat); + inst_.depth_of_field.jitter_apply(winmat, viewmat); DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr()); // inst_.lightprobes.set_view(render_view_, extent_); diff --git a/source/blender/draw/engines/eevee_next/eevee_view.hh b/source/blender/draw/engines/eevee_next/eevee_view.hh index ac8decc7632..ee169bf418e 100644 --- a/source/blender/draw/engines/eevee_next/eevee_view.hh +++ b/source/blender/draw/engines/eevee_next/eevee_view.hh @@ -41,10 +41,6 @@ class ShadingView { /** Matrix to apply to the viewmat. */ const float (*face_matrix_)[4]; - /** Post-FX modules. */ - // DepthOfField dof_; - // MotionBlur mb_; - /** Raytracing persistent buffers. Only opaque and refraction can have surface tracing. */ // RaytraceBuffer rt_buffer_opaque_; // RaytraceBuffer rt_buffer_refract_; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl new file mode 100644 index 00000000000..15c1073309a --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl @@ -0,0 +1,681 @@ + +/** + * Depth of Field Gather accumulator. + * We currently have only 2 which are very similar. + * One is for the halfres gather passes and the other one for slight in focus regions. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Options. + * \{ */ + +/* Quality options */ +#ifdef DOF_HOLEFILL_PASS +/* No need for very high density for hole_fill. */ +const int gather_ring_count = 3; +const int gather_ring_density = 3; +const int gather_max_density_change = 0; +const int gather_density_change_ring = 1; +#else +const int gather_ring_count = DOF_GATHER_RING_COUNT; +const int gather_ring_density = 3; +const int gather_max_density_change = 50; /* Dictates the maximum good quality blur. */ +const int gather_density_change_ring = 1; +#endif + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +const float unit_ring_radius = 1.0 / float(gather_ring_count); +const float unit_sample_radius = 1.0 / float(gather_ring_count + 0.5); +const float large_kernel_radius = 0.5 + float(gather_ring_count); +const float smaller_kernel_radius = 0.5 + float(gather_ring_count - gather_density_change_ring); +/* NOTE(fclem) the bias is reducing issues with density change visible transition. */ +const float radius_downscale_factor = smaller_kernel_radius / large_kernel_radius; +const int change_density_at_ring = (gather_ring_count - gather_density_change_ring + 1); +const float coc_radius_error = 2.0; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather common. + * \{ */ + +struct DofGatherData { + vec4 color; + float weight; + float dist; /* TODO remove */ + /* For scatter occlusion. */ + float coc; + float coc_sqr; + /* For ring bucket merging. */ + float transparency; + + float layer_opacity; +}; + +#define GATHER_DATA_INIT DofGatherData(vec4(0.0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + +/* Intersection with the center of the kernel. */ +float dof_intersection_weight(float coc, float distance_from_center, float intersection_multiplier) +{ + if (no_smooth_intersection) { + return step(0.0, (abs(coc) - distance_from_center)); + } + else { + /* (Slide 64). */ + return saturate((abs(coc) - distance_from_center) * intersection_multiplier + 0.5); + } +} + +/* Returns weight of the sample for the outer bucket (containing previous + * rings). */ +float dof_gather_accum_weight(float coc, float bordering_radius, bool first_ring) +{ + /* First ring has nothing to be mixed against. */ + if (first_ring) { + return 0.0; + } + return saturate(coc - bordering_radius); +} + +void dof_gather_ammend_weight(inout DofGatherData sample_data, float weight) +{ + sample_data.color *= weight; + sample_data.coc *= weight; + sample_data.coc_sqr *= weight; + sample_data.weight *= weight; +} + +void dof_gather_accumulate_sample(DofGatherData sample_data, + float weight, + inout DofGatherData accum_data) +{ + accum_data.color += sample_data.color * weight; + accum_data.coc += sample_data.coc * weight; + accum_data.coc_sqr += sample_data.coc * (sample_data.coc * weight); + accum_data.weight += weight; +} + +void dof_gather_accumulate_sample_pair(DofGatherData pair_data[2], + float bordering_radius, + float intersection_multiplier, + bool first_ring, + const bool do_fast_gather, + const bool is_foreground, + inout DofGatherData ring_data, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + for (int i = 0; i < 2; i++) { + dof_gather_accumulate_sample(pair_data[i], 1.0, accum_data); + accum_data.layer_opacity += 1.0; + } + return; + } + +#if 0 + const float mirroring_threshold = -dof_layer_threshold - dof_layer_offset; + /* TODO(fclem) Promote to parameter? dither with Noise? */ + const float mirroring_min_distance = 15.0; + if (pair_data[0].coc < mirroring_threshold && + (pair_data[1].coc - mirroring_min_distance) > pair_data[0].coc) { + pair_data[1].coc = pair_data[0].coc; + } + else if (pair_data[1].coc < mirroring_threshold && + (pair_data[0].coc - mirroring_min_distance) > pair_data[1].coc) { + pair_data[0].coc = pair_data[1].coc; + } +#endif + + for (int i = 0; i < 2; i++) { + float sample_weight = dof_sample_weight(pair_data[i].coc); + float layer_weight = dof_layer_weight(pair_data[i].coc, is_foreground); + float inter_weight = dof_intersection_weight( + pair_data[i].coc, pair_data[i].dist, intersection_multiplier); + float weight = inter_weight * layer_weight * sample_weight; + + /** + * If a CoC is larger than bordering radius we accumulate it to the general accumulator. + * If not, we accumulate to the ring bucket. This is to have more consistent sample occlusion. + **/ + float accum_weight = dof_gather_accum_weight(pair_data[i].coc, bordering_radius, first_ring); + dof_gather_accumulate_sample(pair_data[i], weight * accum_weight, accum_data); + dof_gather_accumulate_sample(pair_data[i], weight * (1.0 - accum_weight), ring_data); + + accum_data.layer_opacity += layer_weight; + + if (is_foreground) { + ring_data.transparency += 1.0 - inter_weight * layer_weight; + } + else { + float coc = is_foreground ? -pair_data[i].coc : pair_data[i].coc; + ring_data.transparency += saturate(coc - bordering_radius); + } + } +} + +void dof_gather_accumulate_sample_ring(DofGatherData ring_data, + int sample_count, + bool first_ring, + const bool do_fast_gather, + /* accum_data occludes the ring_data if true. */ + const bool reversed_occlusion, + inout DofGatherData accum_data) +{ + if (do_fast_gather) { + /* Do nothing as ring_data contains nothing. All samples are already in + * accum_data. */ + return; + } + + if (first_ring) { + /* Layer opacity is directly accumulated into accum_data data. */ + accum_data.color = ring_data.color; + accum_data.coc = ring_data.coc; + accum_data.coc_sqr = ring_data.coc_sqr; + accum_data.weight = ring_data.weight; + + accum_data.transparency = ring_data.transparency / float(sample_count); + return; + } + + if (ring_data.weight == 0.0) { + return; + } + + float ring_avg_coc = ring_data.coc / ring_data.weight; + float accum_avg_coc = accum_data.coc / accum_data.weight; + + /* Smooth test to set opacity to see if the ring average coc occludes the + * accumulation. Test is reversed to be multiplied against opacity. */ + float ring_occlu = saturate(accum_avg_coc - ring_avg_coc); + /* The bias here is arbitrary. Seems to avoid weird looking foreground in most + * cases. We might need to make it a parameter or find a relative bias. */ + float accum_occlu = saturate((ring_avg_coc - accum_avg_coc) * 0.1 - 1.0); + + if (is_resolve) { + ring_occlu = accum_occlu = 0.0; + } + + if (no_gather_occlusion) { + ring_occlu = 0.0; + accum_occlu = 0.0; + } + + /* (Slide 40) */ + float ring_opacity = saturate(1.0 - ring_data.transparency / float(sample_count)); + float accum_opacity = 1.0 - accum_data.transparency; + + if (reversed_occlusion) { + /* Accum_data occludes the ring. */ + float alpha = (accum_data.weight == 0.0) ? 0.0 : accum_opacity * accum_occlu; + float one_minus_alpha = 1.0 - alpha; + + accum_data.color += ring_data.color * one_minus_alpha; + accum_data.coc += ring_data.coc * one_minus_alpha; + accum_data.coc_sqr += ring_data.coc_sqr * one_minus_alpha; + accum_data.weight += ring_data.weight * one_minus_alpha; + + accum_data.transparency *= 1.0 - ring_opacity; + } + else { + /* Ring occludes the accum_data (Same as reference). */ + float alpha = (accum_data.weight == 0.0) ? 1.0 : (ring_opacity * ring_occlu); + float one_minus_alpha = 1.0 - alpha; + + accum_data.color = accum_data.color * one_minus_alpha + ring_data.color; + accum_data.coc = accum_data.coc * one_minus_alpha + ring_data.coc; + accum_data.coc_sqr = accum_data.coc_sqr * one_minus_alpha + ring_data.coc_sqr; + accum_data.weight = accum_data.weight * one_minus_alpha + ring_data.weight; + } +} + +/* FIXME(fclem) Seems to be wrong since it needs ringcount+1 as input for + * slightfocus gather. */ +/* This should be replaced by web_sample_count_get() but doing so is breaking other things. */ +int dof_gather_total_sample_count(const int ring_count, const int ring_density) +{ + return (ring_count * ring_count - ring_count) * ring_density + 1; +} + +void dof_gather_accumulate_center_sample(DofGatherData center_data, + float bordering_radius, + int i_radius, + const bool do_fast_gather, + const bool is_foreground, + const bool is_resolve, + inout DofGatherData accum_data) +{ + float layer_weight = dof_layer_weight(center_data.coc, is_foreground); + float sample_weight = dof_sample_weight(center_data.coc); + float weight = layer_weight * sample_weight; + float accum_weight = dof_gather_accum_weight(center_data.coc, bordering_radius, false); + + if (do_fast_gather) { + /* Hope for the compiler to optimize the above. */ + layer_weight = 1.0; + sample_weight = 1.0; + accum_weight = 1.0; + weight = 1.0; + } + + center_data.transparency = 1.0 - weight; + + dof_gather_accumulate_sample(center_data, weight * accum_weight, accum_data); + + if (!do_fast_gather) { + if (is_resolve) { + /* NOTE(fclem): Hack to smooth transition to full in-focus opacity. */ + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, + DOF_SLIGHT_FOCUS_DENSITY); + float fac = saturate(1.0 - abs(center_data.coc) / float(dof_layer_threshold)); + accum_data.layer_opacity += float(total_sample_count) * fac * fac; + } + accum_data.layer_opacity += layer_weight; + + /* Logic of dof_gather_accumulate_sample(). */ + weight *= (1.0 - accum_weight); + center_data.coc_sqr = center_data.coc * (center_data.coc * weight); + center_data.color *= weight; + center_data.coc *= weight; + center_data.weight = weight; + + if (is_foreground && !is_resolve) { + /* Reduce issue with closer foreground over distant foreground. */ + float ring_area = sqr(bordering_radius); + dof_gather_ammend_weight(center_data, ring_area); + } + + /* Accumulate center as its own ring. */ + dof_gather_accumulate_sample_ring( + center_data, 1, false, do_fast_gather, is_foreground, accum_data); + } +} + +int dof_gather_total_sample_count_with_density_change(const int ring_count, + const int ring_density, + int density_change) +{ + int sample_count_per_density_change = dof_gather_total_sample_count(ring_count, ring_density) - + dof_gather_total_sample_count( + ring_count - gather_density_change_ring, ring_density); + + return dof_gather_total_sample_count(ring_count, ring_density) + + sample_count_per_density_change * density_change; +} + +void dof_gather_accumulate_resolve(int total_sample_count, + DofGatherData accum_data, + out vec4 out_col, + out float out_weight, + out vec2 out_occlusion) +{ + float weight_inv = safe_rcp(accum_data.weight); + out_col = accum_data.color * weight_inv; + out_occlusion = vec2(abs(accum_data.coc), accum_data.coc_sqr) * weight_inv; + + if (is_foreground) { + out_weight = 1.0 - accum_data.transparency; + } + else if (accum_data.weight > 0.0) { + out_weight = accum_data.layer_opacity / float(total_sample_count); + } + else { + out_weight = 0.0; + } + /* Gathering may not accumulate to 1.0 alpha because of float precision. */ + if (out_weight > 0.99) { + out_weight = 1.0; + } + else if (out_weight < 0.01) { + out_weight = 0.0; + } + /* Same thing for alpha channel. */ + if (out_col.a > 0.99) { + out_col.a = 1.0; + } + else if (out_col.a < 0.01) { + out_col.a = 0.0; + } +} + +float dof_load_gather_coc(sampler2D gather_input_coc_tx, vec2 uv, float lod) +{ + float coc = textureLod(gather_input_coc_tx, uv, lod).r; + /* We gather at halfres. CoC must be divided by 2 to be compared against radii. */ + return coc * 0.5; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Common Gather accumulator. + * \{ */ + +/* Radii needs to be halfres CoC sizes. */ +bool dof_do_density_change(float base_radius, float min_intersectable_radius) +{ + /* Reduce artifact for very large blur. */ + min_intersectable_radius *= 0.1; + + bool need_new_density = (base_radius * unit_ring_radius > min_intersectable_radius); + bool larger_than_min_density = (base_radius * radius_downscale_factor > + float(gather_ring_count)); + + return need_new_density && larger_than_min_density; +} + +void dof_gather_init(float base_radius, + vec2 noise, + out vec2 center_co, + out float lod, + out float intersection_multiplier) +{ + /* Jitter center half a ring to reduce undersampling. */ + vec2 jitter_ofs = 0.499 * sample_disk(noise); + if (DOF_BOKEH_TEXTURE) { + jitter_ofs *= dof_buf.bokeh_anisotropic_scale; + } + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + center_co = frag_coord + jitter_ofs * base_radius * unit_sample_radius; + + /* TODO(fclem) Seems like the default lod selection is too big. Bias to avoid blocky moving out + * of focus shapes. */ + const float lod_bias = -2.0; + lod = max(floor(log2(base_radius * unit_sample_radius) + 0.5) + lod_bias, 0.0); + + if (no_gather_mipmaps) { + lod = 0.0; + } + /* (Slide 64). */ + intersection_multiplier = pow(0.5, lod); +} + +void dof_gather_accumulator(sampler2D color_tx, + sampler2D color_bilinear_tx, + sampler2D coc_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro. */ + float base_radius, + float min_intersectable_radius, + const bool do_fast_gather, + const bool do_density_change, + out vec4 out_color, + out float out_weight, + out vec2 out_occlusion) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy); + vec2 noise_offset = sampling_rng_2D_get(SAMPLING_LENS_U); + vec2 noise = no_gather_random ? vec2(0.0, 0.0) : + vec2(interlieved_gradient_noise(frag_coord, 0, noise_offset.x), + interlieved_gradient_noise(frag_coord, 1, noise_offset.y)); + + if (!do_fast_gather) { + /* Jitter the radius to reduce noticeable density changes. */ + base_radius += noise.x * unit_ring_radius * base_radius; + } + else { + /* Jittering the radius more than we need means we are going to feather the bokeh shape half a + * ring. So we need to compensate for fast gather that does not check CoC intersection. */ + base_radius += (0.5 - noise.x) * 1.5 * unit_ring_radius * base_radius; + } + /* TODO(fclem) another seed? For now Cranly-Partterson rotation with golden ratio. */ + noise.x = fract(noise.x * 6.1803398875); + + float lod, isect_mul; + vec2 center_co; + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + + bool first_ring = true; + + DofGatherData accum_data = GATHER_DATA_INIT; + + int density_change = 0; + for (int ring = gather_ring_count; ring > 0; ring--) { + int sample_pair_count = gather_ring_density * ring; + + float step_rot = M_PI / float(sample_pair_count); + mat2 step_rot_mat = rot2_from_angle(step_rot); + + float angle_offset = noise.y * step_rot; + vec2 offset = vec2(cos(angle_offset), sin(angle_offset)); + + float ring_radius = float(ring) * unit_sample_radius * base_radius; + + /* Slide 38. */ + float bordering_radius = ring_radius + + (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + DofGatherData ring_data = GATHER_DATA_INIT; + for (int sample_pair = 0; sample_pair < sample_pair_count; sample_pair++) { + offset = step_rot_mat * offset; + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + vec2 offset_co = ((i == 0) ? offset : -offset); + if (DOF_BOKEH_TEXTURE) { + /* Scaling to 0.25 for speed. Improves texture cache hit. */ + offset_co = texture(bkh_lut_tx, offset_co * 0.25 + 0.5).rg; + offset_co *= (is_foreground) ? -dof_buf.bokeh_anisotropic_scale : + dof_buf.bokeh_anisotropic_scale; + } + vec2 sample_co = center_co + offset_co * ring_radius; + vec2 sample_uv = sample_co * dof_buf.gather_uv_fac; + if (do_fast_gather) { + pair_data[i].color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + pair_data[i].color = textureLod(color_tx, sample_uv, lod); + } + pair_data[i].coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + pair_data[i].dist = ring_radius; + } + + dof_gather_accumulate_sample_pair(pair_data, + bordering_radius, + isect_mul, + first_ring, + do_fast_gather, + is_foreground, + ring_data, + accum_data); + } + + if (is_foreground) { + /* Reduce issue with closer foreground over distant foreground. */ + /* TODO(fclem) this seems to not be completely correct as the issue remains. */ + float ring_area = (sqr(float(ring) + 0.5 + coc_radius_error) - + sqr(float(ring) - 0.5 + coc_radius_error)) * + sqr(base_radius * unit_sample_radius); + dof_gather_ammend_weight(ring_data, ring_area); + } + + dof_gather_accumulate_sample_ring( + ring_data, sample_pair_count * 2, first_ring, do_fast_gather, is_foreground, accum_data); + + first_ring = false; + + if (do_density_change && (ring == change_density_at_ring) && + (density_change < gather_max_density_change)) { + if (dof_do_density_change(base_radius, min_intersectable_radius)) { + base_radius *= radius_downscale_factor; + ring += gather_density_change_ring; + /* We need to account for the density change in the weights (slide 62). + * For that multiply old kernel data by its area divided by the new kernel area. */ + const float outer_rings_weight = 1.0 / (radius_downscale_factor * radius_downscale_factor); + /* Samples are already weighted per ring in foreground pass. */ + if (!is_foreground) { + dof_gather_ammend_weight(accum_data, outer_rings_weight); + } + /* Re-init kernel position & sampling parameters. */ + dof_gather_init(base_radius, noise, center_co, lod, isect_mul); + density_change++; + } + } + } + + { + /* Center sample. */ + vec2 sample_uv = center_co * dof_buf.gather_uv_fac; + DofGatherData center_data; + if (do_fast_gather) { + center_data.color = textureLod(color_bilinear_tx, sample_uv, lod); + } + else { + center_data.color = textureLod(color_tx, sample_uv, lod); + } + center_data.coc = dof_load_gather_coc(coc_tx, sample_uv, lod); + center_data.dist = 0.0; + + /* Slide 38. */ + float bordering_radius = (0.5 + coc_radius_error) * base_radius * unit_sample_radius; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, 0, do_fast_gather, is_foreground, false, accum_data); + } + + int total_sample_count = dof_gather_total_sample_count_with_density_change( + gather_ring_count, gather_ring_density, density_change); + dof_gather_accumulate_resolve( + total_sample_count, accum_data, out_color, out_weight, out_occlusion); + + if (debug_gather_perf && density_change > 0) { + float fac = saturate(float(density_change) / float(10.0)); + out_color.rgb = avg(out_color.rgb) * neon_gradient(fac); + } + if (debug_gather_perf && do_fast_gather) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(0.0, 1.0, 0.0); + } + + /* Output premultiplied color so we can use bilinear sampler in resolve pass. */ + out_color *= out_weight; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Slight focus accumulator. + * + * The full pixel neighborhood is gathered. + * \{ */ + +void dof_slight_focus_gather(sampler2D depth_tx, + sampler2D color_tx, + sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */ + float radius, + out vec4 out_color, + out float out_weight) +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + float noise_offset = sampling_rng_1D_get(SAMPLING_LENS_U); + float noise = no_gather_random ? 0.0 : interlieved_gradient_noise(frag_coord, 3, noise_offset); + + DofGatherData fg_accum = GATHER_DATA_INIT; + DofGatherData bg_accum = GATHER_DATA_INIT; + + int i_radius = clamp(int(radius), 0, int(dof_layer_threshold)); + const int resolve_ring_density = DOF_SLIGHT_FOCUS_DENSITY; + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + bool first_ring = true; + + for (int ring = i_radius; ring > 0; ring--) { + DofGatherData fg_ring = GATHER_DATA_INIT; + DofGatherData bg_ring = GATHER_DATA_INIT; + + int ring_distance = ring; + int ring_sample_count = resolve_ring_density * ring_distance; + for (int sample_id = 0; sample_id < ring_sample_count; sample_id++) { + int s = sample_id * (4 / resolve_ring_density) + + int(noise * float((4 - resolve_ring_density) * ring_distance)); + + ivec2 offset = dof_square_ring_sample_offset(ring_distance, s); + float ring_dist = length(vec2(offset)); + + DofGatherData pair_data[2]; + for (int i = 0; i < 2; i++) { + ivec2 sample_offset = ((i == 0) ? offset : -offset); + ivec2 sample_texel = texel + sample_offset; + /* OPTI: could precompute the factor. */ + vec2 sample_uv = (vec2(sample_texel) + 0.5) / vec2(textureSize(depth_tx, 0)); + float depth = textureLod(depth_tx, sample_uv, 0.0).r; + pair_data[i].coc = dof_coc_from_depth(dof_buf, sample_uv, depth); + pair_data[i].color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + pair_data[i].dist = ring_dist; + if (DOF_BOKEH_TEXTURE) { + /* Contains subpixel distance to bokeh shape. */ + sample_offset += dof_max_slight_focus_radius; + pair_data[i].dist = texelFetch(bkh_lut_tx, sample_offset, 0).r; + } + pair_data[i].coc = clamp(pair_data[i].coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + } + + float bordering_radius = ring_dist + 0.5; + const float isect_mul = 1.0; + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, false, bg_ring, bg_accum); + + if (DOF_BOKEH_TEXTURE) { + /* Swap distances in order to flip bokeh shape for foreground. */ + float tmp = pair_data[0].dist; + pair_data[0].dist = pair_data[1].dist; + pair_data[1].dist = tmp; + } + dof_gather_accumulate_sample_pair( + pair_data, bordering_radius, isect_mul, first_ring, false, true, fg_ring, fg_accum); + } + + dof_gather_accumulate_sample_ring( + bg_ring, ring_sample_count * 2, first_ring, false, false, bg_accum); + dof_gather_accumulate_sample_ring( + fg_ring, ring_sample_count * 2, first_ring, false, true, fg_accum); + + first_ring = false; + } + + /* Center sample. */ + vec2 sample_uv = frag_coord / vec2(textureSize(depth_tx, 0)); + DofGatherData center_data; + center_data.color = safe_color(textureLod(color_tx, sample_uv, 0.0)); + center_data.coc = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + center_data.coc = clamp(center_data.coc, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + center_data.dist = 0.0; + + /* Slide 38. */ + float bordering_radius = 0.5; + + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, true, true, fg_accum); + dof_gather_accumulate_center_sample( + center_data, bordering_radius, i_radius, false, false, true, bg_accum); + + vec4 bg_col, fg_col; + float bg_weight, fg_weight; + vec2 unused_occlusion; + + int total_sample_count = dof_gather_total_sample_count(i_radius + 1, resolve_ring_density); + dof_gather_accumulate_resolve(total_sample_count, bg_accum, bg_col, bg_weight, unused_occlusion); + dof_gather_accumulate_resolve(total_sample_count, fg_accum, fg_col, fg_weight, unused_occlusion); + + /* Fix weighting issues on perfectly focus > slight focus transitionning areas. */ + if (abs(center_data.coc) < 0.5) { + bg_col = center_data.color; + bg_weight = 1.0; + } + + /* Alpha Over */ + float alpha = 1.0 - fg_weight; + out_weight = bg_weight * alpha + fg_weight; + out_color = bg_col * bg_weight * alpha + fg_col * fg_weight; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl new file mode 100644 index 00000000000..26a597b04e8 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl @@ -0,0 +1,55 @@ + +/** + * Bokeh Look Up Table: This outputs a radius multiplier to shape the sampling in gather pass or + * the scatter sprite appearance. This is only used if bokeh shape is either anamorphic or is not + * a perfect circle. + * We correct samples spacing for polygonal bokeh shapes. However, we do not for anamorphic bokeh + * as it is way more complex and expensive to do. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 gather_uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) / float(DOF_BOKEH_LUT_SIZE)); + /* Center uv in range [-1..1]. */ + gather_uv = gather_uv * 2.0 - 1.0; + + vec2 slight_focus_texel = vec2(gl_GlobalInvocationID.xy) - float(dof_max_slight_focus_radius); + + float radius = length(gather_uv); + + if (dof_buf.bokeh_blades > 0.0) { + /* NOTE: atan(y,x) has output range [-M_PI..M_PI], so add 2pi to avoid negative angles. */ + float theta = atan(gather_uv.y, gather_uv.x) + M_2PI; + float r = length(gather_uv); + + radius /= circle_to_polygon_radius(dof_buf.bokeh_blades, theta - dof_buf.bokeh_rotation); + + float theta_new = circle_to_polygon_angle(dof_buf.bokeh_blades, theta); + float r_new = circle_to_polygon_radius(dof_buf.bokeh_blades, theta_new); + + theta_new -= dof_buf.bokeh_rotation; + + gather_uv = r_new * vec2(-cos(theta_new), sin(theta_new)); + + { + /* Slight focus distance */ + slight_focus_texel *= dof_buf.bokeh_anisotropic_scale_inv; + float theta = atan(slight_focus_texel.y, -slight_focus_texel.x) + M_2PI; + slight_focus_texel /= circle_to_polygon_radius(dof_buf.bokeh_blades, + theta + dof_buf.bokeh_rotation); + } + } + else { + gather_uv *= safe_rcp(length(gather_uv)); + } + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + /* For gather store the normalized UV. */ + imageStore(out_gather_lut_img, texel, gather_uv.xyxy); + /* For scatter store distance. LUT will be scaled by COC. */ + imageStore(out_scatter_lut_img, texel, vec4(radius)); + /* For slight focus gather store pixel perfect distance. */ + imageStore(out_resolve_lut_img, texel, vec4(length(slight_focus_texel))); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl new file mode 100644 index 00000000000..3d45f285da9 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl @@ -0,0 +1,32 @@ + +/** + * Downsample pass: CoC aware downsample to quarter resolution. + * + * Pretty much identical to the setup pass but get CoC from buffer. + * Also does not weight luma for the bilateral weights. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + vec2 halfres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 halfres pixels. */ + vec2 quad_center = vec2(gl_GlobalInvocationID * 2 + 1) * halfres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * halfres_texel_size; + colors[i] = textureLod(color_tx, sample_uv, 0.0); + cocs[i] = textureLod(coc_tx, sample_uv, 0.0).r; + } + + vec4 weights = dof_bilateral_coc_weights(cocs); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + vec4 out_color = weighted_sum_array(colors, weights); + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl new file mode 100644 index 00000000000..88ecaab6a00 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_filter_comp.glsl @@ -0,0 +1,157 @@ + +/** + * Gather Filter pass: Filter the gather pass result to reduce noise. + * + * This is a simple 3x3 median filter to avoid dilating highlights with a 3x3 max filter even if + * cheaper. + */ + +struct FilterSample { + vec4 color; + float weight; +}; + +/* -------------------------------------------------------------------- */ +/** \name Pixel cache. + * \{ */ + +shared vec4 color_cache[10][10]; +shared float weight_cache[10][10]; + +void cache_init() +{ + /** + * Load enough values into LDS to perform the filter. + * + * ┌──────────────────────────────┐ + * │ │ < Border texels that needs to be loaded. + * │ x x x x x x x x │ ─┐ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ + * │ x x x x x x x x │ │ Thread Group Size 8x8. + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ │ + * │ L L L L L x x x x │ ─┘ + * │ L L L L L │ < Border texels that needs to be loaded. + * └──────────────────────────────┘ + * └───────────┘ + * Load using 5x5 threads. + */ + + ivec2 texel = ivec2(gl_GlobalInvocationID.xy) - 1; + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(5)))) { + ivec2 cache_texel = ivec2(gl_LocalInvocationID.xy) + ivec2(x, y) * 5; + ivec2 load_texel = clamp(texel + ivec2(x, y) * 5, ivec2(0), textureSize(color_tx, 0) - 1); + + color_cache[cache_texel.y][cache_texel.x] = texelFetch(color_tx, load_texel, 0); + weight_cache[cache_texel.y][cache_texel.x] = texelFetch(weight_tx, load_texel, 0).r; + } + } + } + barrier(); +} + +FilterSample cache_sample(int x, int y) +{ + return FilterSample(color_cache[y][x], weight_cache[y][x]); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Median filter + * From: + * Implementing Median Filters in XC4000E FPGAs + * JOHN L. SMITH, Univision Technologies Inc., Billerica, MA + * http://users.utcluj.ro/~baruch/resources/Image/xl23_16.pdf + * Figure 1 + * \{ */ + +FilterSample filter_min(FilterSample a, FilterSample b) +{ + return FilterSample(min(a.color, b.color), min(a.weight, b.weight)); +} + +FilterSample filter_max(FilterSample a, FilterSample b) +{ + return FilterSample(max(a.color, b.color), max(a.weight, b.weight)); +} + +FilterSample filter_min(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(min(a.color, min(c.color, b.color)), min(a.weight, min(c.weight, b.weight))); +} + +FilterSample filter_max(FilterSample a, FilterSample b, FilterSample c) +{ + return FilterSample(max(a.color, max(c.color, b.color)), max(a.weight, max(c.weight, b.weight))); +} + +FilterSample filter_median(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. */ + FilterSample l1 = filter_min(s2, s3); + FilterSample h1 = filter_max(s2, s3); + FilterSample h2 = filter_max(s1, l1); + FilterSample l3 = filter_min(h2, h1); + return l3; +} + +struct FilterLmhResult { + FilterSample low; + FilterSample median; + FilterSample high; +}; + +FilterLmhResult filter_lmh(FilterSample s1, FilterSample s2, FilterSample s3) +{ + /* From diagram, with nodes numbered from top to bottom. */ + FilterSample h1 = filter_max(s2, s3); + FilterSample l1 = filter_min(s2, s3); + + FilterSample h2 = filter_max(s1, l1); + FilterSample l2 = filter_min(s1, l1); + + FilterSample h3 = filter_max(h2, h1); + FilterSample l3 = filter_min(h2, h1); + + FilterLmhResult result; + result.low = l2; + result.median = l3; + result.high = h3; + + return result; +} + +/** \} */ + +void main() +{ + /* OPTI(fclem) Could early return on some tiles. */ + + cache_init(); + + ivec2 texel = ivec2(gl_LocalInvocationID.xy); + + FilterLmhResult rows[3]; + for (int y = 0; y < 3; y++) { + rows[y] = filter_lmh(cache_sample(texel.x + 0, texel.y + y), + cache_sample(texel.x + 1, texel.y + y), + cache_sample(texel.x + 2, texel.y + y)); + } + /* Left nodes. */ + FilterSample high = filter_max(rows[0].low, rows[1].low, rows[2].low); + /* Right nodes. */ + FilterSample low = filter_min(rows[0].high, rows[1].high, rows[2].high); + /* Center nodes. */ + FilterSample median = filter_median(rows[0].median, rows[1].median, rows[2].median); + /* Last bottom nodes. */ + median = filter_median(low, median, high); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, median.color); + imageStore(out_weight_img, out_texel, vec4(median.weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl new file mode 100644 index 00000000000..e9905cd8aaf --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_gather_comp.glsl @@ -0,0 +1,99 @@ + +/** + * Gather pass: Convolve foreground and background parts in separate passes. + * + * Using the min&max CoC tile buffer, we select the best apropriate method to blur the scene color. + * A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + * + * Outputs: + * - Color * Weight, Weight, Occlusion 'CoC' Depth (mean and variance) + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius, min_radius, min_intersectable_radius; + bool can_early_out; + if (is_foreground) { + base_radius = -coc_tile.fg_min_coc; + min_radius = -coc_tile.fg_max_coc; + min_intersectable_radius = -coc_tile.fg_max_intersectable_coc; + can_early_out = !prediction.do_foreground; + } + else { + base_radius = coc_tile.bg_max_coc; + min_radius = coc_tile.bg_min_coc; + min_intersectable_radius = coc_tile.bg_min_intersectable_coc; + can_early_out = !prediction.do_background; + } + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color; + float out_weight; + vec2 out_occlusion; + + if (can_early_out) { + out_color = vec4(0.0); + out_weight = 0.0; + out_occlusion = vec2(0.0, 0.0); + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + out_occlusion); + } + else if (do_density_change) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + true, + out_color, + out_weight, + out_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + bokeh_lut_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + out_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); + imageStore(out_occlusion_img, out_texel, out_occlusion.xyxy); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl new file mode 100644 index 00000000000..2b664520bba --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_hole_fill_comp.glsl @@ -0,0 +1,70 @@ + +/** + * Holefill pass: Gather background parts where foreground is present. + * + * Using the min&max CoC tile buffer, we select the best apropriate method to blur the scene color. + * A fast gather path is taken if there is not many CoC variation inside the tile. + * + * We sample using an octaweb sampling pattern. We randomize the kernel center and each ring + * rotation to ensure maximum coverage. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy / DOF_TILES_SIZE); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + float base_radius = -coc_tile.fg_min_coc; + float min_radius = -coc_tile.fg_max_coc; + float min_intersectable_radius = dof_tile_large_coc; + bool can_early_out = !prediction.do_hole_fill; + + bool do_fast_gather = dof_do_fast_gather(base_radius, min_radius, is_foreground); + + /* Gather at half resolution. Divide CoC by 2. */ + base_radius *= 0.5; + min_intersectable_radius *= 0.5; + + bool do_density_change = dof_do_density_change(base_radius, min_intersectable_radius); + + vec4 out_color = vec4(0.0); + float out_weight = 0.0; + vec2 unused_occlusion = vec2(0.0, 0.0); + + if (can_early_out) { + /* Early out. */ + } + else if (do_fast_gather) { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + true, + false, + out_color, + out_weight, + unused_occlusion); + } + else { + dof_gather_accumulator(color_tx, + color_bilinear_tx, + coc_tx, + coc_tx, + base_radius, + min_intersectable_radius, + false, + false, + out_color, + out_weight, + unused_occlusion); + } + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_weight_img, out_texel, vec4(out_weight)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl new file mode 100644 index 00000000000..8a5d11ddc56 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl @@ -0,0 +1,346 @@ + +/** + * Depth of Field utils. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +/* -------------------------------------------------------------------- */ +/** \name Constants. + * \{ */ + +#ifndef DOF_SLIGHT_FOCUS_DENSITY +# define DOF_SLIGHT_FOCUS_DENSITY 2 +#endif + +#ifdef DOF_RESOLVE_PASS +const bool is_resolve = true; +#else +const bool is_resolve = false; +#endif +#ifdef DOF_FOREGROUND_PASS +const bool is_foreground = DOF_FOREGROUND_PASS; +#else +const bool is_foreground = false; +#endif +/* Debug options */ +const bool debug_gather_perf = false; +const bool debug_scatter_perf = false; +const bool debug_resolve_perf = false; + +const bool no_smooth_intersection = false; +const bool no_gather_occlusion = false; +const bool no_gather_mipmaps = false; +const bool no_gather_random = false; +const bool no_gather_filtering = false; +const bool no_scatter_occlusion = false; +const bool no_scatter_pass = false; +const bool no_foreground_pass = false; +const bool no_background_pass = false; +const bool no_slight_focus_pass = false; +const bool no_focus_pass = false; +const bool no_hole_fill_pass = false; + +/* Distribute weights between near/slightfocus/far fields (slide 117). */ +const float dof_layer_threshold = 4.0; +/* Make sure it overlaps. */ +const float dof_layer_offset_fg = 0.5 + 1.0; +/* Extra offset for convolution layers to avoid light leaking from background. */ +const float dof_layer_offset = 0.5 + 0.5; + +const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS; + +const vec2 quad_offsets[4] = vec2[4]( + vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5)); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Weighting and downsampling utils. + * \{ */ + +float dof_hdr_color_weight(vec4 color) +{ + /* Very fast "luma" weighting. */ + float luma = (color.g * 2.0) + (color.r + color.b); + /* TODO(fclem) Pass correct exposure. */ + const float exposure = 1.0; + return 1.0 / (luma * exposure + 4.0); +} + +float dof_coc_select(vec4 cocs) +{ + /* Select biggest coc. */ + float selected_coc = cocs.x; + if (abs(cocs.y) > abs(selected_coc)) { + selected_coc = cocs.y; + } + if (abs(cocs.z) > abs(selected_coc)) { + selected_coc = cocs.z; + } + if (abs(cocs.w) > abs(selected_coc)) { + selected_coc = cocs.w; + } + return selected_coc; +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_coc_weights(vec4 cocs) +{ + float chosen_coc = dof_coc_select(cocs); + + const float scale = 4.0; /* TODO(fclem) revisit. */ + /* NOTE: The difference between the cocs should be inside a abs() function, + * but we follow UE4 implementation to improve how dithered transparency looks (see slide 19). */ + return saturate(1.0 - (chosen_coc - cocs) * scale); +} + +/* NOTE: Do not forget to normalize weights afterwards. */ +vec4 dof_bilateral_color_weights(vec4 colors[4]) +{ + vec4 weights; + for (int i = 0; i < 4; i++) { + weights[i] = dof_hdr_color_weight(colors[i]); + } + return weights; +} + +/* Returns signed Circle of confusion radius (in pixel) based on depth buffer value [0..1]. */ +float dof_coc_from_depth(DepthOfFieldData dof_data, vec2 uv, float depth) +{ + if (is_panoramic(dof_data.camera_type)) { + /* Use radial depth. */ + depth = -length(get_view_space_from_depth(uv, depth)); + } + else { + depth = get_view_z_from_depth(depth); + } + return coc_radius_from_camera_depth(dof_data, depth); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather & Scatter Weighting + * \{ */ + +float dof_layer_weight(float coc, const bool is_foreground) +{ + /* NOTE: These are fullres pixel CoC value. */ + if (is_resolve) { + return saturate(-abs(coc) + dof_layer_threshold + dof_layer_offset) * + float(is_foreground ? (coc <= 0.5) : (coc > -0.5)); + } + else { + coc *= 2.0; /* Account for half pixel gather. */ + float threshold = dof_layer_threshold - + ((is_foreground) ? dof_layer_offset_fg : dof_layer_offset); + return saturate(((is_foreground) ? -coc : coc) - threshold); + } +} +vec4 dof_layer_weight(vec4 coc) +{ + /* NOTE: Used for scatter pass which already flipped the sign correctly. */ + coc *= 2.0; /* Account for half pixel gather. */ + return saturate(coc - dof_layer_threshold + dof_layer_offset); +} + +/* NOTE: This is halfres CoC radius. */ +float dof_sample_weight(float coc) +{ +#if 1 /* Optimized */ + return min(1.0, 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(min_coc, abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} +vec4 dof_sample_weight(vec4 coc) +{ +#if 1 /* Optimized */ + return min(vec4(1.0), 1.0 / sqr(coc)); +#else + /* Full intensity if CoC radius is below the pixel footprint. */ + const float min_coc = 1.0; + coc = max(vec4(min_coc), abs(coc)); + return (M_PI * min_coc * min_coc) / (M_PI * coc * coc); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle of Confusion tiles + * \{ */ + +struct CocTile { + float fg_min_coc; + float fg_max_coc; + float fg_max_intersectable_coc; + float fg_slight_focus_max_coc; + float bg_min_coc; + float bg_max_coc; + float bg_min_intersectable_coc; +}; + +struct CocTilePrediction { + bool do_foreground; + bool do_slight_focus; + bool do_focus; + bool do_background; + bool do_hole_fill; +}; + +/* WATCH: Might have to change depending on the texture format. */ +const float dof_tile_defocus = 0.25; +const float dof_tile_focus = 0.0; +const float dof_tile_mixed = 0.75; +const float dof_tile_large_coc = 1024.0; + +/* Init a CoC tile for reduction algorithms. */ +CocTile dof_coc_tile_init() +{ + CocTile tile; + tile.fg_min_coc = 0.0; + tile.fg_max_coc = -dof_tile_large_coc; + tile.fg_max_intersectable_coc = dof_tile_large_coc; + tile.fg_slight_focus_max_coc = -1.0; + tile.bg_min_coc = dof_tile_large_coc; + tile.bg_max_coc = 0.0; + tile.bg_min_intersectable_coc = dof_tile_large_coc; + return tile; +} + +CocTile dof_coc_tile_unpack(vec4 fg, vec3 bg) +{ + CocTile tile; + tile.fg_min_coc = -fg.x; + tile.fg_max_coc = -fg.y; + tile.fg_max_intersectable_coc = -fg.z; + tile.fg_slight_focus_max_coc = fg.w; + tile.bg_min_coc = bg.x; + tile.bg_max_coc = bg.y; + tile.bg_min_intersectable_coc = bg.z; + return tile; +} + +/* WORKAROUND(fclem): GLSL compilers differs in what qualifiers are requires to pass images as + * parameters. Workaround by using defines. */ +#define dof_coc_tile_load(tiles_fg_img_, tiles_bg_img_, texel_) \ + dof_coc_tile_unpack( \ + imageLoad(tiles_fg_img_, clamp(texel_, ivec2(0), imageSize(tiles_fg_img_) - 1)), \ + imageLoad(tiles_bg_img_, clamp(texel_, ivec2(0), imageSize(tiles_bg_img_) - 1)).xyz) + +void dof_coc_tile_pack(CocTile tile, out vec4 out_fg, out vec3 out_bg) +{ + out_fg.x = -tile.fg_min_coc; + out_fg.y = -tile.fg_max_coc; + out_fg.z = -tile.fg_max_intersectable_coc; + out_fg.w = tile.fg_slight_focus_max_coc; + out_bg.x = tile.bg_min_coc; + out_bg.y = tile.bg_max_coc; + out_bg.z = tile.bg_min_intersectable_coc; +} + +#define dof_coc_tile_store(tiles_fg_img_, tiles_bg_img_, texel_out_, tile_data_) \ + if (true) { \ + vec4 out_fg; \ + vec3 out_bg; \ + dof_coc_tile_pack(tile_data_, out_fg, out_bg); \ + imageStore(tiles_fg_img_, texel_out_, out_fg); \ + imageStore(tiles_bg_img_, texel_out_, out_bg.xyzz); \ + } + +bool dof_do_fast_gather(float max_absolute_coc, float min_absolute_coc, const bool is_foreground) +{ + float min_weight = dof_layer_weight((is_foreground) ? -min_absolute_coc : min_absolute_coc, + is_foreground); + if (min_weight < 1.0) { + return false; + } + /* FIXME(fclem): This is a workaround to fast gather triggering too early. Since we use custom + * opacity mask, the opacity is not given to be 100% even for after normal threshold. */ + if (is_foreground && min_absolute_coc < dof_layer_threshold) { + return false; + } + return (max_absolute_coc - min_absolute_coc) < (DOF_FAST_GATHER_COC_ERROR * max_absolute_coc); +} + +CocTilePrediction dof_coc_tile_prediction_get(CocTile tile) +{ + /* Based on tile value, predict what pass we need to load. */ + CocTilePrediction predict; + + predict.do_foreground = (-tile.fg_min_coc > dof_layer_threshold - dof_layer_offset_fg); + bool fg_fully_opaque = predict.do_foreground && + dof_do_fast_gather(-tile.fg_min_coc, -tile.fg_max_coc, true); + + predict.do_slight_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc >= 0.5); + predict.do_focus = !fg_fully_opaque && (tile.fg_slight_focus_max_coc == dof_tile_focus); + + predict.do_background = !predict.do_focus && !fg_fully_opaque && + (tile.bg_max_coc > dof_layer_threshold - dof_layer_offset); + bool bg_fully_opaque = predict.do_background && + dof_do_fast_gather(-tile.bg_max_coc, tile.bg_min_coc, false); + predict.do_hole_fill = !predict.do_focus && !fg_fully_opaque && -tile.fg_min_coc > 0.0; + +#if 0 /* Debug */ + predict.do_foreground = predict.do_background = predict.do_hole_fill = true; +#endif + return predict; +} + +/* Special function to return the correct max value of 2 slight focus coc. */ +float dof_coc_max_slight_focus(float coc1, float coc2) +{ + /* Do not consider values below 0.5 for expansion as they are "encoded". + * See setup pass shader for more infos. */ + if ((coc1 == dof_tile_defocus && coc2 == dof_tile_focus) || + (coc1 == dof_tile_focus && coc2 == dof_tile_defocus)) { + /* Tile where completely out of focus and in focus are both present. + * Consider as very slightly out of focus. */ + return dof_tile_mixed; + } + return max(coc1, coc2); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gathering + * \{ */ + +/** + * Generate samples in a square pattern with the ring radius. X is the center tile. + * + * Dist1 Dist2 + * 6 5 4 3 2 + * 3 2 1 7 1 + * . X 0 . X 0 + * . . . . . + * . . . . . + * + * Samples are expected to be mirrored to complete the pattern. + **/ +ivec2 dof_square_ring_sample_offset(int ring_distance, int sample_id) +{ + ivec2 offset; + if (sample_id < ring_distance) { + offset.x = ring_distance; + offset.y = sample_id; + } + else if (sample_id < ring_distance * 3) { + offset.x = ring_distance - sample_id + ring_distance; + offset.y = ring_distance; + } + else { + offset.x = -ring_distance; + offset.y = ring_distance - sample_id + 3 * ring_distance; + } + return offset; +} + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl new file mode 100644 index 00000000000..88a577a1c3c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl @@ -0,0 +1,246 @@ + +/** + * Reduce copy pass: filter fireflies and split color between scatter and gather input. + * + * NOTE: The texture can end up being too big because of the mipmap padding. We correct for + * that during the convolution phase. + * + * Inputs: + * - Output of setup pass (halfres) and reduce downsample pass (quarter res). + * Outputs: + * - Halfres padded to avoid mipmap mis-alignment (so possibly not matching input size). + * - Gather input color (whole mip chain), Scatter rect list, Signed CoC (whole mip chain). + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* NOTE: Do not compare alpha as it is not scattered by the scatter pass. */ +float dof_scatter_neighborhood_rejection(vec3 color) +{ + color = min(vec3(dof_buf.scatter_neighbor_max_color), color); + + float validity = 0.0; + + /* Centered in the middle of 4 quarter res texel. */ + vec2 texel_size = 1.0 / vec2(textureSize(downsample_tx, 0).xy); + vec2 uv = ((vec2(gl_GlobalInvocationID.xy) + 0.5) * 0.5) * texel_size; + + vec3 max_diff = vec3(0.0); + for (int i = 0; i < 4; i++) { + vec2 sample_uv = uv + quad_offsets[i] * texel_size; + vec3 ref = textureLod(downsample_tx, sample_uv, 0.0).rgb; + + ref = min(vec3(dof_buf.scatter_neighbor_max_color), ref); + float diff = max_v3(max(vec3(0.0), abs(ref - color))); + + const float rejection_threshold = 0.7; + diff = saturate(diff / rejection_threshold - 1.0); + validity = max(validity, diff); + } + + return validity; +} + +/* This avoids Bokeh sprite popping in and out at the screen border and + * drawing Bokeh sprites larger than the screen. */ +float dof_scatter_screen_border_rejection(float coc, ivec2 texel) +{ + vec2 screen_size = vec2(imageSize(inout_color_lod0_img)); + vec2 uv = (vec2(texel) + 0.5) / screen_size; + vec2 screen_pos = uv * screen_size; + float min_screen_border_distance = min_v2(min(screen_pos, screen_size - screen_pos)); + /* Fullres to halfres CoC. */ + coc *= 0.5; + /* Allow 10px transition. */ + const float rejection_hardeness = 1.0 / 10.0; + return saturate((min_screen_border_distance - abs(coc)) * rejection_hardeness + 1.0); +} + +float dof_scatter_luminosity_rejection(vec3 color) +{ + const float rejection_hardness = 1.0; + return saturate(max_v3(color - dof_buf.scatter_color_threshold) * rejection_hardness); +} + +float dof_scatter_coc_radius_rejection(float coc) +{ + const float rejection_hardness = 0.3; + return saturate((abs(coc) - dof_buf.scatter_coc_threshold) * rejection_hardness); +} + +float fast_luma(vec3 color) +{ + return (2.0 * color.g) + color.r + color.b; +} + +shared vec4 color_cache[8][8]; +shared float coc_cache[8][8]; +shared float do_scatter[8][8]; + +void main() +{ + ivec2 texel = min(ivec2(gl_GlobalInvocationID.xy), imageSize(inout_color_lod0_img) - 1); + uvec2 texel_local = gl_LocalInvocationID.xy; + /* Increase readablility. */ +#define LOCAL_INDEX texel_local.y][texel_local.x +#define LOCAL_OFFSET(x_, y_) texel_local.y + y_][texel_local.x + x_ + + /* Load level 0 into cache. */ + color_cache[LOCAL_INDEX] = imageLoad(inout_color_lod0_img, texel); + coc_cache[LOCAL_INDEX] = imageLoad(in_coc_lod0_img, texel).r; + + /* Only scatter if luminous enough. */ + do_scatter[LOCAL_INDEX] = dof_scatter_luminosity_rejection(color_cache[LOCAL_INDEX].rgb); + /* Only scatter if CoC is big enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_coc_radius_rejection(coc_cache[LOCAL_INDEX]); + /* Only scatter if CoC is not too big to avoid performance issues. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_screen_border_rejection(coc_cache[LOCAL_INDEX], texel); + /* Only scatter if neighborhood is different enough. */ + do_scatter[LOCAL_INDEX] *= dof_scatter_neighborhood_rejection(color_cache[LOCAL_INDEX].rgb); + /* For debuging. */ + if (no_scatter_pass) { + do_scatter[LOCAL_INDEX] = 0.0; + } + + barrier(); + + /* Add a scatter sprite for each 2x2 pixel neighborhood passing the threshold. */ + if (all(equal(texel_local & 1u, uvec2(0)))) { + vec4 do_scatter4; + /* Follows quad_offsets order. */ + do_scatter4.x = do_scatter[LOCAL_OFFSET(0, 1)]; + do_scatter4.y = do_scatter[LOCAL_OFFSET(1, 1)]; + do_scatter4.z = do_scatter[LOCAL_OFFSET(1, 0)]; + do_scatter4.w = do_scatter[LOCAL_OFFSET(0, 0)]; + if (any(greaterThan(do_scatter4, vec4(0.0)))) { + /* Apply energy conservation to anamorphic scattered bokeh. */ + do_scatter4 *= max_v2(dof_buf.bokeh_anisotropic_scale_inv); + + /* Circle of Confusion. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, 1)]; + coc4.y = coc_cache[LOCAL_OFFSET(1, 1)]; + coc4.z = coc_cache[LOCAL_OFFSET(1, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + /* We are scattering at half resolution, so divide CoC by 2. */ + coc4 *= 0.5; + /* Sprite center position. Center sprite around the 4 texture taps. */ + vec2 offset = vec2(gl_GlobalInvocationID.xy) + 1; + /* Add 2.5 to max_coc because the max_coc may not be centered on the sprite origin + * and because we smooth the bokeh shape a bit in the pixel shader. */ + vec2 half_extent = max_v4(abs(coc4)) * dof_buf.bokeh_anisotropic_scale + 2.5; + /* Issue a sprite for each field if any CoC matches. */ + if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_fg_indirect_buf.v_count = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_fg_indirect_buf.i_count, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + + vec4 coc4_fg = max(vec4(0.0), -coc4); + vec4 fg_weights = dof_layer_weight(coc4_fg) * dof_sample_weight(coc4_fg) * do_scatter4; + /* Filter NaNs. */ + fg_weights = select(fg_weights, vec4(0.0), equal(coc4_fg, vec4(0.0))); + + ScatterRect rect_fg; + rect_fg.offset = offset; + /* Negate extent to flip the sprite. Mimics optical phenomenon. */ + rect_fg.half_extent = -half_extent; + /* NOTE: Since we fliped the quad along (1,-1) line, we need to also swap the (1,1) and + * (0,0) values so that quad_offsets is in the right order in the vertex shader. */ + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_fg.color_and_coc[0].a = coc4_fg[0]; + rect_fg.color_and_coc[1].a = coc4_fg[3]; + rect_fg.color_and_coc[2].a = coc4_fg[2]; + rect_fg.color_and_coc[3].a = coc4_fg[1]; + /* Apply weights. */ + rect_fg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * fg_weights[0]; + rect_fg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * fg_weights[3]; + rect_fg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * fg_weights[2]; + rect_fg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * fg_weights[1]; + + scatter_fg_list_buf[rect_id] = rect_fg; + } + } + if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) { + /* Same value for all threads. Not an issue if we don't sync access to it. */ + scatter_bg_indirect_buf.v_count = 4u; + /* Issue 1 strip instance per sprite. */ + uint rect_id = atomicAdd(scatter_bg_indirect_buf.i_count, 1u); + if (rect_id < dof_buf.scatter_max_rect) { + vec4 coc4_bg = max(vec4(0.0), coc4); + vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4; + /* Filter NaNs. */ + bg_weights = select(bg_weights, vec4(0.0), equal(coc4_bg, vec4(0.0))); + + ScatterRect rect_bg; + rect_bg.offset = offset; + rect_bg.half_extent = half_extent; + + /* Circle of Confusion absolute radius in halfres pixels. */ + rect_bg.color_and_coc[0].a = coc4_bg[0]; + rect_bg.color_and_coc[1].a = coc4_bg[1]; + rect_bg.color_and_coc[2].a = coc4_bg[2]; + rect_bg.color_and_coc[3].a = coc4_bg[3]; + /* Apply weights. */ + rect_bg.color_and_coc[0].rgb = color_cache[LOCAL_OFFSET(0, 1)].rgb * bg_weights[0]; + rect_bg.color_and_coc[1].rgb = color_cache[LOCAL_OFFSET(1, 1)].rgb * bg_weights[1]; + rect_bg.color_and_coc[2].rgb = color_cache[LOCAL_OFFSET(1, 0)].rgb * bg_weights[2]; + rect_bg.color_and_coc[3].rgb = color_cache[LOCAL_OFFSET(0, 0)].rgb * bg_weights[3]; + + scatter_bg_list_buf[rect_id] = rect_bg; + } + } + } + } + + /* Remove scatter color from gather. */ + color_cache[LOCAL_INDEX].rgb *= 1.0 - do_scatter[LOCAL_INDEX]; + imageStore(inout_color_lod0_img, texel, color_cache[LOCAL_INDEX]); + + /* Recursive downsample. */ + for (uint i = 1u; i < DOF_MIP_MAX; i++) { + barrier(); + if (all(lessThan(gl_LocalInvocationID.xy, uvec2(1u << (DOF_MIP_MAX - 1u - i))))) { + uvec2 texel_local = gl_LocalInvocationID.xy << i; + + /* TODO(fclem): Could use wave shuffle intrinsics to avoid LDS as suggested by the paper. */ + vec4 coc4; + coc4.x = coc_cache[LOCAL_OFFSET(0, 1)]; + coc4.y = coc_cache[LOCAL_OFFSET(1, 1)]; + coc4.z = coc_cache[LOCAL_OFFSET(1, 0)]; + coc4.w = coc_cache[LOCAL_OFFSET(0, 0)]; + + vec4 colors[4]; + colors[0] = color_cache[LOCAL_OFFSET(0, 1)]; + colors[1] = color_cache[LOCAL_OFFSET(1, 1)]; + colors[2] = color_cache[LOCAL_OFFSET(1, 0)]; + colors[3] = color_cache[LOCAL_OFFSET(0, 0)]; + + vec4 weights = dof_bilateral_coc_weights(coc4); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + color_cache[LOCAL_INDEX] = weighted_sum_array(colors, weights); + coc_cache[LOCAL_INDEX] = dot(coc4, weights); + + ivec2 texel = ivec2(gl_WorkGroupID.xy * (gl_WorkGroupSize.xy >> i) + + gl_LocalInvocationID.xy); + + if (i == 1) { + imageStore(out_color_lod1_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod1_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else if (i == 2) { + imageStore(out_color_lod2_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod2_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + else /* if (i == 3) */ { + imageStore(out_color_lod3_img, texel, color_cache[LOCAL_INDEX]); + imageStore(out_coc_lod3_img, texel, vec4(coc_cache[LOCAL_INDEX])); + } + } + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl new file mode 100644 index 00000000000..3bb89bd7e0f --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl @@ -0,0 +1,87 @@ + +/** + * Recombine Pass: Load separate convolution layer and composite with self + * slight defocus convolution and in-focus fields. + * + * The halfres gather methods are fast but lack precision for small CoC areas. + * To fix this we do a bruteforce gather to have a smooth transition between + * in-focus and defocus regions. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_accumulator_lib.glsl) + +void main() +{ + vec2 frag_coord = vec2(gl_GlobalInvocationID.xy) + 0.5; + ivec2 tile_co = ivec2(frag_coord / float(DOF_TILES_SIZE * 2)); + CocTile coc_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, tile_co); + CocTilePrediction prediction = dof_coc_tile_prediction_get(coc_tile); + + vec4 out_color = vec4(0.0); + float weight = 0.0; + + vec4 layer_color; + float layer_weight; + + vec2 uv = frag_coord / vec2(textureSize(color_tx, 0)); + vec2 uv_halfres = (frag_coord * 0.5) / vec2(textureSize(color_bg_tx, 0)); + + if (!no_hole_fill_pass && prediction.do_hole_fill) { + layer_color = textureLod(color_hole_fill_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_hole_fill_tx, uv_halfres, 0.0).r; + out_color = layer_color * safe_rcp(layer_weight); + weight = float(layer_weight > 0.0); + } + + if (!no_background_pass && prediction.do_background) { + layer_color = textureLod(color_bg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_bg_tx, uv_halfres, 0.0).r; + /* Always prefer background to hole_fill pass. */ + layer_color *= safe_rcp(layer_weight); + layer_weight = float(layer_weight > 0.0); + /* Composite background. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + /* Fill holes with the composited background. */ + out_color *= safe_rcp(weight); + weight = float(weight > 0.0); + } + + if (!no_slight_focus_pass && prediction.do_slight_focus) { + dof_slight_focus_gather(depth_tx, + color_tx, + bokeh_lut_tx, + coc_tile.fg_slight_focus_max_coc, + layer_color, + layer_weight); + /* Composite slight defocus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_focus_pass && prediction.do_focus) { + layer_color = safe_color(textureLod(color_tx, uv, 0.0)); + layer_weight = 1.0; + /* Composite in focus. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + weight = weight * (1.0 - layer_weight) + layer_weight; + } + + if (!no_foreground_pass && prediction.do_foreground) { + layer_color = textureLod(color_fg_tx, uv_halfres, 0.0); + layer_weight = textureLod(weight_fg_tx, uv_halfres, 0.0).r; + /* Composite foreground. */ + out_color = out_color * (1.0 - layer_weight) + layer_color; + } + + /* Fix float precision issue in alpha compositing. */ + if (out_color.a > 0.99) { + out_color.a = 1.0; + } + + if (debug_resolve_perf && coc_tile.fg_slight_focus_max_coc >= 0.5) { + out_color.rgb *= vec3(1.0, 0.1, 0.1); + } + + imageStore(out_color_img, ivec2(gl_GlobalInvocationID.xy), out_color); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl new file mode 100644 index 00000000000..cfb7fd2568b --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_frag.glsl @@ -0,0 +1,62 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one quad per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +#define linearstep(p0, p1, v) (clamp(((v) - (p0)) / abs((p1) - (p0)), 0.0, 1.0)) + +void main() +{ + vec4 coc4 = vec4(interp.color_and_coc1.w, + interp.color_and_coc2.w, + interp.color_and_coc3.w, + interp.color_and_coc4.w); + vec4 shapes; + if (use_bokeh_lut) { + shapes = vec4(texture(bokeh_lut_tx, interp.rect_uv1).r, + texture(bokeh_lut_tx, interp.rect_uv2).r, + texture(bokeh_lut_tx, interp.rect_uv3).r, + texture(bokeh_lut_tx, interp.rect_uv4).r); + } + else { + shapes = vec4(length(interp.rect_uv1), + length(interp.rect_uv2), + length(interp.rect_uv3), + length(interp.rect_uv4)); + } + shapes *= interp.distance_scale; + /* Becomes signed distance field in pixel units. */ + shapes -= coc4; + /* Smooth the edges a bit to fade out the undersampling artifacts. */ + shapes = saturate(1.0 - linearstep(-0.8, 0.8, shapes)); + /* Outside of bokeh shape. Try to avoid overloading ROPs. */ + if (max_v4(shapes) == 0.0) { + discard; + } + + if (!no_scatter_occlusion) { + /* Works because target is the same size as occlusion_tx. */ + vec2 uv = gl_FragCoord.xy / vec2(textureSize(occlusion_tx, 0).xy); + vec2 occlusion_data = texture(occlusion_tx, uv).rg; + /* Fix tilling artifacts. (Slide 90) */ + const float correction_fac = 1.0 - DOF_FAST_GATHER_COC_ERROR; + /* Occlude the sprite with geometry from the same field using a chebychev test (slide 85). */ + float mean = occlusion_data.x; + float variance = occlusion_data.y; + shapes *= variance * safe_rcp(variance + sqr(max(coc4 * correction_fac - mean, 0.0))); + } + + out_color = (interp.color_and_coc1 * shapes[0] + interp.color_and_coc2 * shapes[1] + + interp.color_and_coc3 * shapes[2] + interp.color_and_coc4 * shapes[3]); + /* Do not accumulate alpha. This has already been accumulated by the gather pass. */ + out_color.a = 0.0; + + if (debug_scatter_perf) { + out_color.rgb = avg(out_color.rgb) * vec3(1.0, 0.0, 0.0); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl new file mode 100644 index 00000000000..d870496a06c --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_scatter_vert.glsl @@ -0,0 +1,45 @@ + +/** + * Scatter pass: Use sprites to scatter the color of very bright pixel to have higher quality blur. + * + * We only scatter one triangle per sprite and one sprite per 4 pixels to reduce vertex shader + * invocations and overdraw. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +void main() +{ + ScatterRect rect = scatter_list_buf[gl_InstanceID]; + + interp.color_and_coc1 = rect.color_and_coc[0]; + interp.color_and_coc2 = rect.color_and_coc[1]; + interp.color_and_coc3 = rect.color_and_coc[2]; + interp.color_and_coc4 = rect.color_and_coc[3]; + + vec2 uv = vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0 - 1.0; + uv = uv * rect.half_extent; + + gl_Position = vec4(uv + rect.offset, 0.0, 1.0); + /* NDC range [-1..1]. */ + gl_Position.xy = (gl_Position.xy / vec2(textureSize(occlusion_tx, 0).xy)) * 2.0 - 1.0; + + if (use_bokeh_lut) { + /* Bias scale to avoid sampling at the texture's border. */ + interp.distance_scale = (float(DOF_BOKEH_LUT_SIZE) / float(DOF_BOKEH_LUT_SIZE - 1)); + vec2 uv_div = 1.0 / (interp.distance_scale * abs(rect.half_extent)); + interp.rect_uv1 = ((uv + quad_offsets[0]) * uv_div) * 0.5 + 0.5; + interp.rect_uv2 = ((uv + quad_offsets[1]) * uv_div) * 0.5 + 0.5; + interp.rect_uv3 = ((uv + quad_offsets[2]) * uv_div) * 0.5 + 0.5; + interp.rect_uv4 = ((uv + quad_offsets[3]) * uv_div) * 0.5 + 0.5; + /* Only for sampling. */ + interp.distance_scale *= max_v2(abs(rect.half_extent)); + } + else { + interp.distance_scale = 1.0; + interp.rect_uv1 = uv + quad_offsets[0]; + interp.rect_uv2 = uv + quad_offsets[1]; + interp.rect_uv3 = uv + quad_offsets[2]; + interp.rect_uv4 = uv + quad_offsets[3]; + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl new file mode 100644 index 00000000000..99fc42f5d98 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_setup_comp.glsl @@ -0,0 +1,68 @@ + +/** + * Setup pass: CoC and luma aware downsample to half resolution of the input scene color buffer. + * + * An addition to the downsample CoC, we output the maximum slight out of focus CoC to be + * sure we don't miss a pixel. + * + * Input: + * Full-resolution color & depth buffer + * Output: + * Half-resolution Color, signed CoC (out_coc.x), and max slight focus abs CoC (out_coc.y). + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +float dof_abs_max_slight_of_focus_coc(vec4 cocs) +{ + /* Clamp to 0.5 if full in defocus to differentiate full focus tiles with coc == 0.0. + * This enables an optimization in the resolve pass. */ + const vec4 threshold = vec4(dof_layer_threshold + dof_layer_offset); + cocs = abs(cocs); + bvec4 defocus = greaterThan(cocs, threshold); + bvec4 focus = lessThanEqual(cocs, vec4(0.5)); + if (any(defocus) && any(focus)) { + /* For the same reason as in the flatten pass. This is a case we cannot optimize for. */ + cocs = mix(cocs, vec4(dof_tile_mixed), focus); + cocs = mix(cocs, vec4(dof_tile_mixed), defocus); + } + else { + cocs = mix(cocs, vec4(dof_tile_focus), focus); + cocs = mix(cocs, vec4(dof_tile_defocus), defocus); + } + return max_v4(cocs); +} + +void main() +{ + vec2 fullres_texel_size = 1.0 / vec2(textureSize(color_tx, 0).xy); + /* Center uv around the 4 fullres pixels. */ + vec2 quad_center = vec2(gl_GlobalInvocationID.xy * 2 + 1) * fullres_texel_size; + + vec4 colors[4]; + vec4 cocs; + for (int i = 0; i < 4; i++) { + vec2 sample_uv = quad_center + quad_offsets[i] * fullres_texel_size; + /* NOTE: We use samplers without filtering. */ + colors[i] = safe_color(textureLod(color_tx, sample_uv, 0.0)); + cocs[i] = dof_coc_from_depth(dof_buf, sample_uv, textureLod(depth_tx, sample_uv, 0.0).r); + } + + cocs = clamp(cocs, -dof_buf.coc_abs_max, dof_buf.coc_abs_max); + + vec4 weights = dof_bilateral_coc_weights(cocs); + weights *= dof_bilateral_color_weights(colors); + /* Normalize so that the sum is 1. */ + weights *= safe_rcp(sum(weights)); + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + vec4 out_color = weighted_sum_array(colors, weights); + imageStore(out_color_img, out_texel, out_color); + + vec2 out_coc; + out_coc.x = dot(cocs, weights); + out_coc.y = dof_abs_max_slight_of_focus_coc(cocs); + imageStore(out_coc_img, out_texel, out_coc.xyxy); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl new file mode 100644 index 00000000000..ac371f76395 --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Temporal Stabilization of the Depth of field input. + * Corresponds to the TAA pass in the paper. + * + * TODO: This pass needs a cleanup / improvement using much better TAA. + * + * Inputs: + * - Output of setup pass (halfres). + * Outputs: + * - Stabilized Color and CoC (halfres). + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +float fast_luma(vec3 color) +{ + return (2.0 * color.g) + color.r + color.b; +} + +/* Lightweight version of neighborhood clamping found in TAA. */ +vec3 dof_neighborhood_clamping(vec3 color) +{ + vec2 texel_size = 1.0 / vec2(textureSize(color_tx, 0)); + vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) * texel_size; + vec4 ofs = vec4(-1, 1, -1, 1) * texel_size.xxyy; + + /* Luma clamping. 3x3 square neighborhood. */ + float c00 = fast_luma(textureLod(color_tx, uv + ofs.xz, 0.0).rgb); + float c01 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(1.0, 0.0), 0.0).rgb); + float c02 = fast_luma(textureLod(color_tx, uv + ofs.xw, 0.0).rgb); + + float c10 = fast_luma(textureLod(color_tx, uv + ofs.xz * vec2(0.0, 1.0), 0.0).rgb); + float c11 = fast_luma(color); + float c12 = fast_luma(textureLod(color_tx, uv + ofs.xw * vec2(0.0, 1.0), 0.0).rgb); + + float c20 = fast_luma(textureLod(color_tx, uv + ofs.yz, 0.0).rgb); + float c21 = fast_luma(textureLod(color_tx, uv + ofs.yz * vec2(1.0, 0.0), 0.0).rgb); + float c22 = fast_luma(textureLod(color_tx, uv + ofs.yw, 0.0).rgb); + + float avg_luma = avg8(c00, c01, c02, c10, c12, c20, c21, c22); + float max_luma = max8(c00, c01, c02, c10, c12, c20, c21, c22); + + float upper_bound = mix(max_luma, avg_luma, dof_buf.denoise_factor); + upper_bound = mix(c11, upper_bound, dof_buf.denoise_factor); + + float clamped_luma = min(upper_bound, c11); + + return color * clamped_luma * safe_rcp(c11); +} + +void main() +{ + vec2 uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(textureSize(color_tx, 0).xy); + vec4 out_color = textureLod(color_tx, uv, 0.0); + float out_coc = textureLod(coc_tx, uv, 0.0).r; + + out_color.rgb = dof_neighborhood_clamping(out_color.rgb); + /* TODO(fclem): Stabilize CoC. */ + + ivec2 out_texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(out_color_img, out_texel, out_color); + imageStore(out_coc_img, out_texel, vec4(out_coc)); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl new file mode 100644 index 00000000000..db99e9e4fba --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_dilate_comp.glsl @@ -0,0 +1,108 @@ + +/** + * Tile dilate pass: Takes the 8x8 Tiles buffer and converts dilates the tiles with large CoC to + * their neighborhood. This pass is repeated multiple time until the maximum CoC can be covered. + * + * Input & Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + **/ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/* Error introduced by the random offset of the gathering kernel's center. */ +const float bluring_radius_error = 1.0 + 1.0 / (float(DOF_GATHER_RING_COUNT) + 0.5); +const float tile_to_fullres_factor = float(DOF_TILES_SIZE * 2); + +void main() +{ + ivec2 center_tile_pos = ivec2(gl_GlobalInvocationID.xy); + + CocTile ring_buckets[DOF_DILATE_RING_COUNT]; + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + ring_buckets[ring] = dof_coc_tile_init(); + + int ring_distance = ring + 1; + for (int sample_id = 0; sample_id < 4 * ring_distance; sample_id++) { + ivec2 offset = dof_square_ring_sample_offset(ring_distance, sample_id); + + offset *= ring_width_multiplier; + + for (int i = 0; i < 2; i++) { + ivec2 adj_tile_pos = center_tile_pos + ((i == 0) ? offset : -offset); + + CocTile adj_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, adj_tile_pos); + + if (DILATE_MODE_MIN_MAX) { + /* Actually gather the "absolute" biggest coc but keeping the sign. */ + ring_buckets[ring].fg_min_coc = min(ring_buckets[ring].fg_min_coc, adj_tile.fg_min_coc); + ring_buckets[ring].bg_max_coc = max(ring_buckets[ring].bg_max_coc, adj_tile.bg_max_coc); + + if (dilate_slight_focus) { + ring_buckets[ring].fg_slight_focus_max_coc = dof_coc_max_slight_focus( + ring_buckets[ring].fg_slight_focus_max_coc, adj_tile.fg_slight_focus_max_coc); + } + } + else { /* DILATE_MODE_MIN_ABS */ + ring_buckets[ring].fg_max_coc = max(ring_buckets[ring].fg_max_coc, adj_tile.fg_max_coc); + ring_buckets[ring].bg_min_coc = min(ring_buckets[ring].bg_min_coc, adj_tile.bg_min_coc); + + /* Should be tight as possible to reduce gather overhead (see slide 61). */ + float closest_neighbor_distance = length(max(abs(vec2(offset)) - 1.0, 0.0)) * + tile_to_fullres_factor; + + ring_buckets[ring].fg_max_intersectable_coc = max( + ring_buckets[ring].fg_max_intersectable_coc, + adj_tile.fg_max_intersectable_coc + closest_neighbor_distance); + ring_buckets[ring].bg_min_intersectable_coc = min( + ring_buckets[ring].bg_min_intersectable_coc, + adj_tile.bg_min_intersectable_coc + closest_neighbor_distance); + } + } + } + } + + /* Load center tile. */ + CocTile out_tile = dof_coc_tile_load(in_tiles_fg_img, in_tiles_bg_img, center_tile_pos); + + /* Dilate once. */ + if (dilate_slight_focus) { + out_tile.fg_slight_focus_max_coc = dof_coc_max_slight_focus( + out_tile.fg_slight_focus_max_coc, ring_buckets[0].fg_slight_focus_max_coc); + } + + for (int ring = 0; ring < ring_count && ring < DOF_DILATE_RING_COUNT; ring++) { + float ring_distance = float(ring + 1); + + ring_distance = (ring_distance * ring_width_multiplier - 1) * tile_to_fullres_factor; + + if (DILATE_MODE_MIN_MAX) { + /* NOTE(fclem): Unsure if both sides of the inequalities have the same unit. */ + if (-ring_buckets[ring].fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_min_coc = min(out_tile.fg_min_coc, ring_buckets[ring].fg_min_coc); + } + + if (ring_buckets[ring].bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_max_coc = max(out_tile.bg_max_coc, ring_buckets[ring].bg_max_coc); + } + } + else { /* DILATE_MODE_MIN_ABS */ + /* Find minimum absolute CoC radii that will be intersected for the previously + * computed maximum CoC values. */ + if (-out_tile.fg_min_coc * bluring_radius_error > ring_distance) { + out_tile.fg_max_coc = max(out_tile.fg_max_coc, ring_buckets[ring].fg_max_coc); + out_tile.fg_max_intersectable_coc = max(out_tile.fg_max_intersectable_coc, + ring_buckets[ring].fg_max_intersectable_coc); + } + + if (out_tile.bg_max_coc * bluring_radius_error > ring_distance) { + out_tile.bg_min_coc = min(out_tile.bg_min_coc, ring_buckets[ring].bg_min_coc); + out_tile.bg_min_intersectable_coc = min(out_tile.bg_min_intersectable_coc, + ring_buckets[ring].bg_min_intersectable_coc); + } + } + } + + ivec2 texel_out = ivec2(gl_GlobalInvocationID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, texel_out, out_tile); +} diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl new file mode 100644 index 00000000000..2a0787ec69f --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_tiles_flatten_comp.glsl @@ -0,0 +1,106 @@ + +/** + * Tile flatten pass: Takes the halfres CoC buffer and converts it to 8x8 tiles. + * + * Output min and max values for each tile and for both foreground & background. + * Also outputs min intersectable CoC for the background, which is the minimum CoC + * that comes from the background pixels. + * + * Input: + * - Half-resolution Circle of confusion. Out of setup pass. + * Output: + * - Separated foreground and background CoC. 1/8th of half-res resolution. So 1/16th of full-res. + */ + +#pragma BLENDER_REQUIRE(eevee_depth_of_field_lib.glsl) + +/** + * In order to use atomic operations, we have to use uints. But this means having to deal with the + * negative number ourselves. Luckily, each ground have a nicely defined range of values we can + * remap to positive float. + */ +shared uint fg_min_coc; +shared uint fg_max_coc; +shared uint fg_max_intersectable_coc; +shared uint bg_min_coc; +shared uint bg_max_coc; +shared uint bg_min_intersectable_coc; + +shared uint fg_slight_focus_max_coc; +shared uint fg_slight_focus_flag; + +const uint slight_focus_flag_defocus = 1u; +const uint slight_focus_flag_focus = 2u; +const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc); + +void main() +{ + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + /* NOTE: Min/Max flipped because of inverted fg_coc sign. */ + fg_min_coc = floatBitsToUint(0.0); + fg_max_coc = dof_tile_large_coc_uint; + fg_max_intersectable_coc = dof_tile_large_coc_uint; + bg_min_coc = dof_tile_large_coc_uint; + bg_max_coc = floatBitsToUint(0.0); + bg_min_intersectable_coc = dof_tile_large_coc_uint; + /* Should be -1.0 but we want to avoid the sign bit in float representation. */ + fg_slight_focus_max_coc = floatBitsToUint(0.0); + fg_slight_focus_flag = 0u; + } + barrier(); + + ivec2 sample_texel = min(ivec2(gl_GlobalInvocationID.xy), textureSize(coc_tx, 0).xy - 1); + vec2 sample_data = texelFetch(coc_tx, sample_texel, 0).rg; + + float sample_coc = sample_data.x; + uint fg_coc = floatBitsToUint(max(-sample_coc, 0.0)); + /* NOTE: atomicMin/Max flipped because of inverted fg_coc sign. */ + atomicMax(fg_min_coc, fg_coc); + atomicMin(fg_max_coc, fg_coc); + atomicMin(fg_max_intersectable_coc, (sample_coc < 0.0) ? fg_coc : dof_tile_large_coc_uint); + + uint bg_coc = floatBitsToUint(max(sample_coc, 0.0)); + atomicMin(bg_min_coc, bg_coc); + atomicMax(bg_max_coc, bg_coc); + atomicMin(bg_min_intersectable_coc, (sample_coc > 0.0) ? bg_coc : dof_tile_large_coc_uint); + + /* Mimics logic of dof_coc_max_slight_focus(). */ + float sample_slight_focus_coc = sample_data.y; + if (sample_slight_focus_coc == dof_tile_defocus) { + atomicOr(fg_slight_focus_flag, slight_focus_flag_defocus); + } + else if (sample_slight_focus_coc == dof_tile_focus) { + atomicOr(fg_slight_focus_flag, slight_focus_flag_focus); + } + /* Add 1 in order to compare signed floats in [-1..1] range. */ + atomicMax(fg_slight_focus_max_coc, floatBitsToUint(sample_slight_focus_coc + 1.0)); + + barrier(); + + if (all(equal(gl_LocalInvocationID.xy, uvec2(0)))) { + if (fg_max_intersectable_coc == dof_tile_large_coc_uint) { + fg_max_intersectable_coc = floatBitsToUint(0.0); + } + + CocTile tile; + /* Foreground sign is flipped since we compare unsigned representation. */ + tile.fg_min_coc = -uintBitsToFloat(fg_min_coc); + tile.fg_max_coc = -uintBitsToFloat(fg_max_coc); + tile.fg_max_intersectable_coc = -uintBitsToFloat(fg_max_intersectable_coc); + tile.bg_min_coc = uintBitsToFloat(bg_min_coc); + tile.bg_max_coc = uintBitsToFloat(bg_max_coc); + tile.bg_min_intersectable_coc = uintBitsToFloat(bg_min_intersectable_coc); + + /* Mimics logic of dof_coc_max_slight_focus(). */ + if (fg_slight_focus_flag == (slight_focus_flag_defocus | slight_focus_flag_focus)) { + tile.fg_slight_focus_max_coc = dof_tile_mixed; + } + else { + /* Remove the 1 bias. */ + tile.fg_slight_focus_max_coc = uintBitsToFloat(fg_slight_focus_max_coc) - 1.0; + } + + ivec2 tile_co = ivec2(gl_WorkGroupID.xy); + dof_coc_tile_store(out_tiles_fg_img, out_tiles_bg_img, tile_co, tile); + } +} diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh new file mode 100644 index 00000000000..42a8c78a51d --- /dev/null +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh @@ -0,0 +1,248 @@ + +#include "eevee_defines.hh" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Setup + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut) + .do_static_compilation(true) + .local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(1, "DepthOfFieldData", "dof_buf") + .image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img") + .image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img") + .compute_source("eevee_depth_of_field_bokeh_lut_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(1, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::DEPTH_2D, "depth_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img") + .compute_source("eevee_depth_of_field_setup_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(1, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::DEPTH_2D, "coc_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + // .sampler(2, ImageType::FLOAT_2D, "velocity_tx") /* TODO: TAA with reprojection. */ + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_img") + .compute_source("eevee_depth_of_field_stabilize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_downsample) + .do_static_compilation(true) + .local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "coc_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_downsample_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce) + .do_static_compilation(true) + .local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .uniform_buf(1, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::FLOAT_2D, "downsample_tx") + .storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]") + .storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "scatter_fg_indirect_buf") + .storage_buf(3, Qualifier::READ_WRITE, "DrawCommand", "scatter_bg_indirect_buf") + .image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "inout_color_lod0_img") + .image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod1_img") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod2_img") + .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_lod3_img") + .image(4, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D, "in_coc_lod0_img") + .image(5, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod1_img") + .image(6, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod2_img") + .image(7, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_coc_lod3_img") + .compute_source("eevee_depth_of_field_reduce_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Circle-Of-Confusion Tiles + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_flatten) + .do_static_compilation(true) + .local_group_size(DOF_TILES_FLATTEN_GROUP_SIZE, DOF_TILES_FLATTEN_GROUP_SIZE) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .compute_source("eevee_depth_of_field_tiles_flatten_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate) + .additional_info("eevee_shared", "draw_view", "eevee_depth_of_field_tiles_common") + .local_group_size(DOF_TILES_DILATE_GROUP_SIZE, DOF_TILES_DILATE_GROUP_SIZE) + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_fg_img") + .image(3, GPU_R11F_G11F_B10F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_bg_img") + .push_constant(Type::INT, "ring_count") + .push_constant(Type::INT, "ring_width_multiplier") + .push_constant(Type::BOOL, "dilate_slight_focus") + .compute_source("eevee_depth_of_field_tiles_dilate_comp.glsl"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minabs) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "false") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_dilate_minmax) + .do_static_compilation(true) + .define("DILATE_MODE_MIN_MAX", "true") + .additional_info("eevee_depth_of_field_tiles_dilate"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_tiles_common) + .image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_fg_img") + .image(1, GPU_R11F_G11F_B10F, Qualifier::READ, ImageType::FLOAT_2D, "in_tiles_bg_img"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Variations + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_no_lut) + .define("DOF_BOKEH_TEXTURE", "false") + /** + * WORKAROUND(@fclem): This is to keep the code as is for now. The bokeh_lut_tx is referenced + * even if not used after optimisation. But we don't want to include it in the create infos. + */ + .define("bokeh_lut_tx", "color_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lut) + .define("DOF_BOKEH_TEXTURE", "true") + .sampler(5, ImageType::FLOAT_2D, "bokeh_lut_tx"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_background).define("DOF_FOREGROUND_PASS", "false"); +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_foreground).define("DOF_FOREGROUND_PASS", "true"); +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hq).define("DOF_SLIGHT_FOCUS_DENSITY", "4"); +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_lq).define("DOF_SLIGHT_FOCUS_DENSITY", "2"); + +#define EEVEE_DOF_FINAL_VARIATION(name, ...) \ + GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true); + +#define EEVEE_DOF_LUT_VARIATIONS(prefix, ...) \ + EEVEE_DOF_FINAL_VARIATION(prefix##_lut, "eevee_depth_of_field_lut", __VA_ARGS__) \ + EEVEE_DOF_FINAL_VARIATION(prefix, "eevee_depth_of_field_no_lut", __VA_ARGS__) + +#define EEVEE_DOF_GROUND_VARIATIONS(name, ...) \ + EEVEE_DOF_LUT_VARIATIONS(name##_background, "eevee_depth_of_field_background", __VA_ARGS__) \ + EEVEE_DOF_LUT_VARIATIONS(name##_foreground, "eevee_depth_of_field_foreground", __VA_ARGS__) + +#define EEVEE_DOF_HQ_VARIATIONS(name, ...) \ + EEVEE_DOF_LUT_VARIATIONS(name##_hq, "eevee_depth_of_field_hq", __VA_ARGS__) \ + EEVEE_DOF_LUT_VARIATIONS(name##_lq, "eevee_depth_of_field_lq", __VA_ARGS__) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Gather + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(2, "DepthOfFieldData", "dof_buf") + .local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE) + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx") + .sampler(2, ImageType::FLOAT_2D, "coc_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(3, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather) + .image(4, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_occlusion_img") + .compute_source("eevee_depth_of_field_gather_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common"); + +EEVEE_DOF_GROUND_VARIATIONS(eevee_depth_of_field_gather, "eevee_depth_of_field_gather") + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_hole_fill) + .do_static_compilation(true) + .compute_source("eevee_depth_of_field_hole_fill_comp.glsl") + .additional_info("eevee_depth_of_field_gather_common", "eevee_depth_of_field_no_lut"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_filter) + .do_static_compilation(true) + .local_group_size(DOF_FILTER_GROUP_SIZE, DOF_FILTER_GROUP_SIZE) + .additional_info("eevee_shared") + .sampler(0, ImageType::FLOAT_2D, "color_tx") + .sampler(1, ImageType::FLOAT_2D, "weight_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_weight_img") + .compute_source("eevee_depth_of_field_filter_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Scatter + * \{ */ + +GPU_SHADER_INTERFACE_INFO(eevee_depth_of_field_scatter_iface, "interp") + /** Colors, weights, and Circle of confusion radii for the 4 pixels to scatter. */ + .flat(Type::VEC4, "color_and_coc1") + .flat(Type::VEC4, "color_and_coc2") + .flat(Type::VEC4, "color_and_coc3") + .flat(Type::VEC4, "color_and_coc4") + /** Sprite pixel position with origin at sprite center. In pixels. */ + .no_perspective(Type::VEC2, "rect_uv1") + .no_perspective(Type::VEC2, "rect_uv2") + .no_perspective(Type::VEC2, "rect_uv3") + .no_perspective(Type::VEC2, "rect_uv4") + /** Scaling factor for the bokeh distance. */ + .flat(Type::FLOAT, "distance_scale"); + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_scatter) + .do_static_compilation(true) + .additional_info("eevee_shared", "draw_view") + .sampler(0, ImageType::FLOAT_2D, "occlusion_tx") + .sampler(1, ImageType::FLOAT_2D, "bokeh_lut_tx") + .storage_buf(0, Qualifier::READ, "ScatterRect", "scatter_list_buf[]") + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::BOOL, "use_bokeh_lut") + .vertex_out(eevee_depth_of_field_scatter_iface) + .vertex_source("eevee_depth_of_field_scatter_vert.glsl") + .fragment_source("eevee_depth_of_field_scatter_frag.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resolve + * \{ */ + +GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve) + .define("DOF_RESOLVE_PASS", "true") + .local_group_size(DOF_RESOLVE_GROUP_SIZE, DOF_RESOLVE_GROUP_SIZE) + .additional_info("eevee_shared", + "draw_view", + "eevee_depth_of_field_tiles_common", + "eevee_sampling_data") + .uniform_buf(2, "DepthOfFieldData", "dof_buf") + .sampler(0, ImageType::DEPTH_2D, "depth_tx") + .sampler(1, ImageType::FLOAT_2D, "color_tx") + .sampler(2, ImageType::FLOAT_2D, "color_bg_tx") + .sampler(3, ImageType::FLOAT_2D, "color_fg_tx") + .sampler(4, ImageType::FLOAT_2D, "color_hole_fill_tx") + .sampler(7, ImageType::FLOAT_2D, "weight_bg_tx") + .sampler(8, ImageType::FLOAT_2D, "weight_fg_tx") + .sampler(9, ImageType::FLOAT_2D, "weight_hole_fill_tx") + .image(2, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img") + .compute_source("eevee_depth_of_field_resolve_comp.glsl"); + +EEVEE_DOF_HQ_VARIATIONS(eevee_depth_of_field_resolve, "eevee_depth_of_field_resolve") + +/** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh index b01d1521c5e..e32020f2be6 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh @@ -1,4 +1,5 @@ +#include "eevee_defines.hh" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten) diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 2de0f4e470a..4157caf45d7 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -454,6 +454,7 @@ list(APPEND INC ${CMAKE_CURRENT_BINARY_DIR}) set(SRC_SHADER_CREATE_INFOS ../draw/engines/basic/shaders/infos/basic_depth_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh