
Compare commits


66 Commits

SHA1 Message Date
d371dfbe2c DRW: Change binding API
Using bind() overloads is too error-prone.
2022-08-30 21:27:02 +02:00
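The motivation, sketched below: with a single overloaded bind(), the resource category is inferred from the argument type alone, so passing the wrong kind of resource can silently pick the wrong overload. The bind_texture / bind_ubo / bind_ssbo / bind_image names are taken from the diffs further down; the overloaded bind() call is a hypothetical reconstruction of the pre-change API.

```cpp
/* Hypothetical pre-change call: the overload is chosen from the argument
 * type, so the intent (UBO vs SSBO, texture vs image) never appears at the
 * call site. */
pass.bind("dof_buf", data_);

/* Post-change calls, as used throughout the diffs below: the resource
 * category is explicit in the method name. */
pass.bind_ubo("dof_buf", data_);
pass.bind_texture("color_tx", &input_color_tx_);
pass.bind_image("out_color_img", &setup_color_tx_);
pass.bind_ssbo("scatter_list_buf", scatter_fg_list_buf_);
```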
57ea861692 EEVEE-Next: Light Culling: Use new DRW API 2022-08-30 20:25:48 +02:00
884bb4a4a5 EEVEE-Next: Motion Blur: Use new DRW API 2022-08-30 20:15:11 +02:00
04e0163751 EEVEE-Next: HiZbuffer: Use new DRW API 2022-08-30 20:01:16 +02:00
853d49d754 DRW: Allow drawing without any draw view 2022-08-30 20:00:51 +02:00
26692cb7e7 EEVEE-Next: Port depth of field to use new DRW API 2022-08-30 19:26:47 +02:00
5dd2371e70 EEVEE-Next: Fix shader compile error 2022-08-30 19:26:11 +02:00
42532f35e2 Fix submission of sortable pass 2022-08-30 17:03:59 +02:00
f940884f7b EEVEE-Next: Implement back transparency using PassSortable 2022-08-30 17:02:46 +02:00
e32180aafa Fix test cases 2022-08-30 13:06:41 +02:00
fce7e48578 DRW: Add new pass type for transparent objects (PassSortable) 2022-08-30 13:06:22 +02:00
2f6463517e EEVEE-Next: Fix out-of-bounds allocation. 2022-08-30 11:51:36 +02:00
df895ee2d6 EEVEE-Next: Fix material sharing 2022-08-29 15:49:54 +02:00
f9ea1de3a9 Disable bbox culling, still buggy with planar objects
This does match the old draw manager culling.
2022-08-29 14:59:48 +02:00
2995425454 Bypass drawing batches with 0 vertices 2022-08-29 14:19:59 +02:00
048ca79b32 Fix crash when rendering empty mesh object 2022-08-29 12:29:14 +02:00
debcb3027e EEVEE-Next: Move render view to be persistent 2022-08-28 12:40:48 +02:00
3ea7e5583b Add debug option to freeze culling matrices to be able to fly through and
inspect culling
2022-08-28 12:40:27 +02:00
8a21c68004 Fix debug print 2022-08-28 12:39:22 +02:00
3f3ccdf667 Fix multi material mesh 2022-08-28 10:03:21 +02:00
e3e86e9f59 Fix 2 bugs 2022-08-27 22:42:48 +02:00
36510c69d1 GPUCodegen: Do not use automatic resource location by default 2022-08-27 21:39:34 +02:00
7a2becf67e A few more bugs 2022-08-27 20:21:30 +02:00
8ae25c3da2 Fix several bugs 2022-08-27 18:22:15 +02:00
8b5ffd9af0 EEVEE-Next: Use new draw manager for light culling 2022-08-27 17:26:48 +02:00
c813d270a1 Fix use-after-move and reduce memory footprint of passes & subpasses 2022-08-27 15:50:41 +02:00
b6117386b9 Fix backfacing lingering state 2022-08-27 15:49:50 +02:00
3f372f5430 Fix old eevee 2022-08-27 15:49:15 +02:00
48e773ca40 Fix different mesh rendering 2022-08-27 15:49:06 +02:00
8814a0f699 Fix warnings 2022-08-27 15:48:32 +02:00
0191733214 Improve culling using inscribed sphere 2022-08-27 10:38:42 +02:00
6868988a1e Make debug draw/print work with the new draw manager 2022-08-27 10:37:43 +02:00
75ef33c7d3 EEVEE-Next: Share resource slot definitions to avoid duplicating lots of constants 2022-08-27 09:18:46 +02:00
69e337f47e EEVEE-Next: More support for new draw manager 2022-08-26 21:28:13 +02:00
0af76f9541 GPUMaterial: expose name 2022-08-26 20:29:06 +02:00
40f90e2ec0 Make it work 2022-08-26 20:28:52 +02:00
4a025222c5 Use more of the new API in eevee next 2022-08-26 14:50:00 +02:00
a8d9b72dc5 Remove auto resource location fiasco for eevee next 2022-08-26 14:49:12 +02:00
d5b1085aa7 Add Pass::material_set() 2022-08-26 14:48:34 +02:00
7efa5647fa Add more tests, simplify visibility shader, fix some bugs 2022-08-25 22:57:08 +02:00
4c16973fc8 Add draw manager sync callbacks 2022-08-25 16:35:14 +02:00
38a1aad4f2 Improve debuggability 2022-08-25 16:34:47 +02:00
ecca7ce32f Add simple pass test 2022-08-25 16:34:07 +02:00
9886150a6d Fix eevee test case 2022-08-25 13:51:39 +02:00
3a6696c0e7 Fix draw resource id binding 2022-08-25 12:00:59 +02:00
92827491c4 Add GPU command creation 2022-08-25 11:08:45 +02:00
e8fddb326e Add serialize for multidraw 2022-08-24 21:29:07 +02:00
e88a53c798 Cleanup: Only use one switch statement for command types 2022-08-24 18:24:45 +02:00
1dfb40c67c GPUStorageBuf: Add GPU_storagebuf_read() for debugging purpose 2022-08-24 16:05:32 +02:00
1bc2e9a6fc Add multidraw 2022-08-24 15:21:14 +02:00
bdda074f32 Fix tests 2022-08-23 22:53:22 +02:00
ae1f3e99e1 Fix compilation error 2022-08-23 22:33:19 +02:00
fd28570dc7 Rework pass class to reduce code duplication 2022-08-23 19:58:19 +02:00
d63355ee21 Make all draw commands go through indirect buffers
This makes sure the resource pipeline is the same for both multidraw and
non-multidraw calls.
2022-08-23 18:40:58 +02:00
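For reference, this is the standard indirect-command layout that GPU multi-draw-indirect consumes (it matches OpenGL's DrawArraysIndirectCommand; the struct name and field names below are illustrative, not taken from this diff):

```cpp
#include <cstdint>

/* One record per draw, written into a GPU storage buffer. Routing single
 * draws through the same buffer keeps one resource pipeline for both the
 * multidraw and non-multidraw paths. */
struct DrawCommand {
  uint32_t vertex_count;
  uint32_t instance_count;
  uint32_t vertex_first;
  uint32_t base_instance; /* Commonly repurposed to carry a resource index. */
};
```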
53eea778d7 GPUBatch: Implement multidrawindirect and indirect offset 2022-08-23 14:11:45 +02:00
46dc57af82 Add View support with visibility culling 2022-08-22 20:55:09 +02:00
acfce5c4eb Test: Update EEVEE to use part of the new draw manager 2022-08-22 14:19:44 +02:00
ac5e2c4463 Add View class
Contains culling data and View uniforms.
2022-08-22 14:18:52 +02:00
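Usage is visible in the Film changes further down: a draw::View wraps the legacy DRWView and is handed to the manager at submit time.

```cpp
/* From Film::accumulate() in the diffs below. */
draw::View drw_view("MainView", view);
DRW_manager_get()->submit(accumulate_ps_, drw_view);
```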
0fed44b289 Add resource filling 2022-08-22 13:51:29 +02:00
94f3b8ced8 Merge branch 'master' into drw-manager-next 2022-08-21 12:45:28 +02:00
fba1110d18 Add debug group and sub-group test, fix a few bugs/typos 2022-08-21 12:43:09 +02:00
908d1fd449 Fix a few typo errors and add a test for float4x4 2022-08-21 10:23:17 +02:00
59d8f8a373 Fix system support check 2022-08-20 22:36:02 +02:00
dec817524f Refactor Pass class
Now there is still a base class, but no virtual functions. The different
draw implementations are declared as methods shadowing the base class
methods.

Sub-passes are limited to a depth of 1: a sub-pass cannot contain nested
sub-passes.

The test is now passing.
2022-08-20 22:22:57 +02:00
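A minimal standalone illustration of the shadowing technique this commit describes (class and method names are illustrative, not Blender's actual declarations):

```cpp
#include <cstdio>

struct PassBase {
  /* Non-virtual: no vtable, calls resolve statically. */
  void draw() { std::printf("PassBase::draw\n"); }
};

struct PassSimple : PassBase {
  /* Shadows (does not override) the base class method. */
  void draw() { std::printf("PassSimple::draw\n"); }
};

int main()
{
  PassSimple ps;
  ps.draw();                          /* PassSimple::draw. */
  static_cast<PassBase &>(ps).draw(); /* PassBase::draw: no dynamic dispatch. */
  return 0;
}
```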
dcc4204207 Refactor to reduce complexity
Remove different pass type and abstract base class.
2022-08-20 17:33:05 +02:00
0a5460dd82 DRW: Manager: New modern implementation
This is a rewrite of the whole DRW manager in C++, using more GPU-driven
rendering.
2022-08-20 15:09:16 +02:00
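The overall shape of the new API, condensed from the EEVEE diffs below (a fragment, not standalone code): a pass is (re)initialized, configured with a shader and named resource bindings, records its commands, and is then explicitly submitted against a View.

```cpp
/* Record once per sync (from DepthOfField::setup_pass_sync() below). */
setup_ps_.init();
setup_ps_.shader_set(inst_.shaders.static_shader_get(DOF_SETUP));
setup_ps_.bind_texture("color_tx", &input_color_tx_, no_filter);
setup_ps_.bind_ubo("dof_buf", data_);
setup_ps_.bind_image("out_color_img", &setup_color_tx_);
setup_ps_.dispatch(&dispatch_setup_size_);
setup_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);

/* Submit every frame (from DepthOfField::render() below). */
Manager &drw = *inst_.manager;
drw.submit(setup_ps_, view);
```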
94 changed files with 5670 additions and 973 deletions

View File

@@ -2313,6 +2313,7 @@ class USERPREF_PT_experimental_debugging(ExperimentalPanel, Panel):
({"property": "use_cycles_debug"}, None),
({"property": "show_asset_debug_info"}, None),
({"property": "use_asset_indexing"}, None),
({"property": "use_viewport_debug"}, None),
),
)

View File

@@ -7804,6 +7804,25 @@ class VIEW3D_PT_curves_sculpt_grow_shrink_scaling(Panel):
        layout.prop(brush.curves_sculpt_settings, "minimum_length")


class VIEW3D_PT_viewport_debug(Panel):
    bl_space_type = 'VIEW_3D'
    bl_region_type = 'HEADER'
    bl_parent_id = 'VIEW3D_PT_overlay'
    bl_label = "Viewport Debug"

    @classmethod
    def poll(cls, context):
        prefs = context.preferences
        return prefs.experimental.use_viewport_debug

    def draw(self, context):
        layout = self.layout
        view = context.space_data
        overlay = view.overlay
        layout.prop(overlay, "use_debug_freeze_view_culling")


classes = (
    VIEW3D_HT_header,
    VIEW3D_HT_tool_header,
@@ -8041,6 +8060,7 @@ classes = (
    TOPBAR_PT_annotation_layers,
    VIEW3D_PT_curves_sculpt_add_shape,
    VIEW3D_PT_curves_sculpt_grow_shrink_scaling,
    VIEW3D_PT_viewport_debug,
)

View File

@@ -79,19 +79,22 @@ set(SRC
intern/draw_cache_impl_subdivision.cc
intern/draw_cache_impl_volume.c
intern/draw_color_management.cc
intern/draw_command.cc
intern/draw_common.c
intern/draw_curves.cc
intern/draw_debug.cc
intern/draw_fluid.c
intern/draw_hair.cc
intern/draw_instance_data.c
intern/draw_manager.c
intern/draw_manager_data.c
intern/draw_manager_exec.c
intern/draw_manager_profiling.c
intern/draw_manager_shader.c
intern/draw_manager_text.c
intern/draw_manager_texture.c
intern/draw_manager.c
intern/draw_manager.cc
intern/draw_pass.cc
intern/draw_select_buffer.c
intern/draw_shader.cc
intern/draw_texture_pool.cc
@@ -206,28 +209,32 @@ set(SRC
intern/DRW_gpu_wrapper.hh
intern/DRW_render.h
intern/draw_attributes.h
intern/draw_cache.h
intern/draw_cache_extract.hh
intern/draw_cache_impl.h
intern/draw_cache_inline.h
intern/draw_cache.h
intern/draw_color_management.h
intern/draw_common.h
intern/draw_command.hh
intern/draw_common_shader_shared.h
intern/draw_common.h
intern/draw_curves_private.h
intern/draw_debug.h
intern/draw_debug.hh
intern/draw_hair_private.h
intern/draw_instance_data.h
intern/draw_manager.h
intern/draw_manager_profiling.h
intern/draw_manager_testing.h
intern/draw_manager_text.h
intern/draw_shader.h
intern/draw_manager.h
intern/draw_manager.hh
intern/draw_pass.hh
intern/draw_shader_shared.h
intern/draw_shader.h
intern/draw_subdivision.h
intern/draw_texture_pool.h
intern/draw_view.h
intern/draw_view_data.h
intern/draw_view.cc
intern/draw_view.h
intern/mesh_extractors/extract_mesh.hh
intern/smaa_textures.h
engines/basic/basic_engine.h
@@ -480,14 +487,19 @@ set(GLSL_SRC
intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl
intern/shaders/common_view_clipping_lib.glsl
intern/shaders/common_view_lib.glsl
intern/shaders/draw_command_generate_comp.glsl
intern/shaders/draw_debug_draw_display_frag.glsl
intern/shaders/draw_debug_draw_display_vert.glsl
intern/shaders/draw_debug_info.hh
intern/shaders/draw_debug_print_display_frag.glsl
intern/shaders/draw_debug_print_display_vert.glsl
intern/shaders/draw_resource_finalize_comp.glsl
intern/shaders/draw_visibility_comp.glsl
intern/draw_common_shader_shared.h
intern/draw_command_shared.hh
intern/draw_shader_shared.h
intern/draw_defines.h
engines/gpencil/shaders/gpencil_frag.glsl
engines/gpencil/shaders/gpencil_vert.glsl
@@ -692,6 +704,7 @@ if(WITH_GTESTS)
if(WITH_OPENGL_DRAW_TESTS)
set(TEST_SRC
tests/draw_testing.cc
tests/draw_pass_test.cc
tests/shaders_test.cc
tests/draw_testing.hh

View File

@@ -68,3 +68,37 @@
#define DOF_FILTER_GROUP_SIZE 8
#define DOF_GATHER_GROUP_SIZE DOF_TILES_SIZE
#define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)
/* Resource bindings. */
/* Texture. */
#define RBUFS_UTILITY_TEX_SLOT 14
/* Images. */
#define RBUFS_NORMAL_SLOT 0
#define RBUFS_LIGHT_SLOT 1
#define RBUFS_DIFF_COLOR_SLOT 2
#define RBUFS_SPEC_COLOR_SLOT 3
#define RBUFS_EMISSION_SLOT 4
#define RBUFS_AOV_COLOR_SLOT 5
#define RBUFS_AOV_VALUE_SLOT 6
/* Uniform Bufs. */
/* Only during prepass. */
#define VELOCITY_CAMERA_PREV_BUF 3
#define VELOCITY_CAMERA_CURR_BUF 4
#define VELOCITY_CAMERA_NEXT_BUF 5
/* Storage Bufs. */
#define LIGHT_CULL_BUF_SLOT 0
#define LIGHT_BUF_SLOT 1
#define LIGHT_ZBIN_BUF_SLOT 2
#define LIGHT_TILE_BUF_SLOT 3
#define RBUFS_AOV_BUF_SLOT 5
#define SAMPLING_BUF_SLOT 6
/* Only during prepass. */
#define VELOCITY_OBJ_PREV_BUF_SLOT 0
#define VELOCITY_OBJ_NEXT_BUF_SLOT 1
#define VELOCITY_GEO_PREV_BUF_SLOT 2
#define VELOCITY_GEO_NEXT_BUF_SLOT 3
#define VELOCITY_INDIRECTION_BUF_SLOT 4
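These fixed slots relate to the "do not use automatic resource location" commit above: bindings are pinned explicitly so that several shaders can share the same layout. A hedged sketch of how such defines are typically consumed in a GPU shader create-info (the actual create-info lines are not part of this diff; the info name and resource names below are illustrative):

```cpp
/* Illustrative only: pin resources to the shared slots at shader creation. */
GPU_SHADER_CREATE_INFO(eevee_example)
    .sampler(RBUFS_UTILITY_TEX_SLOT, ImageType::FLOAT_2D, "utility_tx")
    .storage_buf(LIGHT_BUF_SLOT, Qualifier::READ, "LightData", "light_buf[]")
    .uniform_buf(VELOCITY_CAMERA_PREV_BUF, "CameraData", "camera_prev");
```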

View File

@@ -237,35 +237,34 @@ void DepthOfField::bokeh_lut_pass_sync()
const bool has_anisotropy = data_.bokeh_anisotropic_scale != float2(1.0f);
if (!has_anisotropy && (data_.bokeh_blades == 0.0)) {
/* No need for LUTs in these cases. */
bokeh_lut_ps_ = nullptr;
use_bokeh_lut_ = false;
return;
}
use_bokeh_lut_ = true;
/* Precompute bokeh texture. */
bokeh_lut_ps_ = DRW_pass_create("Dof.bokeh_lut_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_BOKEH_LUT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, bokeh_lut_ps_);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image_ref(grp, "out_gather_lut_img", &bokeh_gather_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_scatter_lut_img", &bokeh_scatter_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_resolve_lut_img", &bokeh_resolve_lut_tx_);
DRW_shgroup_call_compute(grp, 1, 1, 1);
bokeh_lut_ps_.init();
bokeh_lut_ps_.shader_set(inst_.shaders.static_shader_get(DOF_BOKEH_LUT));
bokeh_lut_ps_.bind_ubo("dof_buf", data_);
bokeh_lut_ps_.bind_image("out_gather_lut_img", &bokeh_gather_lut_tx_);
bokeh_lut_ps_.bind_image("out_scatter_lut_img", &bokeh_scatter_lut_tx_);
bokeh_lut_ps_.bind_image("out_resolve_lut_img", &bokeh_resolve_lut_tx_);
bokeh_lut_ps_.dispatch(int3(1, 1, 1));
}
void DepthOfField::setup_pass_sync()
{
RenderBuffers &render_buffers = inst_.render_buffers;
setup_ps_ = DRW_pass_create("Dof.setup_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_SETUP);
DRWShadingGroup *grp = DRW_shgroup_create(sh, setup_ps_);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &setup_color_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_coc_img", &setup_coc_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_setup_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
setup_ps_.init();
setup_ps_.shader_set(inst_.shaders.static_shader_get(DOF_SETUP));
setup_ps_.bind_texture("color_tx", &input_color_tx_, no_filter);
setup_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
setup_ps_.bind_ubo("dof_buf", data_);
setup_ps_.bind_image("out_color_img", &setup_color_tx_);
setup_ps_.bind_image("out_coc_img", &setup_coc_tx_);
setup_ps_.dispatch(&dispatch_setup_size_);
setup_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::stabilize_pass_sync()
@@ -273,214 +272,203 @@ void DepthOfField::stabilize_pass_sync()
RenderBuffers &render_buffers = inst_.render_buffers;
VelocityModule &velocity = inst_.velocity;
stabilize_ps_ = DRW_pass_create("Dof.stabilize_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_STABILIZE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, stabilize_ps_);
DRW_shgroup_uniform_block_ref(grp, "camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
DRW_shgroup_uniform_block_ref(grp, "camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
stabilize_ps_.init();
stabilize_ps_.shader_set(inst_.shaders.static_shader_get(DOF_STABILIZE));
stabilize_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
stabilize_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
/* This is only for temporal stability. The next step is not needed. */
DRW_shgroup_uniform_block_ref(grp, "camera_next", &(*velocity.camera_steps[STEP_PREVIOUS]));
DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &setup_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "velocity_tx", &render_buffers.vector_tx, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "in_history_tx", &stabilize_input_, with_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_bool(grp, "use_history", &stabilize_valid_history_, 1);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_image(grp, "out_coc_img", reduced_coc_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_color_img", reduced_color_tx_.mip_view(0));
DRW_shgroup_uniform_image_ref(grp, "out_history_img", &stabilize_output_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_stabilize_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
stabilize_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[STEP_PREVIOUS]));
stabilize_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter);
stabilize_ps_.bind_texture("color_tx", &setup_color_tx_, no_filter);
stabilize_ps_.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter);
stabilize_ps_.bind_texture("in_history_tx", &stabilize_input_, with_filter);
stabilize_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
stabilize_ps_.bind_ubo("dof_buf", data_);
stabilize_ps_.push_constant("use_history", &stabilize_valid_history_, 1);
stabilize_ps_.bind_image("out_coc_img", reduced_coc_tx_.mip_view(0));
stabilize_ps_.bind_image("out_color_img", reduced_color_tx_.mip_view(0));
stabilize_ps_.bind_image("out_history_img", &stabilize_output_tx_);
stabilize_ps_.dispatch(&dispatch_stabilize_size_);
stabilize_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
void DepthOfField::downsample_pass_sync()
{
downsample_ps_ = DRW_pass_create("Dof.downsample_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_DOWNSAMPLE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, downsample_ps_);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_.mip_view(0), no_filter);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_.mip_view(0), no_filter);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &downsample_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_downsample_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
downsample_ps_.init();
downsample_ps_.shader_set(inst_.shaders.static_shader_get(DOF_DOWNSAMPLE));
downsample_ps_.bind_texture("color_tx", reduced_color_tx_.mip_view(0), no_filter);
downsample_ps_.bind_texture("coc_tx", reduced_coc_tx_.mip_view(0), no_filter);
downsample_ps_.bind_image("out_color_img", &downsample_tx_);
downsample_ps_.dispatch(&dispatch_downsample_size_);
downsample_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::reduce_pass_sync()
{
reduce_ps_ = DRW_pass_create("Dof.reduce_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_REDUCE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, reduce_ps_);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ref_ex(grp, "downsample_tx", &downsample_tx_, no_filter);
DRW_shgroup_storage_block(grp, "scatter_fg_list_buf", scatter_fg_list_buf_);
DRW_shgroup_storage_block(grp, "scatter_bg_list_buf", scatter_bg_list_buf_);
DRW_shgroup_storage_block(grp, "scatter_fg_indirect_buf", scatter_fg_indirect_buf_);
DRW_shgroup_storage_block(grp, "scatter_bg_indirect_buf", scatter_bg_indirect_buf_);
DRW_shgroup_uniform_image(grp, "inout_color_lod0_img", reduced_color_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_color_lod1_img", reduced_color_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_color_lod2_img", reduced_color_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_color_lod3_img", reduced_color_tx_.mip_view(3));
DRW_shgroup_uniform_image(grp, "in_coc_lod0_img", reduced_coc_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
DRW_shgroup_call_compute_ref(grp, dispatch_reduce_size_);
reduce_ps_.init();
reduce_ps_.shader_set(inst_.shaders.static_shader_get(DOF_REDUCE));
reduce_ps_.bind_ubo("dof_buf", data_);
reduce_ps_.bind_texture("downsample_tx", &downsample_tx_, no_filter);
reduce_ps_.bind_ssbo("scatter_fg_list_buf", scatter_fg_list_buf_);
reduce_ps_.bind_ssbo("scatter_bg_list_buf", scatter_bg_list_buf_);
reduce_ps_.bind_ssbo("scatter_fg_indirect_buf", scatter_fg_indirect_buf_);
reduce_ps_.bind_ssbo("scatter_bg_indirect_buf", scatter_bg_indirect_buf_);
reduce_ps_.bind_image("inout_color_lod0_img", reduced_color_tx_.mip_view(0));
reduce_ps_.bind_image("out_color_lod1_img", reduced_color_tx_.mip_view(1));
reduce_ps_.bind_image("out_color_lod2_img", reduced_color_tx_.mip_view(2));
reduce_ps_.bind_image("out_color_lod3_img", reduced_color_tx_.mip_view(3));
reduce_ps_.bind_image("in_coc_lod0_img", reduced_coc_tx_.mip_view(0));
reduce_ps_.bind_image("out_coc_lod1_img", reduced_coc_tx_.mip_view(1));
reduce_ps_.bind_image("out_coc_lod2_img", reduced_coc_tx_.mip_view(2));
reduce_ps_.bind_image("out_coc_lod3_img", reduced_coc_tx_.mip_view(3));
reduce_ps_.dispatch(&dispatch_reduce_size_);
/* NOTE: Command buffer barrier is done automatically by the GPU backend. */
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
reduce_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_STORAGE);
}
void DepthOfField::tiles_flatten_pass_sync()
{
tiles_flatten_ps_ = DRW_pass_create("Dof.tiles_flatten_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_TILES_FLATTEN);
DRWShadingGroup *grp = DRW_shgroup_create(sh, tiles_flatten_ps_);
tiles_flatten_ps_.init();
tiles_flatten_ps_.shader_set(inst_.shaders.static_shader_get(DOF_TILES_FLATTEN));
/* NOTE(fclem): We should use the reduced_coc_tx_ as it is stable, but we need the slight focus
* flag from the setup pass. A better way would be to do the brute-force in-focus gather without
* this. */
DRW_shgroup_uniform_texture_ref_ex(grp, "coc_tx", &setup_coc_tx_, no_filter);
DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_call_compute_ref(grp, dispatch_tiles_flatten_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
tiles_flatten_ps_.bind_texture("coc_tx", &setup_coc_tx_, no_filter);
tiles_flatten_ps_.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current());
tiles_flatten_ps_.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current());
tiles_flatten_ps_.dispatch(&dispatch_tiles_flatten_size_);
tiles_flatten_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
void DepthOfField::tiles_dilate_pass_sync()
{
tiles_dilate_minmax_ps_ = DRW_pass_create("Dof.tiles_dilate_minmax_ps_", DRW_STATE_NO_DRAW);
tiles_dilate_minabs_ps_ = DRW_pass_create("Dof.tiles_dilate_minabs_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
DRWPass *drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_;
GPUShader *sh = inst_.shaders.static_shader_get((pass == 0) ? DOF_TILES_DILATE_MINMAX :
DOF_TILES_DILATE_MINABS);
DRWShadingGroup *grp = DRW_shgroup_create(sh, drw_pass);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.previous());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.previous());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_int(grp, "ring_count", &tiles_dilate_ring_count_, 1);
DRW_shgroup_uniform_int(grp, "ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1);
DRW_shgroup_call_compute_ref(grp, dispatch_tiles_dilate_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
PassSimple &drw_pass = (pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_;
eShaderType sh_type = (pass == 0) ? DOF_TILES_DILATE_MINMAX : DOF_TILES_DILATE_MINABS;
drw_pass.init();
drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type));
drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.previous());
drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.previous());
drw_pass.bind_image("out_tiles_fg_img", &tiles_fg_tx_.current());
drw_pass.bind_image("out_tiles_bg_img", &tiles_bg_tx_.current());
drw_pass.push_constant("ring_count", &tiles_dilate_ring_count_, 1);
drw_pass.push_constant("ring_width_multiplier", &tiles_dilate_ring_width_mul_, 1);
drw_pass.dispatch(&dispatch_tiles_dilate_size_);
drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
}
void DepthOfField::gather_pass_sync()
{
gather_fg_ps_ = DRW_pass_create("Dof.gather_fg_ps_", DRW_STATE_NO_DRAW);
gather_bg_ps_ = DRW_pass_create("Dof.gather_bg_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
PassSimple &drw_pass = (pass == 0) ? gather_fg_ps_ : gather_bg_ps_;
SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
bool use_lut = bokeh_lut_ps_ != nullptr;
eShaderType sh_type = (pass == 0) ?
(use_lut ? DOF_GATHER_FOREGROUND_LUT : DOF_GATHER_FOREGROUND) :
(use_lut ? DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND);
GPUShader *sh = inst_.shaders.static_shader_get(sh_type);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? gather_fg_ps_ : gather_bg_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_occlusion_img", &occlusion_tx_);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_gather_lut_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
(use_bokeh_lut_ ? DOF_GATHER_FOREGROUND_LUT :
DOF_GATHER_FOREGROUND) :
(use_bokeh_lut_ ? DOF_GATHER_BACKGROUND_LUT : DOF_GATHER_BACKGROUND);
drw_pass.init();
inst_.sampling.bind_resources(&drw_pass);
drw_pass.shader_set(inst_.shaders.static_shader_get(sh_type));
drw_pass.bind_ubo("dof_buf", data_);
drw_pass.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear);
drw_pass.bind_texture("color_tx", reduced_color_tx_, gather_nearest);
drw_pass.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest);
drw_pass.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
drw_pass.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
drw_pass.bind_image("out_color_img", &color_chain.current());
drw_pass.bind_image("out_weight_img", &weight_chain.current());
drw_pass.bind_image("out_occlusion_img", &occlusion_tx_);
drw_pass.bind_texture("bokeh_lut_tx", &bokeh_gather_lut_tx_);
drw_pass.dispatch(&dispatch_gather_size_);
drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
}
void DepthOfField::filter_pass_sync()
{
filter_fg_ps_ = DRW_pass_create("Dof.filter_fg_ps_", DRW_STATE_NO_DRAW);
filter_bg_ps_ = DRW_pass_create("Dof.filter_bg_ps_", DRW_STATE_NO_DRAW);
for (int pass = 0; pass < 2; pass++) {
PassSimple &drw_pass = (pass == 0) ? filter_fg_ps_ : filter_bg_ps_;
SwapChain<TextureFromPool, 2> &color_chain = (pass == 0) ? color_fg_tx_ : color_bg_tx_;
SwapChain<TextureFromPool, 2> &weight_chain = (pass == 0) ? weight_fg_tx_ : weight_bg_tx_;
GPUShader *sh = inst_.shaders.static_shader_get(DOF_FILTER);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? filter_fg_ps_ : filter_bg_ps_);
DRW_shgroup_uniform_texture_ref(grp, "color_tx", &color_chain.previous());
DRW_shgroup_uniform_texture_ref(grp, "weight_tx", &weight_chain.previous());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &color_chain.current());
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_chain.current());
DRW_shgroup_call_compute_ref(grp, dispatch_filter_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
drw_pass.init();
drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_FILTER));
drw_pass.bind_texture("color_tx", &color_chain.previous());
drw_pass.bind_texture("weight_tx", &weight_chain.previous());
drw_pass.bind_image("out_color_img", &color_chain.current());
drw_pass.bind_image("out_weight_img", &weight_chain.current());
drw_pass.dispatch(&dispatch_filter_size_);
drw_pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
}
void DepthOfField::scatter_pass_sync()
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL;
scatter_fg_ps_ = DRW_pass_create("Dof.scatter_fg_ps_", state);
scatter_bg_ps_ = DRW_pass_create("Dof.scatter_bg_ps_", state);
for (int pass = 0; pass < 2; pass++) {
GPUStorageBuf *scatter_buf = (pass == 0) ? scatter_fg_indirect_buf_ : scatter_bg_indirect_buf_;
GPUStorageBuf *rect_list_buf = (pass == 0) ? scatter_fg_list_buf_ : scatter_bg_list_buf_;
GPUShader *sh = inst_.shaders.static_shader_get(DOF_SCATTER);
DRWShadingGroup *grp = DRW_shgroup_create(sh, (pass == 0) ? scatter_fg_ps_ : scatter_bg_ps_);
DRW_shgroup_uniform_bool_copy(grp, "use_bokeh_lut", bokeh_lut_ps_ != nullptr);
DRW_shgroup_storage_block(grp, "scatter_list_buf", rect_list_buf);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_scatter_lut_tx_);
DRW_shgroup_uniform_texture_ref(grp, "occlusion_tx", &occlusion_tx_);
DRW_shgroup_call_procedural_indirect(grp, GPU_PRIM_TRI_STRIP, nullptr, scatter_buf);
PassSimple &drw_pass = (pass == 0) ? scatter_fg_ps_ : scatter_bg_ps_;
drw_pass.init();
drw_pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD_FULL);
drw_pass.shader_set(inst_.shaders.static_shader_get(DOF_SCATTER));
drw_pass.push_constant("use_bokeh_lut", use_bokeh_lut_);
drw_pass.bind_texture("bokeh_lut_tx", &bokeh_scatter_lut_tx_);
drw_pass.bind_texture("occlusion_tx", &occlusion_tx_);
if (pass == 0) {
drw_pass.bind_ssbo("scatter_list_buf", scatter_fg_list_buf_);
drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_fg_indirect_buf_);
/* Avoid background gather pass writing to the occlusion_tx mid pass. */
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
drw_pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
else {
drw_pass.bind_ssbo("scatter_list_buf", scatter_bg_list_buf_);
drw_pass.draw_procedural_indirect(GPU_PRIM_TRI_STRIP, scatter_bg_indirect_buf_);
}
}
}
void DepthOfField::hole_fill_pass_sync()
{
hole_fill_ps_ = DRW_pass_create("Dof.hole_fill_ps_", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL);
DRWShadingGroup *grp = DRW_shgroup_create(sh, hole_fill_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ex(grp, "color_bilinear_tx", reduced_color_tx_, gather_bilinear);
DRW_shgroup_uniform_texture_ex(grp, "color_tx", reduced_color_tx_, gather_nearest);
DRW_shgroup_uniform_texture_ex(grp, "coc_tx", reduced_coc_tx_, gather_nearest);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &hole_fill_color_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &hole_fill_weight_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
hole_fill_ps_.init();
inst_.sampling.bind_resources(&hole_fill_ps_);
hole_fill_ps_.shader_set(inst_.shaders.static_shader_get(DOF_GATHER_HOLE_FILL));
hole_fill_ps_.bind_ubo("dof_buf", data_);
hole_fill_ps_.bind_texture("color_bilinear_tx", reduced_color_tx_, gather_bilinear);
hole_fill_ps_.bind_texture("color_tx", reduced_color_tx_, gather_nearest);
hole_fill_ps_.bind_texture("coc_tx", reduced_coc_tx_, gather_nearest);
hole_fill_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
hole_fill_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
hole_fill_ps_.bind_image("out_color_img", &hole_fill_color_tx_);
hole_fill_ps_.bind_image("out_weight_img", &hole_fill_weight_tx_);
hole_fill_ps_.dispatch(&dispatch_gather_size_);
hole_fill_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
void DepthOfField::resolve_pass_sync()
{
eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;
RenderBuffers &render_buffers = inst_.render_buffers;
eShaderType sh_type = use_bokeh_lut_ ? DOF_RESOLVE_LUT : DOF_RESOLVE;
resolve_ps_ = DRW_pass_create("Dof.resolve_ps_", DRW_STATE_NO_DRAW);
bool use_lut = bokeh_lut_ps_ != nullptr;
eShaderType sh_type = use_lut ? DOF_RESOLVE_LUT : DOF_RESOLVE;
GPUShader *sh = inst_.shaders.static_shader_get(sh_type);
DRWShadingGroup *grp = DRW_shgroup_create(sh, resolve_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "dof_buf", data_);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_tx", &input_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "stable_color_tx", &resolve_stable_color_tx_, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_bg_tx", &color_bg_tx_.current(), with_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "color_fg_tx", &color_fg_tx_.current(), with_filter);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_fg_img", &tiles_fg_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "in_tiles_bg_img", &tiles_bg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "weight_bg_tx", &weight_bg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "weight_fg_tx", &weight_fg_tx_.current());
DRW_shgroup_uniform_texture_ref(grp, "color_hole_fill_tx", &hole_fill_color_tx_);
DRW_shgroup_uniform_texture_ref(grp, "weight_hole_fill_tx", &hole_fill_weight_tx_);
DRW_shgroup_uniform_texture_ref(grp, "bokeh_lut_tx", &bokeh_resolve_lut_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &output_color_tx_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
DRW_shgroup_call_compute_ref(grp, dispatch_resolve_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
resolve_ps_.init();
inst_.sampling.bind_resources(&resolve_ps_);
resolve_ps_.shader_set(inst_.shaders.static_shader_get(sh_type));
resolve_ps_.bind_ubo("dof_buf", data_);
resolve_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
resolve_ps_.bind_texture("color_tx", &input_color_tx_, no_filter);
resolve_ps_.bind_texture("stable_color_tx", &resolve_stable_color_tx_, no_filter);
resolve_ps_.bind_texture("color_bg_tx", &color_bg_tx_.current(), with_filter);
resolve_ps_.bind_texture("color_fg_tx", &color_fg_tx_.current(), with_filter);
resolve_ps_.bind_image("in_tiles_fg_img", &tiles_fg_tx_.current());
resolve_ps_.bind_image("in_tiles_bg_img", &tiles_bg_tx_.current());
resolve_ps_.bind_texture("weight_bg_tx", &weight_bg_tx_.current());
resolve_ps_.bind_texture("weight_fg_tx", &weight_fg_tx_.current());
resolve_ps_.bind_texture("color_hole_fill_tx", &hole_fill_color_tx_);
resolve_ps_.bind_texture("weight_hole_fill_tx", &hole_fill_weight_tx_);
resolve_ps_.bind_texture("bokeh_lut_tx", &bokeh_resolve_lut_tx_);
resolve_ps_.bind_image("out_color_img", &output_color_tx_);
resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
resolve_ps_.dispatch(&dispatch_resolve_size_);
resolve_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
/** \} */
@@ -509,7 +497,8 @@ void DepthOfField::update_sample_table()
data_.filter_center_weight = film_filter_weight(radius, math::length_squared(subpixel_offset));
}
void DepthOfField::render(GPUTexture **input_tx,
void DepthOfField::render(View &view,
GPUTexture **input_tx,
GPUTexture **output_tx,
DepthOfFieldBuffer &dof_buffer)
{
@@ -580,6 +569,8 @@ void DepthOfField::render(GPUTexture **input_tx,
DRW_stats_group_start("Depth of Field");
Manager &drw = *inst_.manager;
{
DRW_stats_group_start("Setup");
{
@@ -587,13 +578,15 @@ void DepthOfField::render(GPUTexture **input_tx,
bokeh_scatter_lut_tx_.acquire(int2(DOF_BOKEH_LUT_SIZE), GPU_R16F);
bokeh_resolve_lut_tx_.acquire(int2(DOF_MAX_SLIGHT_FOCUS_RADIUS * 2 + 1), GPU_R16F);
DRW_draw_pass(bokeh_lut_ps_);
if (use_bokeh_lut_) {
drw.submit(bokeh_lut_ps_, view);
}
}
{
setup_color_tx_.acquire(half_res, GPU_RGBA16F);
setup_coc_tx_.acquire(half_res, GPU_R16F);
DRW_draw_pass(setup_ps_);
drw.submit(setup_ps_, view);
}
{
stabilize_output_tx_.acquire(half_res, GPU_RGBA16F);
@@ -607,7 +600,7 @@ void DepthOfField::render(GPUTexture **input_tx,
stabilize_input_ = dof_buffer.stabilize_history_tx_;
/* Outputs to reduced_*_tx_ mip 0. */
DRW_draw_pass(stabilize_ps_);
drw.submit(stabilize_ps_, view);
/* WATCH(fclem): Swap Texture and TextureFromPool internal GPUTexture in order to reuse
* the one that we just consumed. */
@@ -626,7 +619,7 @@ void DepthOfField::render(GPUTexture **input_tx,
tiles_fg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F);
tiles_bg_tx_.current().acquire(tile_res, GPU_R11F_G11F_B10F);
DRW_draw_pass(tiles_flatten_ps_);
drw.submit(tiles_flatten_ps_, view);
/* Used by tile_flatten and stabilize_ps pass. */
setup_coc_tx_.release();
@@ -655,7 +648,7 @@ void DepthOfField::render(GPUTexture **input_tx,
tiles_fg_tx_.swap();
tiles_bg_tx_.swap();
DRW_draw_pass((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_);
drw.submit((pass == 0) ? tiles_dilate_minmax_ps_ : tiles_dilate_minabs_ps_, view);
}
}
@@ -667,12 +660,12 @@ void DepthOfField::render(GPUTexture **input_tx,
downsample_tx_.acquire(quarter_res, GPU_RGBA16F);
DRW_draw_pass(downsample_ps_);
drw.submit(downsample_ps_, view);
scatter_fg_indirect_buf_.clear_to_zero();
scatter_bg_indirect_buf_.clear_to_zero();
DRW_draw_pass(reduce_ps_);
drw.submit(reduce_ps_, view);
/* Used by reduce pass. */
downsample_tx_.release();
@@ -686,15 +679,15 @@ void DepthOfField::render(GPUTexture **input_tx,
SwapChain<TextureFromPool, 2> &color_tx = is_background ? color_bg_tx_ : color_fg_tx_;
SwapChain<TextureFromPool, 2> &weight_tx = is_background ? weight_bg_tx_ : weight_fg_tx_;
Framebuffer &scatter_fb = is_background ? scatter_bg_fb_ : scatter_fg_fb_;
DRWPass *gather_ps = is_background ? gather_bg_ps_ : gather_fg_ps_;
DRWPass *filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_;
DRWPass *scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_;
PassSimple &gather_ps = is_background ? gather_bg_ps_ : gather_fg_ps_;
PassSimple &filter_ps = is_background ? filter_bg_ps_ : filter_fg_ps_;
PassSimple &scatter_ps = is_background ? scatter_bg_ps_ : scatter_fg_ps_;
color_tx.current().acquire(half_res, GPU_RGBA16F);
weight_tx.current().acquire(half_res, GPU_R16F);
occlusion_tx_.acquire(half_res, GPU_RG16F);
DRW_draw_pass(gather_ps);
drw.submit(gather_ps, view);
{
/* Filtering pass. */
@@ -704,7 +697,7 @@ void DepthOfField::render(GPUTexture **input_tx,
color_tx.current().acquire(half_res, GPU_RGBA16F);
weight_tx.current().acquire(half_res, GPU_R16F);
DRW_draw_pass(filter_ps);
drw.submit(filter_ps, view);
color_tx.previous().release();
weight_tx.previous().release();
@@ -715,7 +708,7 @@ void DepthOfField::render(GPUTexture **input_tx,
scatter_fb.ensure(GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(color_tx.current()));
GPU_framebuffer_bind(scatter_fb);
DRW_draw_pass(scatter_ps);
drw.submit(scatter_ps, view);
/* Used by scatter pass. */
occlusion_tx_.release();
@@ -731,7 +724,7 @@ void DepthOfField::render(GPUTexture **input_tx,
hole_fill_color_tx_.acquire(half_res, GPU_RGBA16F);
hole_fill_weight_tx_.acquire(half_res, GPU_R16F);
DRW_draw_pass(hole_fill_ps_);
drw.submit(hole_fill_ps_, view);
/* NOTE: We do not filter the hole-fill pass as the effect is likely not noticeable. */
@@ -742,7 +735,7 @@ void DepthOfField::render(GPUTexture **input_tx,
resolve_stable_color_tx_ = dof_buffer.stabilize_history_tx_;
DRW_draw_pass(resolve_ps_);
drw.submit(resolve_ps_, view);
color_bg_tx_.current().release();
color_fg_tx_.current().release();

View File

@@ -56,13 +56,13 @@ class DepthOfField {
TextureFromPool bokeh_gather_lut_tx_ = {"dof_bokeh_gather_lut"};
TextureFromPool bokeh_resolve_lut_tx_ = {"dof_bokeh_resolve_lut"};
TextureFromPool bokeh_scatter_lut_tx_ = {"dof_bokeh_scatter_lut"};
DRWPass *bokeh_lut_ps_ = nullptr;
PassSimple bokeh_lut_ps_ = {"BokehLut"};
/** Outputs half-resolution color and Circle Of Confusion. */
TextureFromPool setup_coc_tx_ = {"dof_setup_coc"};
TextureFromPool setup_color_tx_ = {"dof_setup_color"};
int3 dispatch_setup_size_ = int3(-1);
DRWPass *setup_ps_ = nullptr;
PassSimple setup_ps_ = {"Setup"};
/** Allocated because we need a mip chain, which isn't supported by TextureFromPool. */
Texture reduced_coc_tx_ = {"dof_reduced_coc"};
@@ -73,12 +73,12 @@ class DepthOfField {
GPUTexture *stabilize_input_ = nullptr;
bool1 stabilize_valid_history_ = false;
int3 dispatch_stabilize_size_ = int3(-1);
DRWPass *stabilize_ps_ = nullptr;
PassSimple stabilize_ps_ = {"Stabilize"};
/** 1/4th res color buffer used to speed up the local contrast test in the first reduce pass. */
TextureFromPool downsample_tx_ = {"dof_downsample"};
int3 dispatch_downsample_size_ = int3(-1);
DRWPass *downsample_ps_ = nullptr;
PassSimple downsample_ps_ = {"Downsample"};
/** Create mip-mapped color & COC textures for gather passes as well as scatter rect list. */
DepthOfFieldScatterListBuf scatter_fg_list_buf_;
@@ -86,20 +86,20 @@ class DepthOfField {
DrawIndirectBuf scatter_fg_indirect_buf_;
DrawIndirectBuf scatter_bg_indirect_buf_;
int3 dispatch_reduce_size_ = int3(-1);
DRWPass *reduce_ps_ = nullptr;
PassSimple reduce_ps_ = {"Reduce"};
/** Outputs min & max COC in each 8x8 half res pixel tiles (so 1/16th of full resolution). */
SwapChain<TextureFromPool, 2> tiles_fg_tx_;
SwapChain<TextureFromPool, 2> tiles_bg_tx_;
int3 dispatch_tiles_flatten_size_ = int3(-1);
DRWPass *tiles_flatten_ps_ = nullptr;
PassSimple tiles_flatten_ps_ = {"TilesFlatten"};
/** Dilates the min & max CoCs to cover maximum COC values. */
int tiles_dilate_ring_count_ = -1;
int tiles_dilate_ring_width_mul_ = -1;
int3 dispatch_tiles_dilate_size_ = int3(-1);
DRWPass *tiles_dilate_minmax_ps_ = nullptr;
DRWPass *tiles_dilate_minabs_ps_ = nullptr;
PassSimple tiles_dilate_minmax_ps_ = {"TilesDilateMinmax"};
PassSimple tiles_dilate_minabs_ps_ = {"TilesDilateMinabs"};
/** Gather convolution for low intensity pixels and low contrast areas. */
SwapChain<TextureFromPool, 2> color_bg_tx_;
@@ -108,29 +108,29 @@ class DepthOfField {
SwapChain<TextureFromPool, 2> weight_fg_tx_;
TextureFromPool occlusion_tx_ = {"dof_occlusion"};
int3 dispatch_gather_size_ = int3(-1);
DRWPass *gather_fg_ps_ = nullptr;
DRWPass *gather_bg_ps_ = nullptr;
PassSimple gather_fg_ps_ = {"GatherFg"};
PassSimple gather_bg_ps_ = {"GatherBg"};
/** Hole-fill convolution: Gather pass meant to fill areas of foreground dis-occlusion. */
TextureFromPool hole_fill_color_tx_ = {"dof_color_hole_fill"};
TextureFromPool hole_fill_weight_tx_ = {"dof_weight_hole_fill"};
DRWPass *hole_fill_ps_ = nullptr;
PassSimple hole_fill_ps_ = {"HoleFill"};
/** Small Filter pass to reduce noise out of gather passes. */
int3 dispatch_filter_size_ = int3(-1);
DRWPass *filter_fg_ps_ = nullptr;
DRWPass *filter_bg_ps_ = nullptr;
PassSimple filter_fg_ps_ = {"FilterFg"};
PassSimple filter_bg_ps_ = {"FilterBg"};
/** Scatter convolution: A quad is emitted for every 4 bright-enough half-res pixels. */
Framebuffer scatter_fg_fb_ = {"dof_scatter_fg"};
Framebuffer scatter_bg_fb_ = {"dof_scatter_bg"};
DRWPass *scatter_fg_ps_ = nullptr;
DRWPass *scatter_bg_ps_ = nullptr;
PassSimple scatter_fg_ps_ = {"ScatterFg"};
PassSimple scatter_bg_ps_ = {"ScatterBg"};
/** Recombine the results and also perform a slight out of focus gather. */
GPUTexture *resolve_stable_color_tx_ = nullptr;
int3 dispatch_resolve_size_ = int3(-1);
DRWPass *resolve_ps_ = nullptr;
PassSimple resolve_ps_ = {"Resolve"};
DepthOfFieldDataBuf data_;
@@ -139,6 +139,8 @@ class DepthOfField {
float fx_max_coc_;
/** Use jittered depth of field where we randomize camera location. */
bool do_jitter_;
/** Enable bokeh lookup texture. */
bool use_bokeh_lut_;
/** Circle of Confusion radius for FX DoF passes. Is in view X direction in [0..1] range. */
float fx_radius_;
@@ -166,7 +168,10 @@ class DepthOfField {
* Will swap input and output texture if rendering happens. The actual output of this function
* is in input_tx.
*/
void render(GPUTexture **input_tx, GPUTexture **output_tx, DepthOfFieldBuffer &dof_buffer);
void render(View &view,
GPUTexture **input_tx,
GPUTexture **output_tx,
DepthOfFieldBuffer &dof_buffer);
bool postfx_enabled() const
{

View File

@@ -377,48 +377,44 @@ void Film::sync()
* Still bind previous step to avoid undefined behavior. */
eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS;
accumulate_ps_ = DRW_pass_create("Film.Accumulate", state);
GPUShader *sh = inst_.shaders.static_shader_get(shader);
DRWShadingGroup *grp = DRW_shgroup_create(sh, accumulate_ps_);
DRW_shgroup_uniform_block_ref(grp, "film_buf", &data_);
DRW_shgroup_uniform_block_ref(grp, "camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
DRW_shgroup_uniform_block_ref(grp, "camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
DRW_shgroup_uniform_block_ref(grp, "camera_next", &(*velocity.camera_steps[step_next]));
DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &rbuffers.depth_tx);
DRW_shgroup_uniform_texture_ref(grp, "combined_tx", &combined_final_tx_);
DRW_shgroup_uniform_texture_ref(grp, "normal_tx", &rbuffers.normal_tx);
DRW_shgroup_uniform_texture_ref(grp, "vector_tx", &rbuffers.vector_tx);
DRW_shgroup_uniform_texture_ref(grp, "light_tx", &rbuffers.light_tx);
DRW_shgroup_uniform_texture_ref(grp, "diffuse_color_tx", &rbuffers.diffuse_color_tx);
DRW_shgroup_uniform_texture_ref(grp, "specular_color_tx", &rbuffers.specular_color_tx);
DRW_shgroup_uniform_texture_ref(grp, "volume_light_tx", &rbuffers.volume_light_tx);
DRW_shgroup_uniform_texture_ref(grp, "emission_tx", &rbuffers.emission_tx);
DRW_shgroup_uniform_texture_ref(grp, "environment_tx", &rbuffers.environment_tx);
DRW_shgroup_uniform_texture_ref(grp, "shadow_tx", &rbuffers.shadow_tx);
DRW_shgroup_uniform_texture_ref(grp, "ambient_occlusion_tx", &rbuffers.ambient_occlusion_tx);
DRW_shgroup_uniform_texture_ref(grp, "aov_color_tx", &rbuffers.aov_color_tx);
DRW_shgroup_uniform_texture_ref(grp, "aov_value_tx", &rbuffers.aov_value_tx);
accumulate_ps_.init();
accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
accumulate_ps_.shader_set(inst_.shaders.static_shader_get(shader));
accumulate_ps_.bind_ubo("film_buf", &data_);
accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx);
accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_);
accumulate_ps_.bind_texture("normal_tx", &rbuffers.normal_tx);
accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx);
accumulate_ps_.bind_texture("light_tx", &rbuffers.light_tx);
accumulate_ps_.bind_texture("diffuse_color_tx", &rbuffers.diffuse_color_tx);
accumulate_ps_.bind_texture("specular_color_tx", &rbuffers.specular_color_tx);
accumulate_ps_.bind_texture("volume_light_tx", &rbuffers.volume_light_tx);
accumulate_ps_.bind_texture("emission_tx", &rbuffers.emission_tx);
accumulate_ps_.bind_texture("environment_tx", &rbuffers.environment_tx);
accumulate_ps_.bind_texture("shadow_tx", &rbuffers.shadow_tx);
accumulate_ps_.bind_texture("ambient_occlusion_tx", &rbuffers.ambient_occlusion_tx);
accumulate_ps_.bind_texture("aov_color_tx", &rbuffers.aov_color_tx);
accumulate_ps_.bind_texture("aov_value_tx", &rbuffers.aov_value_tx);
/* NOTE(@fclem): 16 is the max number of sampled textures in many implementations.
* If we need more, we need to pack more of the similar passes in the same textures as arrays or
* use image binding instead. */
DRW_shgroup_uniform_image_ref(grp, "in_weight_img", &weight_tx_.current());
DRW_shgroup_uniform_image_ref(grp, "out_weight_img", &weight_tx_.next());
DRW_shgroup_uniform_texture_ref_ex(grp, "in_combined_tx", &combined_tx_.current(), filter);
DRW_shgroup_uniform_image_ref(grp, "out_combined_img", &combined_tx_.next());
DRW_shgroup_uniform_image_ref(grp, "depth_img", &depth_tx_);
DRW_shgroup_uniform_image_ref(grp, "color_accum_img", &color_accum_tx_);
DRW_shgroup_uniform_image_ref(grp, "value_accum_img", &value_accum_tx_);
accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current());
accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next());
accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next());
accumulate_ps_.bind_image("depth_img", &depth_tx_);
accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_);
accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_);
/* Sync with rendering passes. */
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
/* Sync with rendering passes. */
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
if (use_compute) {
int2 dispatch_size = math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE));
DRW_shgroup_call_compute(grp, UNPACK2(dispatch_size), 1);
accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1));
}
else {
DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
}
@@ -565,8 +561,9 @@ void Film::accumulate(const DRWView *view, GPUTexture *combined_final_tx)
data_.display_only = false;
data_.push_update();
DRW_view_set_active(view);
DRW_draw_pass(accumulate_ps_);
draw::View drw_view("MainView", view);
DRW_manager_get()->submit(accumulate_ps_, drw_view);
combined_tx_.swap();
weight_tx_.swap();
@@ -593,8 +590,9 @@ void Film::display()
data_.display_only = true;
data_.push_update();
DRW_view_set_active(nullptr);
DRW_draw_pass(accumulate_ps_);
draw::View drw_view("MainView", DRW_view_default_get());
DRW_manager_get()->submit(accumulate_ps_, drw_view);
inst_.render_buffers.release();

View File

@@ -55,7 +55,7 @@ class Film {
/** User setting to disable reprojection. Useful for debugging or to get a more precise render. */
bool force_disable_reprojection_ = false;
DRWPass *accumulate_ps_ = nullptr;
PassSimple accumulate_ps_ = {"Film.Accumulate"};
FilmDataBuf data_;

View File

@@ -32,36 +32,31 @@ void HiZBuffer::sync()
data_.push_update();
{
hiz_update_ps_ = DRW_pass_create("HizUpdate", DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(HIZ_UPDATE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, hiz_update_ps_);
DRW_shgroup_storage_block(grp, "finished_tile_counter", atomic_tile_counter_);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, with_filter);
DRW_shgroup_uniform_image(grp, "out_mip_0", hiz_tx_.mip_view(0));
DRW_shgroup_uniform_image(grp, "out_mip_1", hiz_tx_.mip_view(1));
DRW_shgroup_uniform_image(grp, "out_mip_2", hiz_tx_.mip_view(2));
DRW_shgroup_uniform_image(grp, "out_mip_3", hiz_tx_.mip_view(3));
DRW_shgroup_uniform_image(grp, "out_mip_4", hiz_tx_.mip_view(4));
DRW_shgroup_uniform_image(grp, "out_mip_5", hiz_tx_.mip_view(5));
DRW_shgroup_uniform_image(grp, "out_mip_6", hiz_tx_.mip_view(6));
DRW_shgroup_uniform_image(grp, "out_mip_7", hiz_tx_.mip_view(7));
hiz_update_ps_.init();
hiz_update_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE));
hiz_update_ps_.bind_ssbo("finished_tile_counter", atomic_tile_counter_);
hiz_update_ps_.bind_texture("depth_tx", &render_buffers.depth_tx, with_filter);
hiz_update_ps_.bind_image("out_mip_0", hiz_tx_.mip_view(0));
hiz_update_ps_.bind_image("out_mip_1", hiz_tx_.mip_view(1));
hiz_update_ps_.bind_image("out_mip_2", hiz_tx_.mip_view(2));
hiz_update_ps_.bind_image("out_mip_3", hiz_tx_.mip_view(3));
hiz_update_ps_.bind_image("out_mip_4", hiz_tx_.mip_view(4));
hiz_update_ps_.bind_image("out_mip_5", hiz_tx_.mip_view(5));
hiz_update_ps_.bind_image("out_mip_6", hiz_tx_.mip_view(6));
hiz_update_ps_.bind_image("out_mip_7", hiz_tx_.mip_view(7));
/* TODO(@fclem): There might be occasions where we don't want to
* copy mip 0, for performance reasons, when there is no need for it. */
DRW_shgroup_uniform_bool_copy(grp, "update_mip_0", true);
DRW_shgroup_call_compute(grp, UNPACK2(dispatch_size), 1);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
hiz_update_ps_.push_constant("update_mip_0", true);
hiz_update_ps_.dispatch(int3(dispatch_size, 1));
hiz_update_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) {
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
debug_draw_ps_ = DRW_pass_create("HizUpdate.Debug", state);
GPUShader *sh = inst_.shaders.static_shader_get(HIZ_DEBUG);
DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
this->bind_resources(grp);
DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
}
else {
debug_draw_ps_ = nullptr;
debug_draw_ps_.init();
debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM);
debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(HIZ_DEBUG));
this->bind_resources(&debug_draw_ps_);
debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
}
@@ -79,22 +74,24 @@ void HiZBuffer::update()
GPU_framebuffer_restore();
}
DRW_draw_pass(hiz_update_ps_);
inst_.manager->submit(hiz_update_ps_);
if (G.debug & G_DEBUG_GPU) {
GPU_framebuffer_bind(fb);
}
}
void HiZBuffer::debug_draw(GPUFrameBuffer *view_fb)
void HiZBuffer::debug_draw(View &view, GPUFrameBuffer *view_fb)
{
if (debug_draw_ps_ == nullptr) {
return;
if (inst_.debug_mode == eDebugMode::DEBUG_HIZ_VALIDATION) {
inst_.info =
"Debug Mode: HiZ Validation\n"
" - Red: pixel in front of HiZ tile value.\n"
" - Blue: No error.";
inst_.hiz_buffer.update();
GPU_framebuffer_bind(view_fb);
inst_.manager->submit(debug_draw_ps_, view);
}
inst_.info = "Debug Mode: HiZ Validation";
inst_.hiz_buffer.update();
GPU_framebuffer_bind(view_fb);
DRW_draw_pass(debug_draw_ps_);
}
/** \} */

View File

@@ -36,9 +36,9 @@ class HiZBuffer {
*/
draw::StorageBuffer<uint4, true> atomic_tile_counter_ = {"atomic_tile_counter"};
/** Single pass recursive downsample. */
DRWPass *hiz_update_ps_ = nullptr;
PassSimple hiz_update_ps_ = {"HizUpdate"};
/** Debug pass. */
DRWPass *debug_draw_ps_ = nullptr;
PassSimple debug_draw_ps_ = {"HizUpdate.Debug"};
/** Dirty flag to check if the update is necessary. */
bool is_dirty_ = true;
@@ -67,13 +67,20 @@ class HiZBuffer {
*/
void update();
void debug_draw(GPUFrameBuffer *view_fb);
void debug_draw(View &view, GPUFrameBuffer *view_fb);
void bind_resources(DRWShadingGroup *grp)
{
DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", &hiz_tx_);
DRW_shgroup_uniform_block_ref(grp, "hiz_buf", &data_);
}
/* TODO(fclem): Hardcoded bind slots. */
template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
{
pass->bind_texture("hiz_tx", &hiz_tx_);
pass->bind_ubo("hiz_buf", &data_);
}
};
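The templated variant above is what lets other modules bind the HiZ resources on the new pass types; for example, in LightModule::debug_pass_sync() further down:

```cpp
/* From the light-culling diff below: one call binds hiz_tx and hiz_buf. */
inst_.hiz_buffer.bind_resources(&debug_draw_ps_);
```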
/** \} */

View File

@@ -52,6 +52,7 @@ void Instance::init(const int2 &output_res,
drw_view = drw_view_;
v3d = v3d_;
rv3d = rv3d_;
manager = DRW_manager_get();
if (assign_if_different(debug_mode, (eDebugMode)G.debug_value)) {
sampling.reset();
@@ -126,12 +127,16 @@ void Instance::object_sync(Object *ob)
return;
}
/* TODO cleanup. */
ObjectRef ob_ref = DRW_object_ref_get(ob);
ResourceHandle res_handle = manager->resource_handle(ob_ref);
ObjectHandle &ob_handle = sync.sync_object(ob);
if (partsys_is_visible && ob != DRW_context_state_get()->object_edit) {
LISTBASE_FOREACH (ModifierData *, md, &ob->modifiers) {
if (md->type == eModifierType_ParticleSystem) {
sync.sync_curves(ob, ob_handle, md);
sync.sync_curves(ob, ob_handle, res_handle, md);
}
}
}
@@ -142,15 +147,15 @@ void Instance::object_sync(Object *ob)
lights.sync_light(ob, ob_handle);
break;
case OB_MESH:
sync.sync_mesh(ob, ob_handle);
sync.sync_mesh(ob, ob_handle, res_handle);
break;
case OB_VOLUME:
break;
case OB_CURVES:
sync.sync_curves(ob, ob_handle);
sync.sync_curves(ob, ob_handle, res_handle);
break;
case OB_GPENCIL:
sync.sync_gpencil(ob, ob_handle);
sync.sync_gpencil(ob, ob_handle, res_handle);
break;
default:
break;

View File

@@ -59,6 +59,7 @@ class Instance {
/** Input data. */
Depsgraph *depsgraph;
Manager *manager;
/** Evaluated IDs. */
Scene *scene;
ViewLayer *view_layer;

View File

@@ -399,76 +399,70 @@ void LightModule::culling_pass_sync()
uint culling_tile_dispatch_size = divide_ceil_u(total_word_count_, CULLING_TILE_GROUP_SIZE);
/* NOTE: We reference the buffers that may be resized or updated later. */
culling_ps_.init();
{
DRW_PASS_CREATE(culling_select_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_select_ps_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_);
DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_);
DRW_shgroup_storage_block(grp, "out_zdist_buf", culling_zdist_buf_);
DRW_shgroup_storage_block(grp, "out_key_buf", culling_key_buf_);
DRW_shgroup_call_compute(grp, culling_select_dispatch_size, 1, 1);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
auto &sub = culling_ps_.sub("Select");
sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SELECT));
sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
sub.bind_ssbo("in_light_buf", light_buf_);
sub.bind_ssbo("out_light_buf", culling_light_buf_);
sub.bind_ssbo("out_zdist_buf", culling_zdist_buf_);
sub.bind_ssbo("out_key_buf", culling_key_buf_);
sub.dispatch(int3(culling_select_dispatch_size, 1, 1));
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
}
{
DRW_PASS_CREATE(culling_sort_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_SORT);
DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_sort_ps_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block(grp, "in_light_buf", light_buf_);
DRW_shgroup_storage_block(grp, "out_light_buf", culling_light_buf_);
DRW_shgroup_storage_block(grp, "in_zdist_buf", culling_zdist_buf_);
DRW_shgroup_storage_block(grp, "in_key_buf", culling_key_buf_);
DRW_shgroup_call_compute(grp, culling_sort_dispatch_size, 1, 1);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
auto &sub = culling_ps_.sub("Sort");
sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_SORT));
sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
sub.bind_ssbo("in_light_buf", light_buf_);
sub.bind_ssbo("out_light_buf", culling_light_buf_);
sub.bind_ssbo("in_zdist_buf", culling_zdist_buf_);
sub.bind_ssbo("in_key_buf", culling_key_buf_);
sub.dispatch(int3(culling_sort_dispatch_size, 1, 1));
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
}
{
DRW_PASS_CREATE(culling_zbin_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN);
DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_zbin_ps_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_);
DRW_shgroup_storage_block(grp, "out_zbin_buf", culling_zbin_buf_);
DRW_shgroup_call_compute(grp, 1, 1, 1);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
auto &sub = culling_ps_.sub("Zbin");
sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_ZBIN));
sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
sub.bind_ssbo("light_buf", culling_light_buf_);
sub.bind_ssbo("out_zbin_buf", culling_zbin_buf_);
sub.dispatch(int3(1, 1, 1));
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
}
{
DRW_PASS_CREATE(culling_tile_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_TILE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, culling_tile_ps_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block(grp, "light_buf", culling_light_buf_);
DRW_shgroup_storage_block(grp, "out_light_tile_buf", culling_tile_buf_);
DRW_shgroup_call_compute(grp, culling_tile_dispatch_size, 1, 1);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
auto &sub = culling_ps_.sub("Tiles");
sub.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_TILE));
sub.bind_ssbo("light_cull_buf", &culling_data_buf_);
sub.bind_ssbo("light_buf", culling_light_buf_);
sub.bind_ssbo("out_light_tile_buf", culling_tile_buf_);
sub.dispatch(int3(culling_tile_dispatch_size, 1, 1));
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
}
}
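The four stages above all follow the same recording pattern that replaces the old
DRWPass/DRWShadingGroup pairs. A minimal sketch of that pattern (a hedged sketch;
`ps`, `shader`, `in_buf`, `out_buf` and `group_count` are placeholders, not names
from the patch):

PassSimple ps = {"Example"};
ps.init();                                /* Reset previously recorded commands. */
PassSimple::Sub &sub = ps.sub("Step");
sub.shader_set(shader);                   /* GPUShader *, compiled elsewhere. */
sub.bind_ssbo("in_buf", in_buf);          /* Value overload: buffer captured now. */
sub.bind_ssbo("out_buf", &out_buf);       /* Pointer overload: dereferenced at submit,
                                           * so the buffer may be resized in between. */
sub.dispatch(int3(group_count, 1, 1));
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
/* Later, once per view: */
manager.submit(ps, view);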
void LightModule::debug_pass_sync()
{
if (inst_.debug_mode != eDebugMode::DEBUG_LIGHT_CULLING) {
debug_draw_ps_ = nullptr;
return;
if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) {
debug_draw_ps_.init();
debug_draw_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM);
debug_draw_ps_.shader_set(inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG));
inst_.hiz_buffer.bind_resources(&debug_draw_ps_);
debug_draw_ps_.bind_ssbo("light_buf", &culling_light_buf_);
debug_draw_ps_.bind_ssbo("light_cull_buf", &culling_data_buf_);
debug_draw_ps_.bind_ssbo("light_zbin_buf", &culling_zbin_buf_);
debug_draw_ps_.bind_ssbo("light_tile_buf", &culling_tile_buf_);
debug_draw_ps_.bind_texture("depth_tx", &inst_.render_buffers.depth_tx);
debug_draw_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
debug_draw_ps_ = DRW_pass_create("LightCulling.Debug", state);
GPUShader *sh = inst_.shaders.static_shader_get(LIGHT_CULLING_DEBUG);
DRWShadingGroup *grp = DRW_shgroup_create(sh, debug_draw_ps_);
inst_.hiz_buffer.bind_resources(grp);
DRW_shgroup_storage_block_ref(grp, "light_buf", &culling_light_buf_);
DRW_shgroup_storage_block_ref(grp, "light_cull_buf", &culling_data_buf_);
DRW_shgroup_storage_block_ref(grp, "light_zbin_buf", &culling_zbin_buf_);
DRW_shgroup_storage_block_ref(grp, "light_tile_buf", &culling_tile_buf_);
DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &inst_.render_buffers.depth_tx);
DRW_shgroup_call_procedural_triangles(grp, nullptr, 1);
}
void LightModule::set_view(const DRWView *view, const int2 extent)
void LightModule::set_view(View &view, const int2 extent)
{
float far_z = DRW_view_far_distance_get(view);
float near_z = DRW_view_near_distance_get(view);
float far_z = view.far_clip();
float near_z = view.near_clip();
culling_data_buf_.zbin_scale = -CULLING_ZBIN_COUNT / fabsf(far_z - near_z);
culling_data_buf_.zbin_bias = -near_z * culling_data_buf_.zbin_scale;
@@ -476,26 +470,17 @@ void LightModule::set_view(const DRWView *view, const int2 extent)
culling_data_buf_.visible_count = 0;
culling_data_buf_.push_update();
DRW_stats_group_start("Light Culling");
DRW_view_set_active(view);
DRW_draw_pass(culling_select_ps_);
DRW_draw_pass(culling_sort_ps_);
DRW_draw_pass(culling_zbin_ps_);
DRW_draw_pass(culling_tile_ps_);
DRW_stats_group_end();
inst_.manager->submit(culling_ps_, view);
}
void LightModule::debug_draw(GPUFrameBuffer *view_fb)
void LightModule::debug_draw(View &view, GPUFrameBuffer *view_fb)
{
if (debug_draw_ps_ == nullptr) {
return;
if (inst_.debug_mode == eDebugMode::DEBUG_LIGHT_CULLING) {
inst_.info = "Debug Mode: Light Culling Validation";
inst_.hiz_buffer.update();
GPU_framebuffer_bind(view_fb);
inst_.manager->submit(debug_draw_ps_, view);
}
inst_.info = "Debug Mode: Light Culling Validation";
inst_.hiz_buffer.update();
GPU_framebuffer_bind(view_fb);
DRW_draw_pass(debug_draw_ps_);
}
/** \} */


@@ -116,16 +116,12 @@ class LightModule {
/** Bitmap of lights touching each tiles. */
LightCullingTileBuf culling_tile_buf_ = {"LightCull_tile"};
/** Culling compute passes. */
DRWPass *culling_select_ps_ = nullptr;
DRWPass *culling_sort_ps_ = nullptr;
DRWPass *culling_zbin_ps_ = nullptr;
DRWPass *culling_tile_ps_ = nullptr;
PassSimple culling_ps_ = {"LightCulling"};
/** Total number of words the tile buffer needs to contain for the render resolution. */
uint total_word_count_ = 0;
/** Debug Culling visualization. */
DRWPass *debug_draw_ps_ = nullptr;
/* GPUTexture *input_depth_tx_ = nullptr; */
PassSimple debug_draw_ps_ = {"LightCulling.Debug"};
public:
LightModule(Instance &inst) : inst_(inst){};
@@ -138,9 +134,9 @@ class LightModule {
/**
* Update acceleration structure for the given view.
*/
void set_view(const DRWView *view, const int2 extent);
void set_view(View &view, const int2 extent);
void debug_draw(GPUFrameBuffer *view_fb);
void debug_draw(View &view, GPUFrameBuffer *view_fb);
void bind_resources(DRWShadingGroup *grp)
{
@@ -154,6 +150,15 @@ class LightModule {
#endif
}
template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
{
/* Storage Buf. */
pass->bind_ssbo(LIGHT_CULL_BUF_SLOT, &culling_data_buf_);
pass->bind_ssbo(LIGHT_BUF_SLOT, &culling_light_buf_);
pass->bind_ssbo(LIGHT_ZBIN_BUF_SLOT, &culling_zbin_buf_);
pass->bind_ssbo(LIGHT_TILE_BUF_SLOT, &culling_tile_buf_);
}
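Consumer passes pull all four culling buffers in with a single call; later in this
patch the forward pipeline does exactly this, for both a whole pass and a sub-pass:

inst_.lights.bind_resources(&opaque_ps_);  /* PassMain */
inst_.lights.bind_resources(&sub);         /* PassMain::Sub */

The draw::detail::PassBase<T> template parameter is what lets one method accept
PassSimple, PassMain and their Sub types alike.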
private:
void culling_pass_sync();
void debug_pass_sync();


@@ -145,9 +145,6 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
MaterialModule::~MaterialModule()
{
for (Material *mat : material_map_.values()) {
delete mat;
}
BKE_id_free(nullptr, glossy_mat);
BKE_id_free(nullptr, diffuse_mat);
BKE_id_free(nullptr, error_mat_);
@@ -157,13 +154,12 @@ void MaterialModule::begin_sync()
{
queued_shaders_count = 0;
for (Material *mat : material_map_.values()) {
mat->init = false;
}
material_map_.clear();
shader_map_.clear();
}
MaterialPass MaterialModule::material_pass_get(::Material *blender_mat,
MaterialPass MaterialModule::material_pass_get(Object *ob,
::Material *blender_mat,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
@@ -203,35 +199,34 @@ MaterialPass MaterialModule::material_pass_get(::Material *blender_mat,
pipeline_type = MAT_PIPE_FORWARD;
}
if ((pipeline_type == MAT_PIPE_FORWARD) &&
if (ELEM(pipeline_type,
MAT_PIPE_FORWARD,
MAT_PIPE_FORWARD_PREPASS,
MAT_PIPE_FORWARD_PREPASS_VELOCITY) &&
GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT)) {
/* Transparent materials need one shgroup per object to support reordering. */
matpass.shgrp = inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type);
/* Transparent pass is generated later. */
matpass.sub_pass = nullptr;
}
else {
ShaderKey shader_key(matpass.gpumat, geometry_type, pipeline_type);
auto add_cb = [&]() -> DRWShadingGroup * {
/* First time encountering this shader. Create a shading group. */
return inst_.pipelines.material_add(blender_mat, matpass.gpumat, pipeline_type);
};
DRWShadingGroup *grp = shader_map_.lookup_or_add_cb(shader_key, add_cb);
PassMain::Sub *shader_sub = shader_map_.lookup_or_add_cb(shader_key, [&]() {
/* First time encountering this shader. Create a sub that will contain materials using it. */
return inst_.pipelines.material_add(ob, blender_mat, matpass.gpumat, pipeline_type);
});
if (grp != nullptr) {
/* Shading group for this shader already exists. Create a sub one for this material. */
/* IMPORTANT: We always create a subgroup so that all subgroups are inserted after the
 * first "empty" shgroup. This avoids messing up the order of subgroups when there are
 * more nested subgroups (e.g. hair drawing). */
/* TODO(@fclem): Remove material resource binding from the first group creation. */
matpass.shgrp = DRW_shgroup_create_sub(grp);
DRW_shgroup_add_material_resources(matpass.shgrp, matpass.gpumat);
if (shader_sub != nullptr) {
/* Create a sub for this material, as `shader_sub` is shared between all materials using this shader. */
matpass.sub_pass = &shader_sub->sub(GPU_material_get_name(matpass.gpumat));
matpass.sub_pass->material_set(*inst_.manager, matpass.gpumat);
}
}
return matpass;
}
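The resulting pass tree for two materials sharing one shader looks schematically
like this (a sketch inferred from the code above, not output from the patch):

/* PassMain "Shading"
 * └─ Sub "<shader name>"     shader_map_ entry: binds the shader once.
 *    ├─ Sub "<material A>"   material_set(): per-material textures and UBOs.
 *    └─ Sub "<material B>"
 */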
Material &MaterialModule::material_sync(::Material *blender_mat,
Material &MaterialModule::material_sync(Object *ob,
::Material *blender_mat,
eMaterialGeometry geometry_type,
bool has_motion)
{
@@ -249,27 +244,32 @@ Material &MaterialModule::material_sync(::Material *blender_mat,
MaterialKey material_key(blender_mat, geometry_type, surface_pipe);
/* TODO: allocate in blocks to avoid memory fragmentation. */
auto add_cb = [&]() { return new Material(); };
Material &mat = *material_map_.lookup_or_add_cb(material_key, add_cb);
/* Forward pipeline needs to use one shgroup per object. */
if (mat.init == false || (surface_pipe == MAT_PIPE_FORWARD)) {
mat.init = true;
Material &mat = material_map_.lookup_or_add_cb(material_key, [&]() {
Material mat;
/* Order is important for transparent. */
mat.prepass = material_pass_get(blender_mat, prepass_pipe, geometry_type);
mat.shading = material_pass_get(blender_mat, surface_pipe, geometry_type);
mat.prepass = material_pass_get(ob, blender_mat, prepass_pipe, geometry_type);
mat.shading = material_pass_get(ob, blender_mat, surface_pipe, geometry_type);
if (blender_mat->blend_shadow == MA_BS_NONE) {
mat.shadow = MaterialPass();
}
else {
mat.shadow = material_pass_get(blender_mat, MAT_PIPE_SHADOW, geometry_type);
mat.shadow = material_pass_get(ob, blender_mat, MAT_PIPE_SHADOW, geometry_type);
}
mat.is_alpha_blend_transparent = (blender_mat->blend_method == MA_BM_BLEND) &&
GPU_material_flag_get(mat.prepass.gpumat,
GPU_material_flag_get(mat.shading.gpumat,
GPU_MATFLAG_TRANSPARENT);
return mat;
});
if (mat.is_alpha_blend_transparent) {
/* Transparent materials need one sub-pass per object to support reordering.
 * NOTE: The pre-pass needs to be created first in order to be sorted first. */
mat.prepass.sub_pass = inst_.pipelines.forward.prepass_transparent_add(
ob, blender_mat, mat.shading.gpumat);
mat.shading.sub_pass = inst_.pipelines.forward.material_transparent_add(
ob, blender_mat, mat.shading.gpumat);
}
return mat;
}
@@ -297,7 +297,7 @@ MaterialArray &MaterialModule::material_array_get(Object *ob, bool has_motion)
for (auto i : IndexRange(materials_len)) {
::Material *blender_mat = material_from_slot(ob, i);
Material &mat = material_sync(blender_mat, to_material_geometry(ob), has_motion);
Material &mat = material_sync(ob, blender_mat, to_material_geometry(ob), has_motion);
material_array_.materials.append(&mat);
material_array_.gpu_materials.append(mat.shading.gpumat);
}
@@ -310,7 +310,7 @@ Material &MaterialModule::material_get(Object *ob,
eMaterialGeometry geometry_type)
{
::Material *blender_mat = material_from_slot(ob, mat_nr);
Material &mat = material_sync(blender_mat, geometry_type, has_motion);
Material &mat = material_sync(ob, blender_mat, geometry_type, has_motion);
return mat;
}


@@ -203,12 +203,11 @@ class DefaultSurfaceNodeTree {
* \{ */
struct MaterialPass {
GPUMaterial *gpumat = nullptr;
DRWShadingGroup *shgrp = nullptr;
GPUMaterial *gpumat;
PassMain::Sub *sub_pass;
};
struct Material {
bool init = false;
bool is_alpha_blend_transparent;
MaterialPass shadow, shading, prepass;
};
@@ -228,8 +227,8 @@ class MaterialModule {
private:
Instance &inst_;
Map<MaterialKey, Material *> material_map_;
Map<ShaderKey, DRWShadingGroup *> shader_map_;
Map<MaterialKey, Material> material_map_;
Map<ShaderKey, PassMain::Sub *> shader_map_;
MaterialArray material_array_;
@@ -254,13 +253,15 @@ class MaterialModule {
Material &material_get(Object *ob, bool has_motion, int mat_nr, eMaterialGeometry geometry_type);
private:
Material &material_sync(::Material *blender_mat,
Material &material_sync(Object *ob,
::Material *blender_mat,
eMaterialGeometry geometry_type,
bool has_motion);
/** Return correct material or empty default material if slot is empty. */
::Material *material_from_slot(Object *ob, int slot);
MaterialPass material_pass_get(::Material *blender_mat,
MaterialPass material_pass_get(Object *ob,
::Material *blender_mat,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
};
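Storing Material by value means the map owns its entries, which is why the
destructor's delete loop and the `new Material()` allocation callback disappear
above. Schematically (illustrative only):

Map<MaterialKey, Material *> material_map_;  /* Before: heap-allocated, manually freed. */
Map<MaterialKey, Material> material_map_;    /* After: owned by the map, freed with it. */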


@@ -135,53 +135,49 @@ void MotionBlurModule::sync()
eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
RenderBuffers &render_buffers = inst_.render_buffers;
motion_blur_ps_.init();
inst_.velocity.bind_resources(&motion_blur_ps_);
inst_.sampling.bind_resources(&motion_blur_ps_);
{
/* Create max velocity tiles. */
DRW_PASS_CREATE(tiles_flatten_ps_, DRW_STATE_NO_DRAW);
PassSimple::Sub &sub = motion_blur_ps_.sub("TilesFlatten");
eShaderType shader = (inst_.is_viewport()) ? MOTION_BLUR_TILE_FLATTEN_VIEWPORT :
MOTION_BLUR_TILE_FLATTEN_RENDER;
GPUShader *sh = inst_.shaders.static_shader_get(shader);
DRWShadingGroup *grp = DRW_shgroup_create(sh, tiles_flatten_ps_);
inst_.velocity.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "motion_blur_buf", data_);
DRW_shgroup_uniform_texture_ref(grp, "depth_tx", &render_buffers.depth_tx);
DRW_shgroup_uniform_image_ref(grp, "velocity_img", &render_buffers.vector_tx);
DRW_shgroup_uniform_image_ref(grp, "out_tiles_img", &tiles_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_flatten_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);
sub.shader_set(inst_.shaders.static_shader_get(shader));
sub.bind_ubo("motion_blur_buf", data_);
sub.bind_texture("depth_tx", &render_buffers.depth_tx);
sub.bind_image("velocity_img", &render_buffers.vector_tx);
sub.bind_image("out_tiles_img", &tiles_tx_);
sub.dispatch(&dispatch_flatten_size_);
sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);
}
{
/* Expand max velocity tiles by spreading them in their neighborhood. */
DRW_PASS_CREATE(tiles_dilate_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(MOTION_BLUR_TILE_DILATE);
DRWShadingGroup *grp = DRW_shgroup_create(sh, tiles_dilate_ps_);
DRW_shgroup_storage_block(grp, "tile_indirection_buf", tile_indirection_buf_);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_img", &tiles_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_dilate_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_STORAGE);
PassSimple::Sub &sub = motion_blur_ps_.sub("TilesDilate");
sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_TILE_DILATE));
sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_);
sub.bind_image("in_tiles_img", &tiles_tx_);
sub.dispatch(&dispatch_dilate_size_);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
}
{
/* Do the motion blur gather algorithm. */
DRW_PASS_CREATE(gather_ps_, DRW_STATE_NO_DRAW);
GPUShader *sh = inst_.shaders.static_shader_get(MOTION_BLUR_GATHER);
DRWShadingGroup *grp = DRW_shgroup_create(sh, gather_ps_);
inst_.sampling.bind_resources(grp);
DRW_shgroup_uniform_block(grp, "motion_blur_buf", data_);
DRW_shgroup_storage_block(grp, "tile_indirection_buf", tile_indirection_buf_);
DRW_shgroup_uniform_texture_ref_ex(grp, "depth_tx", &render_buffers.depth_tx, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "velocity_tx", &render_buffers.vector_tx, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "in_color_tx", &input_color_tx_, no_filter);
DRW_shgroup_uniform_image_ref(grp, "in_tiles_img", &tiles_tx_);
DRW_shgroup_uniform_image_ref(grp, "out_color_img", &output_color_tx_);
PassSimple::Sub &sub = motion_blur_ps_.sub("ConvolveGather");
sub.shader_set(inst_.shaders.static_shader_get(MOTION_BLUR_GATHER));
sub.bind_ubo("motion_blur_buf", data_);
sub.bind_ssbo("tile_indirection_buf", tile_indirection_buf_);
sub.bind_texture("depth_tx", &render_buffers.depth_tx, no_filter);
sub.bind_texture("velocity_tx", &render_buffers.vector_tx, no_filter);
sub.bind_texture("in_color_tx", &input_color_tx_, no_filter);
sub.bind_image("in_tiles_img", &tiles_tx_);
sub.bind_image("out_color_img", &output_color_tx_);
DRW_shgroup_call_compute_ref(grp, dispatch_gather_size_);
DRW_shgroup_barrier(grp, GPU_BARRIER_TEXTURE_FETCH);
sub.dispatch(&dispatch_gather_size_);
sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
}
void MotionBlurModule::render(GPUTexture **input_tx, GPUTexture **output_tx)
void MotionBlurModule::render(View &view, GPUTexture **input_tx, GPUTexture **output_tx)
{
if (!motion_blur_fx_enabled_) {
return;
@@ -239,9 +235,7 @@ void MotionBlurModule::render(GPUTexture **input_tx, GPUTexture **output_tx)
GPU_storagebuf_clear_to_zero(tile_indirection_buf_);
DRW_draw_pass(tiles_flatten_ps_);
DRW_draw_pass(tiles_dilate_ps_);
DRW_draw_pass(gather_ps_);
inst_.manager->submit(motion_blur_ps_, view);
tiles_tx_.release();


@@ -95,9 +95,7 @@ class MotionBlurModule {
GPUTexture *input_color_tx_ = nullptr;
GPUTexture *output_color_tx_ = nullptr;
DRWPass *tiles_flatten_ps_ = nullptr;
DRWPass *tiles_dilate_ps_ = nullptr;
DRWPass *gather_ps_ = nullptr;
PassSimple motion_blur_ps_ = {"MotionBlur"};
MotionBlurTileIndirectionBuf tile_indirection_buf_;
MotionBlurDataBuf data_;
@@ -121,7 +119,7 @@ class MotionBlurModule {
return motion_blur_fx_enabled_;
}
void render(GPUTexture **input_tx, GPUTexture **output_tx);
void render(View &view, GPUTexture **input_tx, GPUTexture **output_tx);
private:
float shutter_time_to_scene_time(float time);


@@ -24,36 +24,35 @@ namespace blender::eevee {
void WorldPipeline::sync(GPUMaterial *gpumat)
{
Manager &manager = *inst_.manager;
RenderBuffers &rbufs = inst_.render_buffers;
DRWState state = DRW_STATE_WRITE_COLOR;
world_ps_ = DRW_pass_create("World", state);
ResourceHandle handle = manager.resource_handle(float4x4::identity().ptr());
/* Push a matrix at the same location as the camera. */
float4x4 camera_mat = float4x4::identity();
// copy_v3_v3(camera_mat[3], inst_.camera.data_get().viewinv[3]);
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, world_ps_);
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
DRW_shgroup_call_obmat(grp, DRW_cache_fullscreen_quad_get(), camera_mat.ptr());
DRW_shgroup_uniform_float_copy(grp, "world_opacity_fade", inst_.film.background_opacity_get());
world_ps_.init();
world_ps_.state_set(DRW_STATE_WRITE_COLOR);
world_ps_.material_set(manager, gpumat);
world_ps_.push_constant("world_opacity_fade", inst_.film.background_opacity_get());
world_ps_.bind_texture("utility_tx", inst_.pipelines.utility_tx);
/* AOVs. */
DRW_shgroup_uniform_image_ref(grp, "aov_color_img", &rbufs.aov_color_tx);
DRW_shgroup_uniform_image_ref(grp, "aov_value_img", &rbufs.aov_value_tx);
DRW_shgroup_storage_block_ref(grp, "aov_buf", &inst_.film.aovs_info);
world_ps_.bind_image("aov_color_img", &rbufs.aov_color_tx);
world_ps_.bind_image("aov_value_img", &rbufs.aov_value_tx);
world_ps_.bind_ssbo("aov_buf", &inst_.film.aovs_info);
/* RenderPasses. Cleared by background (even if bad practice). */
DRW_shgroup_uniform_image_ref(grp, "rp_normal_img", &rbufs.normal_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_light_img", &rbufs.light_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_diffuse_color_img", &rbufs.diffuse_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_specular_color_img", &rbufs.specular_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_emission_img", &rbufs.emission_tx);
world_ps_.bind_image("rp_normal_img", &rbufs.normal_tx);
world_ps_.bind_image("rp_light_img", &rbufs.light_tx);
world_ps_.bind_image("rp_diffuse_color_img", &rbufs.diffuse_color_tx);
world_ps_.bind_image("rp_specular_color_img", &rbufs.specular_color_tx);
world_ps_.bind_image("rp_emission_img", &rbufs.emission_tx);
world_ps_.draw(DRW_cache_fullscreen_quad_get(), handle);
/* To allow opaque pass rendering over it. */
DRW_shgroup_barrier(grp, GPU_BARRIER_SHADER_IMAGE_ACCESS);
world_ps_.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
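Draws now reference per-object data through a ResourceHandle owned by the Manager
instead of per-shgroup obmat uniforms. A sketch of the pattern, grounded in the
calls above and in Instance::object_sync (variable names illustrative):

ResourceHandle handle = manager.resource_handle(ob_ref);                     /* Scene object. */
ResourceHandle world = manager.resource_handle(float4x4::identity().ptr());  /* Raw matrix. */
pass.draw(geom, handle);  /* The handle indexes the per-resource matrix/info arrays. */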
void WorldPipeline::render()
void WorldPipeline::render(View &view)
{
DRW_draw_pass(world_ps_);
inst_.manager->submit(world_ps_, view);
}
/** \} */
@@ -66,194 +65,150 @@ void WorldPipeline::render()
void ForwardPipeline::sync()
{
camera_forward_ = inst_.camera.forward();
DRWState state_depth_only = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS;
DRWState state_depth_color = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS |
DRW_STATE_WRITE_COLOR;
{
DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS;
prepass_ps_ = DRW_pass_create("Forward.Opaque.Prepass", state);
prepass_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity",
state | DRW_STATE_WRITE_COLOR);
prepass_ps_.init();
state |= DRW_STATE_CULL_BACK;
prepass_culled_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Culled", state);
prepass_culled_velocity_ps_ = DRW_pass_create("Forward.Opaque.Prepass.Velocity",
state | DRW_STATE_WRITE_COLOR);
{
/* Common resources. */
DRW_pass_link(prepass_ps_, prepass_velocity_ps_);
DRW_pass_link(prepass_velocity_ps_, prepass_culled_ps_);
DRW_pass_link(prepass_culled_ps_, prepass_culled_velocity_ps_);
/* Textures. */
prepass_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.velocity.bind_resources(&prepass_ps_);
inst_.sampling.bind_resources(&prepass_ps_);
}
prepass_double_sided_static_ps_ = &prepass_ps_.sub("DoubleSided.Static");
prepass_double_sided_static_ps_->state_set(state_depth_only);
prepass_single_sided_static_ps_ = &prepass_ps_.sub("SingleSided.Static");
prepass_single_sided_static_ps_->state_set(state_depth_only | DRW_STATE_CULL_BACK);
prepass_double_sided_moving_ps_ = &prepass_ps_.sub("DoubleSided.Moving");
prepass_double_sided_moving_ps_->state_set(state_depth_color);
prepass_single_sided_moving_ps_ = &prepass_ps_.sub("SingleSided.Moving");
prepass_single_sided_moving_ps_->state_set(state_depth_color | DRW_STATE_CULL_BACK);
}
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
opaque_ps_ = DRW_pass_create("Forward.Opaque", state);
opaque_ps_.init();
state |= DRW_STATE_CULL_BACK;
opaque_culled_ps_ = DRW_pass_create("Forward.Opaque.Culled", state);
{
/* Common resources. */
DRW_pass_link(opaque_ps_, opaque_culled_ps_);
/* RenderPasses. */
opaque_ps_.bind_image(RBUFS_NORMAL_SLOT, &inst_.render_buffers.normal_tx);
opaque_ps_.bind_image(RBUFS_LIGHT_SLOT, &inst_.render_buffers.light_tx);
opaque_ps_.bind_image(RBUFS_DIFF_COLOR_SLOT, &inst_.render_buffers.diffuse_color_tx);
opaque_ps_.bind_image(RBUFS_SPEC_COLOR_SLOT, &inst_.render_buffers.specular_color_tx);
opaque_ps_.bind_image(RBUFS_EMISSION_SLOT, &inst_.render_buffers.emission_tx);
/* AOVs. */
opaque_ps_.bind_image(RBUFS_AOV_COLOR_SLOT, &inst_.render_buffers.aov_color_tx);
opaque_ps_.bind_image(RBUFS_AOV_VALUE_SLOT, &inst_.render_buffers.aov_value_tx);
/* Storage Buf. */
opaque_ps_.bind_ssbo(RBUFS_AOV_BUF_SLOT, &inst_.film.aovs_info);
/* Textures. */
opaque_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.lights.bind_resources(&opaque_ps_);
inst_.sampling.bind_resources(&opaque_ps_);
}
opaque_single_sided_ps_ = &opaque_ps_.sub("SingleSided");
opaque_single_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL |
DRW_STATE_CULL_BACK);
opaque_double_sided_ps_ = &opaque_ps_.sub("DoubleSided");
opaque_double_sided_ps_->state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL);
}
{
DRWState state = DRW_STATE_DEPTH_LESS_EQUAL;
transparent_ps_ = DRW_pass_create("Forward.Transparent", state);
transparent_ps_.init();
/* Workaround for a limitation of PassSortable. Use a dummy pass that will be sorted
 * first in all circumstances. */
PassMain::Sub &sub = transparent_ps_.sub("ResourceBind", -FLT_MAX);
/* Common resources. */
/* Textures. */
sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.lights.bind_resources(&sub);
inst_.sampling.bind_resources(&sub);
}
}
DRWShadingGroup *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat)
PassMain::Sub *ForwardPipeline::prepass_opaque_add(::Material *blender_mat,
GPUMaterial *gpumat,
bool has_motion)
{
RenderBuffers &rbufs = inst_.render_buffers;
DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_culled_ps_ : opaque_ps_;
LightModule &lights = inst_.lights;
Sampling &sampling = inst_.sampling;
// LightProbeModule &lightprobes = inst_.lightprobes;
// RaytracingModule &raytracing = inst_.raytracing;
// eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
lights.bind_resources(grp);
sampling.bind_resources(grp);
// DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
// DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
// DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
// DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get());
// DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
// DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
/* AOVs. */
DRW_shgroup_uniform_image_ref(grp, "aov_color_img", &rbufs.aov_color_tx);
DRW_shgroup_uniform_image_ref(grp, "aov_value_img", &rbufs.aov_value_tx);
DRW_shgroup_storage_block_ref(grp, "aov_buf", &inst_.film.aovs_info);
/* RenderPasses. */
DRW_shgroup_uniform_image_ref(grp, "rp_normal_img", &rbufs.normal_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_light_img", &rbufs.light_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_diffuse_color_img", &rbufs.diffuse_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_specular_color_img", &rbufs.specular_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_emission_img", &rbufs.emission_tx);
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
// if (true) {
// DRW_shgroup_uniform_texture_ref(
// grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get());
// }
// if (raytracing.enabled()) {
// DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data);
// DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data);
// DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data);
// DRW_shgroup_uniform_texture_ref_ex(grp, "radiance_tx", &input_screen_radiance_tx_,
// no_interp);
// }
// if (raytracing.enabled()) {
// DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get());
// DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get());
// }
return grp;
PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ?
(has_motion ? prepass_single_sided_moving_ps_ :
prepass_single_sided_static_ps_) :
(has_motion ? prepass_double_sided_moving_ps_ :
prepass_double_sided_static_ps_);
return &pass->sub(GPU_material_get_name(gpumat));
}
DRWShadingGroup *ForwardPipeline::prepass_opaque_add(::Material *blender_mat,
GPUMaterial *gpumat,
bool has_motion)
PassMain::Sub *ForwardPipeline::material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat)
{
DRWPass *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ?
(has_motion ? prepass_culled_velocity_ps_ : prepass_culled_ps_) :
(has_motion ? prepass_velocity_ps_ : prepass_ps_);
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, pass);
if (has_motion) {
inst_.velocity.bind_resources(grp);
}
return grp;
PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ? opaque_single_sided_ps_ :
opaque_double_sided_ps_;
return &pass->sub(GPU_material_get_name(gpumat));
}
DRWShadingGroup *ForwardPipeline::material_transparent_add(::Material *blender_mat,
GPUMaterial *gpumat)
{
RenderBuffers &rbufs = inst_.render_buffers;
LightModule &lights = inst_.lights;
Sampling &sampling = inst_.sampling;
// LightProbeModule &lightprobes = inst_.lightprobes;
// RaytracingModule &raytracing = inst_.raytracing;
// eGPUSamplerState no_interp = GPU_SAMPLER_DEFAULT;
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
lights.bind_resources(grp);
sampling.bind_resources(grp);
// DRW_shgroup_uniform_block(grp, "sampling_buf", inst_.sampling.ubo_get());
// DRW_shgroup_uniform_block(grp, "grids_buf", lightprobes.grid_ubo_get());
// DRW_shgroup_uniform_block(grp, "cubes_buf", lightprobes.cube_ubo_get());
// DRW_shgroup_uniform_block(grp, "probes_buf", lightprobes.info_ubo_get());
// DRW_shgroup_uniform_texture_ref(grp, "lightprobe_grid_tx", lightprobes.grid_tx_ref_get());
// DRW_shgroup_uniform_texture_ref(grp, "lightprobe_cube_tx", lightprobes.cube_tx_ref_get());
DRW_shgroup_uniform_texture(grp, "utility_tx", inst_.pipelines.utility_tx);
/* TODO(fclem): Make this only needed if material uses it ... somehow. */
// if (true) {
// DRW_shgroup_uniform_texture_ref(
// grp, "sss_transmittance_tx", inst_.subsurface.transmittance_ref_get());
// }
// if (raytracing.enabled()) {
// DRW_shgroup_uniform_block(grp, "rt_diffuse_buf", raytracing.diffuse_data);
// DRW_shgroup_uniform_block(grp, "rt_reflection_buf", raytracing.reflection_data);
// DRW_shgroup_uniform_block(grp, "rt_refraction_buf", raytracing.refraction_data);
// DRW_shgroup_uniform_texture_ref_ex(
// grp, "rt_radiance_tx", &input_screen_radiance_tx_, no_interp);
// }
// if (raytracing.enabled()) {
// DRW_shgroup_uniform_block(grp, "hiz_buf", inst_.hiz.ubo_get());
// DRW_shgroup_uniform_texture_ref(grp, "hiz_tx", inst_.hiz_front.texture_ref_get());
// }
{
/* TODO(fclem): This is not needed. This is only to please the OpenGL debug layer.
 * If we are to introduce transparency render-pass support, it would be through a
 * separate pass. */
/* AOVs. */
DRW_shgroup_uniform_image_ref(grp, "aov_color_img", &rbufs.aov_color_tx);
DRW_shgroup_uniform_image_ref(grp, "aov_value_img", &rbufs.aov_value_tx);
DRW_shgroup_storage_block_ref(grp, "aov_buf", &inst_.film.aovs_info);
/* RenderPasses. */
DRW_shgroup_uniform_image_ref(grp, "rp_normal_img", &rbufs.normal_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_light_img", &rbufs.light_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_diffuse_color_img", &rbufs.diffuse_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_specular_color_img", &rbufs.specular_color_tx);
DRW_shgroup_uniform_image_ref(grp, "rp_emission_img", &rbufs.emission_tx);
}
DRWState state_disable = DRW_STATE_WRITE_DEPTH;
DRWState state_enable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
state_enable |= DRW_STATE_CULL_BACK;
}
DRW_shgroup_state_disable(grp, state_disable);
DRW_shgroup_state_enable(grp, state_enable);
return grp;
}
DRWShadingGroup *ForwardPipeline::prepass_transparent_add(::Material *blender_mat,
GPUMaterial *gpumat)
PassMain::Sub *ForwardPipeline::prepass_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat)
{
if ((blender_mat->blend_flag & MA_BL_HIDE_BACKFACE) == 0) {
return nullptr;
}
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, transparent_ps_);
DRWState state_disable = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
DRWState state_enable = DRW_STATE_WRITE_DEPTH;
if (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) {
state_enable |= DRW_STATE_CULL_BACK;
DRWState state = DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS_EQUAL;
if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) {
state |= DRW_STATE_CULL_BACK;
}
DRW_shgroup_state_disable(grp, state_disable);
DRW_shgroup_state_enable(grp, state_enable);
return grp;
float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_);
PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value);
pass->state_set(state);
pass->material_set(*inst_.manager, gpumat);
return pass;
}
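The sorting value is the signed distance of the object's origin along the camera
forward axis: an object at world position (0, 0, -10) with camera_forward_ =
(0, 0, -1) yields dot((0, 0, -10), (0, 0, -1)) = 10. PassSortable orders its
sub-passes by this key at submit time, which is what makes the per-frame
DRW_pass_sort_shgroup_z() call removed later in this patch unnecessary.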
void ForwardPipeline::render(const DRWView *view,
PassMain::Sub *ForwardPipeline::material_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat)
{
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM | DRW_STATE_DEPTH_LESS_EQUAL;
if ((blender_mat->blend_flag & MA_BL_CULL_BACKFACE)) {
state |= DRW_STATE_CULL_BACK;
}
float sorting_value = math::dot(float3(ob->obmat[3]), camera_forward_);
PassMain::Sub *pass = &transparent_ps_.sub(GPU_material_get_name(gpumat), sorting_value);
pass->state_set(state);
pass->material_set(*inst_.manager, gpumat);
return pass;
}
void ForwardPipeline::render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture *UNUSED(combined_tx))
{
UNUSED_VARS(view);
DRW_stats_group_start("ForwardOpaque");
DRW_stats_group_start("Forward.Opaque");
GPU_framebuffer_bind(prepass_fb);
DRW_draw_pass(prepass_ps_);
inst_.manager->submit(prepass_ps_, view);
if (!DRW_pass_is_empty(prepass_ps_)) {
inst_.hiz_buffer.set_dirty();
}
// if (!DRW_pass_is_empty(prepass_ps_)) {
inst_.hiz_buffer.set_dirty();
// }
// if (inst_.raytracing.enabled()) {
// rt_buffer.radiance_copy(combined_tx);
@@ -263,17 +218,11 @@ void ForwardPipeline::render(const DRWView *view,
// inst_.shadows.set_view(view, depth_tx);
GPU_framebuffer_bind(combined_fb);
DRW_draw_pass(opaque_ps_);
inst_.manager->submit(opaque_ps_, view);
DRW_stats_group_end();
DRW_stats_group_start("ForwardTransparent");
/* TODO(fclem) This is suboptimal. We could sort during sync. */
/* FIXME(fclem) This won't work for panoramic views, where we need
 * to sort by distance to the camera, not by z. */
DRW_pass_sort_shgroup_z(transparent_ps_);
DRW_draw_pass(transparent_ps_);
DRW_stats_group_end();
inst_.manager->submit(transparent_ps_, view);
// if (inst_.raytracing.enabled()) {
// gbuffer.ray_radiance_tx.release();


@@ -13,6 +13,7 @@
#pragma once
#include "DRW_render.h"
#include "draw_shader_shared.h"
/* TODO(fclem): Move it to GPU/DRAW. */
#include "../eevee/eevee_lut.h"
@@ -31,13 +32,13 @@ class WorldPipeline {
private:
Instance &inst_;
DRWPass *world_ps_ = nullptr;
PassSimple world_ps_ = {"World.Background"};
public:
WorldPipeline(Instance &inst) : inst_(inst){};
void sync(GPUMaterial *gpumat);
void render();
void render(View &view);
};
/** \} */
@@ -52,13 +53,18 @@ class ForwardPipeline {
private:
Instance &inst_;
DRWPass *prepass_ps_ = nullptr;
DRWPass *prepass_velocity_ps_ = nullptr;
DRWPass *prepass_culled_ps_ = nullptr;
DRWPass *prepass_culled_velocity_ps_ = nullptr;
DRWPass *opaque_ps_ = nullptr;
DRWPass *opaque_culled_ps_ = nullptr;
DRWPass *transparent_ps_ = nullptr;
PassMain prepass_ps_ = {"Prepass"};
PassMain::Sub *prepass_single_sided_static_ps_ = nullptr;
PassMain::Sub *prepass_single_sided_moving_ps_ = nullptr;
PassMain::Sub *prepass_double_sided_static_ps_ = nullptr;
PassMain::Sub *prepass_double_sided_moving_ps_ = nullptr;
PassMain opaque_ps_ = {"Shading"};
PassMain::Sub *opaque_single_sided_ps_ = nullptr;
PassMain::Sub *opaque_double_sided_ps_ = nullptr;
PassSortable transparent_ps_ = {"Forward.Transparent"};
float3 camera_forward_;
// GPUTexture *input_screen_radiance_tx_ = nullptr;
@@ -67,28 +73,17 @@ class ForwardPipeline {
void sync();
DRWShadingGroup *material_add(::Material *blender_mat, GPUMaterial *gpumat)
{
return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ?
material_transparent_add(blender_mat, gpumat) :
material_opaque_add(blender_mat, gpumat);
}
PassMain::Sub *prepass_opaque_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion);
PassMain::Sub *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat);
DRWShadingGroup *prepass_add(::Material *blender_mat, GPUMaterial *gpumat, bool has_motion)
{
return (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) ?
prepass_transparent_add(blender_mat, gpumat) :
prepass_opaque_add(blender_mat, gpumat, has_motion);
}
PassMain::Sub *prepass_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat);
PassMain::Sub *material_transparent_add(const Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat);
DRWShadingGroup *material_opaque_add(::Material *blender_mat, GPUMaterial *gpumat);
DRWShadingGroup *prepass_opaque_add(::Material *blender_mat,
GPUMaterial *gpumat,
bool has_motion);
DRWShadingGroup *material_transparent_add(::Material *blender_mat, GPUMaterial *gpumat);
DRWShadingGroup *prepass_transparent_add(::Material *blender_mat, GPUMaterial *gpumat);
void render(const DRWView *view,
void render(View &view,
Framebuffer &prepass_fb,
Framebuffer &combined_fb,
GPUTexture *combined_tx);
@@ -192,26 +187,36 @@ class PipelineModule {
// velocity.sync();
}
DRWShadingGroup *material_add(::Material *blender_mat,
GPUMaterial *gpumat,
eMaterialPipeline pipeline_type)
PassMain::Sub *material_add(Object *ob,
::Material *blender_mat,
GPUMaterial *gpumat,
eMaterialPipeline pipeline_type)
{
switch (pipeline_type) {
case MAT_PIPE_DEFERRED_PREPASS:
// return deferred.prepass_add(blender_mat, gpumat, false);
break;
case MAT_PIPE_FORWARD_PREPASS:
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
return forward.prepass_transparent_add(ob, blender_mat, gpumat);
}
return forward.prepass_opaque_add(blender_mat, gpumat, false);
case MAT_PIPE_DEFERRED_PREPASS_VELOCITY:
// return deferred.prepass_add(blender_mat, gpumat, true);
break;
case MAT_PIPE_FORWARD_PREPASS:
return forward.prepass_add(blender_mat, gpumat, false);
case MAT_PIPE_FORWARD_PREPASS_VELOCITY:
return forward.prepass_add(blender_mat, gpumat, true);
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
return forward.prepass_transparent_add(ob, blender_mat, gpumat);
}
return forward.prepass_opaque_add(blender_mat, gpumat, true);
case MAT_PIPE_DEFERRED:
// return deferred.material_add(blender_mat, gpumat);
break;
case MAT_PIPE_FORWARD:
return forward.material_add(blender_mat, gpumat);
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
return forward.material_transparent_add(ob, blender_mat, gpumat);
}
return forward.material_opaque_add(blender_mat, gpumat);
case MAT_PIPE_VOLUME:
/* TODO(fclem) volume pass. */
return nullptr;


@@ -87,6 +87,12 @@ class Sampling {
DRW_shgroup_storage_block_ref(grp, "sampling_buf", &data_);
}
template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
{
/* Storage Buf. */
pass->bind_ssbo(SAMPLING_BUF_SLOT, &data_);
}
/* Returns a pseudo-random number in the [0..1] range. Each dimension is de-correlated. */
float rng_get(eSamplingDimension dimension) const
{


@@ -9,6 +9,8 @@
* and static shader usage.
*/
#include "GPU_capabilities.h"
#include "gpu_shader_create_info.hh"
#include "eevee_shader.hh"
@@ -180,11 +182,36 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
GPUCodegenOutput &codegen = *codegen_;
ShaderCreateInfo &info = *reinterpret_cast<ShaderCreateInfo *>(codegen.create_info);
info.auto_resource_location(true);
/* WORKAROUND: Replace by new ob info. */
int64_t ob_info_index = info.additional_infos_.first_index_of_try("draw_object_infos");
if (ob_info_index != -1) {
info.additional_infos_[ob_info_index] = "draw_object_infos_new";
}
/* WORKAROUND: Avoid utility texture merge error. TODO: find a cleaner fix. */
for (auto &resource : info.batch_resources_) {
if (resource.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
if (resource.slot == RBUFS_UTILITY_TEX_SLOT) {
resource.slot = GPU_max_textures_frag() - 1;
}
}
}
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT)) {
info.define("MAT_TRANSPARENT");
/* Transparent materials do not have any velocity-specific pipeline. */
if (pipeline_type == MAT_PIPE_FORWARD_PREPASS_VELOCITY) {
pipeline_type = MAT_PIPE_FORWARD_PREPASS;
}
}
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_TRANSPARENT) == false &&
pipeline_type == MAT_PIPE_FORWARD) {
/* Opaque forward does support AOVs and render passes. */
info.additional_info("eevee_aov_out");
info.additional_info("eevee_render_pass_out");
}
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_BARYCENTRIC)) {
switch (geometry_type) {
case MAT_GEOM_MESH:


@@ -12,16 +12,16 @@
# include "BLI_memory_utils.hh"
# include "DRW_gpu_wrapper.hh"
# include "draw_manager.hh"
# include "draw_pass.hh"
# include "eevee_defines.hh"
# include "GPU_shader_shared.h"
namespace blender::eevee {
using draw::Framebuffer;
using draw::SwapChain;
using draw::Texture;
using draw::TextureFromPool;
using namespace draw;
constexpr eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
constexpr eGPUSamplerState with_filter = GPU_SAMPLER_FILTER;


@@ -74,25 +74,12 @@ WorldHandle &SyncModule::sync_world(::World *world)
/** \name Common
* \{ */
static inline void shgroup_geometry_call(DRWShadingGroup *grp,
Object *ob,
GPUBatch *geom,
int v_first = -1,
int v_count = -1,
bool use_instancing = false)
static inline void geometry_call(PassMain::Sub *sub_pass,
GPUBatch *geom,
ResourceHandle resource_handle)
{
if (grp == nullptr) {
return;
}
if (v_first == -1) {
DRW_shgroup_call(grp, geom, ob);
}
else if (use_instancing) {
DRW_shgroup_call_instance_range(grp, ob, geom, v_first, v_count);
}
else {
DRW_shgroup_call_range(grp, ob, geom, v_first, v_count);
if (sub_pass != nullptr) {
sub_pass->draw(geom, resource_handle);
}
}
@@ -102,9 +89,10 @@ static inline void shgroup_geometry_call(DRWShadingGroup *grp,
/** \name Mesh
* \{ */
void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle)
void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle)
{
bool has_motion = inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc);
bool has_motion = inst_.velocity.step_object_sync(
ob, ob_handle.object_key, res_handle, ob_handle.recalc);
MaterialArray &material_array = inst_.materials.material_array_get(ob, has_motion);
@@ -123,11 +111,11 @@ void SyncModule::sync_mesh(Object *ob, ObjectHandle &ob_handle)
continue;
}
Material *material = material_array.materials[i];
shgroup_geometry_call(material->shading.shgrp, ob, geom);
shgroup_geometry_call(material->prepass.shgrp, ob, geom);
shgroup_geometry_call(material->shadow.shgrp, ob, geom);
geometry_call(material->shading.sub_pass, geom, res_handle);
geometry_call(material->prepass.sub_pass, geom, res_handle);
geometry_call(material->shadow.sub_pass, geom, res_handle);
is_shadow_caster = is_shadow_caster || material->shadow.shgrp != nullptr;
is_shadow_caster = is_shadow_caster || material->shadow.sub_pass != nullptr;
is_alpha_blend = is_alpha_blend || material->is_alpha_blend_transparent;
}
@@ -155,11 +143,13 @@ struct gpIterData {
int vcount = 0;
bool instancing = false;
gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle)
gpIterData(Instance &inst_, Object *ob_, ObjectHandle &ob_handle, ResourceHandle resource_handle)
: inst(inst_),
ob(ob_),
material_array(inst_.materials.material_array_get(
ob_, inst_.velocity.step_object_sync(ob, ob_handle.object_key, ob_handle.recalc)))
ob_,
inst_.velocity.step_object_sync(
ob, ob_handle.object_key, resource_handle, ob_handle.recalc)))
{
cfra = DEG_get_ctime(inst.depsgraph);
};
@@ -167,26 +157,28 @@ struct gpIterData {
static void gpencil_drawcall_flush(gpIterData &iter)
{
#if 0 /* Incompatible with the new draw manager. */
if (iter.geom != nullptr) {
shgroup_geometry_call(iter.material->shading.shgrp,
geometry_call(iter.material->shading.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
shgroup_geometry_call(iter.material->prepass.shgrp,
geometry_call(iter.material->prepass.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
shgroup_geometry_call(iter.material->shadow.shgrp,
geometry_call(iter.material->shadow.sub_pass,
iter.ob,
iter.geom,
iter.vfirst,
iter.vcount,
iter.instancing);
}
#endif
iter.geom = nullptr;
iter.vfirst = -1;
iter.vcount = 0;
@@ -250,21 +242,22 @@ static void gpencil_stroke_sync(bGPDlayer *UNUSED(gpl),
}
}
void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle)
void SyncModule::sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle)
{
/* TODO(fclem): Waiting for a user option to use the render engine instead of gpencil engine. */
if (true) {
inst_.gpencil_engine_enabled = true;
return;
}
UNUSED_VARS(res_handle);
gpIterData iter(inst_, ob, ob_handle);
gpIterData iter(inst_, ob, ob_handle, res_handle);
BKE_gpencil_visible_stroke_iter((bGPdata *)ob->data, nullptr, gpencil_stroke_sync, &iter);
gpencil_drawcall_flush(iter);
// bool is_caster = true; /* TODO material.shadow.shgrp. */
// bool is_caster = true; /* TODO material.shadow.sub_pass. */
// bool is_alpha_blend = true; /* TODO material.is_alpha_blend. */
// shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend);
}
@@ -280,19 +273,24 @@ static void shgroup_curves_call(MaterialPass &matpass,
ParticleSystem *part_sys = nullptr,
ModifierData *modifier_data = nullptr)
{
if (matpass.shgrp == nullptr) {
UNUSED_VARS(ob, modifier_data);
if (matpass.sub_pass == nullptr) {
return;
}
if (part_sys != nullptr) {
DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.shgrp, matpass.gpumat);
// DRW_shgroup_hair_create_sub(ob, part_sys, modifier_data, matpass.sub_pass, matpass.gpumat);
}
else {
DRW_shgroup_curves_create_sub(ob, matpass.shgrp, matpass.gpumat);
// DRW_shgroup_curves_create_sub(ob, matpass.sub_pass, matpass.gpumat);
}
}
void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data)
void SyncModule::sync_curves(Object *ob,
ObjectHandle &ob_handle,
ResourceHandle res_handle,
ModifierData *modifier_data)
{
UNUSED_VARS(res_handle);
int mat_nr = CURVES_MATERIAL_NR;
ParticleSystem *part_sys = nullptr;
@@ -320,7 +318,7 @@ void SyncModule::sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *
/* TODO(fclem) Hair velocity. */
// shading_passes.velocity.gpencil_add(ob, ob_handle);
// bool is_caster = material.shadow.shgrp != nullptr;
// bool is_caster = material.shadow.sub_pass != nullptr;
// bool is_alpha_blend = material.is_alpha_blend_transparent;
// shadows.sync_object(ob, ob_handle, is_caster, is_alpha_blend);
}


@@ -150,9 +150,12 @@ class SyncModule {
ObjectHandle &sync_object(Object *ob);
WorldHandle &sync_world(::World *world);
void sync_mesh(Object *ob, ObjectHandle &ob_handle);
void sync_gpencil(Object *ob, ObjectHandle &ob_handle);
void sync_curves(Object *ob, ObjectHandle &ob_handle, ModifierData *modifier_data = nullptr);
void sync_mesh(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle);
void sync_gpencil(Object *ob, ObjectHandle &ob_handle, ResourceHandle res_handle);
void sync_curves(Object *ob,
ObjectHandle &ob_handle,
ResourceHandle res_handle,
ModifierData *modifier_data = nullptr);
};
/** \} */


@@ -43,6 +43,10 @@ void VelocityModule::init()
step_ = STEP_CURRENT;
/* Let the main sync loop handle the current step. */
}
/* For viewport, only previous motion is supported.
* Still bind previous step to avoid undefined behavior. */
next_step_ = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
}
static void step_object_sync_render(void *velocity,
@@ -51,7 +55,9 @@ static void step_object_sync_render(void *velocity,
Depsgraph *UNUSED(depsgraph))
{
ObjectKey object_key(ob);
reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key);
/* NOTE: Dummy resource handle since this will not be used for drawing. */
ResourceHandle resource_handle(0);
reinterpret_cast<VelocityModule *>(velocity)->step_object_sync(ob, object_key, resource_handle);
}
void VelocityModule::step_sync(eVelocityStep step, float time)
@@ -78,6 +84,7 @@ void VelocityModule::step_camera_sync()
bool VelocityModule::step_object_sync(Object *ob,
ObjectKey &object_key,
ResourceHandle resource_handle,
int /*IDRecalcFlag*/ recalc)
{
bool has_motion = object_has_velocity(ob) || (recalc & ID_RECALC_TRANSFORM);
@@ -89,8 +96,6 @@ bool VelocityModule::step_object_sync(Object *ob,
return false;
}
uint32_t resource_id = DRW_object_resource_id_get(ob);
/* Object motion. */
/* FIXME(fclem) As we are using original object pointers, there is a chance the previous
 * object key matches a totally different object if the scene was changed by the user or Python
@@ -99,7 +104,7 @@ bool VelocityModule::step_object_sync(Object *ob,
* We live with that until we have a correct way of identifying new objects. */
VelocityObjectData &vel = velocity_map.lookup_or_add_default(object_key);
vel.obj.ofs[step_] = object_steps_usage[step_]++;
vel.obj.resource_id = resource_id;
vel.obj.resource_id = resource_handle.resource_index();
vel.id = (ID *)ob->data;
object_steps[step_]->get_or_resize(vel.obj.ofs[step_]) = ob->obmat;
if (step_ == STEP_CURRENT) {
@@ -257,7 +262,7 @@ void VelocityModule::end_sync()
uint32_t max_resource_id_ = 0u;
for (Map<ObjectKey, VelocityObjectData>::Item item : velocity_map.items()) {
if (item.value.obj.resource_id == (uint)-1) {
if (item.value.obj.resource_id == (uint32_t)-1) {
deleted_obj.append(item.key);
}
else {
@@ -277,7 +282,7 @@ void VelocityModule::end_sync()
velocity_map.remove(key);
}
indirection_buf.resize(power_of_2_max_u(max_resource_id_ + 1));
indirection_buf.resize(ceil_to_multiple_u(max_resource_id_, 128));
/* Avoid uploading more data to the GPU as well as an extra level of
 * indirection on the GPU by copying the offsets back to the VelocityIndex. */
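For example, assuming power_of_2_max_u() rounds up to the next power of two, a scene
with max_resource_id_ = 1030 previously resized the indirection buffer to 2048 entries,
while ceil_to_multiple_u(1030, 128) resizes it to 1152: growth now over-allocates by at
most 127 entries instead of up to nearly half the buffer.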


@@ -67,7 +67,10 @@ class VelocityModule {
private:
Instance &inst_;
/** Step being synced. */
eVelocityStep step_ = STEP_CURRENT;
/** Step referenced as next step. */
eVelocityStep next_step_ = STEP_NEXT;
public:
VelocityModule(Instance &inst) : inst_(inst)
@@ -102,7 +105,10 @@ class VelocityModule {
void step_sync(eVelocityStep step, float time);
/* Gather motion data. Returns true if the object **can** have motion. */
bool step_object_sync(Object *ob, ObjectKey &object_key, int recalc = 0);
bool step_object_sync(Object *ob,
ObjectKey &object_key,
ResourceHandle resource_handle,
int recalc = 0);
/* Moves next frame data to previous frame data. Nullify next frame data. */
void step_swap();
@@ -112,6 +118,20 @@ class VelocityModule {
void bind_resources(DRWShadingGroup *grp);
template<typename T> void bind_resources(draw::detail::Pass<T> *pass)
{
/* Storage Buf. */
pass->bind_ssbo(VELOCITY_OBJ_PREV_BUF_SLOT, &(*object_steps[STEP_PREVIOUS]));
pass->bind_ssbo(VELOCITY_OBJ_NEXT_BUF_SLOT, &(*object_steps[next_step_]));
pass->bind_ssbo(VELOCITY_GEO_PREV_BUF_SLOT, &(*geometry_steps[STEP_PREVIOUS]));
pass->bind_ssbo(VELOCITY_GEO_NEXT_BUF_SLOT, &(*geometry_steps[next_step_]));
pass->bind_ssbo(VELOCITY_INDIRECTION_BUF_SLOT, &indirection_buf);
/* Uniform Buf. */
pass->bind_ubo(VELOCITY_CAMERA_PREV_BUF, &(*camera_steps[STEP_PREVIOUS]));
pass->bind_ubo(VELOCITY_CAMERA_CURR_BUF, &(*camera_steps[STEP_CURRENT]));
pass->bind_ubo(VELOCITY_CAMERA_NEXT_BUF, &(*camera_steps[next_step_]));
}
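Since next_step_ is remapped to STEP_PREVIOUS for the viewport (see init() above), the
*_NEXT_* slots then alias the previous-step buffers instead of binding never-written
"next" data, e.g.:

/* Viewport: next_step_ == STEP_PREVIOUS, so this binds the previous-step buffer twice. */
pass->bind_ssbo(VELOCITY_OBJ_NEXT_BUF_SLOT, &(*object_steps[STEP_PREVIOUS]));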
bool camera_has_motion() const;
bool camera_changed_projection() const;


@@ -118,10 +118,10 @@ void ShadingView::render()
GPU_framebuffer_bind(combined_fb_);
GPU_framebuffer_clear_color_depth(combined_fb_, clear_color, 1.0f);
inst_.pipelines.world.render();
inst_.pipelines.world.render(render_view_new_);
/* TODO(fclem): Move it after the first prepass (and hiz update) once pipeline is stabilized. */
inst_.lights.set_view(render_view_, extent_);
inst_.lights.set_view(render_view_new_, extent_);
// inst_.pipelines.deferred.render(
// render_view_, rt_buffer_opaque_, rt_buffer_refract_, depth_tx_, combined_tx_);
@@ -130,10 +130,10 @@ void ShadingView::render()
// inst_.lookdev.render_overlay(view_fb_);
inst_.pipelines.forward.render(render_view_, prepass_fb_, combined_fb_, rbufs.combined_tx);
inst_.pipelines.forward.render(render_view_new_, prepass_fb_, combined_fb_, rbufs.combined_tx);
inst_.lights.debug_draw(combined_fb_);
inst_.hiz_buffer.debug_draw(combined_fb_);
inst_.lights.debug_draw(render_view_new_, combined_fb_);
inst_.hiz_buffer.debug_draw(render_view_new_, combined_fb_);
GPUTexture *combined_final_tx = render_postfx(rbufs.combined_tx);
@@ -157,8 +157,8 @@ GPUTexture *ShadingView::render_postfx(GPUTexture *input_tx)
GPUTexture *output_tx = postfx_tx_;
/* Swapping is done internally. Actual output is set to the next input. */
inst_.depth_of_field.render(&input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(&input_tx, &output_tx);
inst_.depth_of_field.render(render_view_new_, &input_tx, &output_tx, dof_buffer_);
inst_.motion_blur.render(render_view_new_, &input_tx, &output_tx);
return input_tx;
}
@@ -186,6 +186,8 @@ void ShadingView::update_view()
* out of the blurring radius. To fix this, use custom enlarged culling matrix. */
inst_.depth_of_field.jitter_apply(winmat, viewmat);
DRW_view_update_sub(render_view_, viewmat.ptr(), winmat.ptr());
render_view_new_.sync(viewmat, winmat);
}
/** \} */
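The persistent draw::View replaces the per-frame DRWView bookkeeping: it is constructed
once with a debug name and re-synced with fresh matrices every sample. A minimal sketch
(the name string is illustrative):

View render_view_new_("RenderView");      /* Member, constructed once. */
render_view_new_.sync(viewmat, winmat);   /* Per-sample, replaces DRW_view_update_sub(). */
inst_.manager->submit(pass, render_view_new_);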


@@ -57,6 +57,7 @@ class ShadingView {
DRWView *sub_view_ = nullptr;
/** Same as sub_view_ but has Depth Of Field jitter applied. */
DRWView *render_view_ = nullptr;
View render_view_new_;
/** Render size of the view. Can change between scene sample eval. */
int2 extent_ = {-1, -1};
@@ -65,7 +66,7 @@ class ShadingView {
public:
ShadingView(Instance &inst, const char *name, const float (*face_matrix)[4])
: inst_(inst), name_(name), face_matrix_(face_matrix){};
: inst_(inst), name_(name), face_matrix_(face_matrix), render_view_new_(name){};
~ShadingView(){};


@@ -133,9 +133,9 @@ void main()
/* Issue a sprite for each field if any CoC matches. */
if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
/* Same value for all threads. Not an issue if we don't sync access to it. */
scatter_fg_indirect_buf.v_count = 4u;
scatter_fg_indirect_buf.vertex_len = 4u;
/* Issue 1 strip instance per sprite. */
uint rect_id = atomicAdd(scatter_fg_indirect_buf.i_count, 1u);
uint rect_id = atomicAdd(scatter_fg_indirect_buf.instance_len, 1u);
if (rect_id < dof_buf.scatter_max_rect) {
vec4 coc4_fg = max(vec4(0.0), -coc4);
@@ -166,9 +166,9 @@ void main()
}
if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) {
/* Same value for all threads. Not an issue if we don't sync access to it. */
scatter_bg_indirect_buf.v_count = 4u;
scatter_bg_indirect_buf.vertex_len = 4u;
/* Issue 1 strip instance per sprite. */
uint rect_id = atomicAdd(scatter_bg_indirect_buf.i_count, 1u);
uint rect_id = atomicAdd(scatter_bg_indirect_buf.instance_len, 1u);
if (rect_id < dof_buf.scatter_max_rect) {
vec4 coc4_bg = max(vec4(0.0), coc4);
vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4;
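The renamed fields line up with a conventional non-indexed indirect draw command. A
hedged sketch of the GPU-side struct these atomics write into (field order assumed from
the standard DrawArraysIndirect layout, not quoted from the patch):

struct DrawCommand {
  uint vertex_len;    /* Was v_count: vertices per instance (4 for a sprite strip). */
  uint instance_len;  /* Was i_count: bumped atomically, one per scattered sprite. */
  uint vertex_first;
  uint base_instance;
};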


@@ -6,6 +6,7 @@
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_hair_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_surf_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_velocity_lib.glsl)
@@ -73,8 +74,7 @@ void main()
nodetree_surface();
// float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY);
float noise_offset = 0.5;
float noise_offset = sampling_rng_1D_get(SAMPLING_TRANSPARENCY);
float random_threshold = hashed_alpha_threshold(1.0, noise_offset, g_data.P);
float transparency = avg(g_transmittance);


@@ -97,6 +97,7 @@ void main()
out_normal += g_refraction_data.N * g_refraction_data.weight;
out_normal = safe_normalize(out_normal);
#ifdef MAT_RENDER_PASS_SUPPORT
ivec2 out_texel = ivec2(gl_FragCoord.xy);
imageStore(rp_normal_img, out_texel, vec4(out_normal, 1.0));
imageStore(
@@ -106,6 +107,7 @@ void main()
imageStore(rp_diffuse_color_img, out_texel, vec4(g_diffuse_data.color, 1.0));
imageStore(rp_specular_color_img, out_texel, vec4(specular_color, 1.0));
imageStore(rp_emission_img, out_texel, vec4(g_emission, 1.0));
#endif
out_radiance.rgb *= 1.0 - g_holdout;


@@ -11,7 +11,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_bokeh_lut)
.do_static_compilation(true)
.local_group_size(DOF_BOKEH_LUT_SIZE, DOF_BOKEH_LUT_SIZE)
.additional_info("eevee_shared", "draw_view")
.uniform_buf(1, "DepthOfFieldData", "dof_buf")
.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.image(0, GPU_RG16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_gather_lut_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_scatter_lut_img")
.image(2, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_resolve_lut_img")
@@ -21,7 +21,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_setup)
.do_static_compilation(true)
.local_group_size(DOF_DEFAULT_GROUP_SIZE, DOF_DEFAULT_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
.uniform_buf(1, "DepthOfFieldData", "dof_buf")
.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::DEPTH_2D, "depth_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_color_img")
@@ -32,7 +32,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_stabilize)
.do_static_compilation(true)
.local_group_size(DOF_STABILIZE_GROUP_SIZE, DOF_STABILIZE_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
-.uniform_buf(4, "DepthOfFieldData", "dof_buf")
+.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::FLOAT_2D, "coc_tx")
.sampler(1, ImageType::FLOAT_2D, "color_tx")
.sampler(2, ImageType::FLOAT_2D, "velocity_tx")
@@ -57,7 +57,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_reduce)
.do_static_compilation(true)
.local_group_size(DOF_REDUCE_GROUP_SIZE, DOF_REDUCE_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view")
-.uniform_buf(1, "DepthOfFieldData", "dof_buf")
+.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::FLOAT_2D, "downsample_tx")
.storage_buf(0, Qualifier::WRITE, "ScatterRect", "scatter_fg_list_buf[]")
.storage_buf(1, Qualifier::WRITE, "ScatterRect", "scatter_bg_list_buf[]")
@@ -154,7 +154,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_gather_common)
"draw_view",
"eevee_depth_of_field_tiles_common",
"eevee_sampling_data")
-.uniform_buf(2, "DepthOfFieldData", "dof_buf")
+.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.local_group_size(DOF_GATHER_GROUP_SIZE, DOF_GATHER_GROUP_SIZE)
.sampler(0, ImageType::FLOAT_2D, "color_tx")
.sampler(1, ImageType::FLOAT_2D, "color_bilinear_tx")
@@ -229,7 +229,7 @@ GPU_SHADER_CREATE_INFO(eevee_depth_of_field_resolve)
"draw_view",
"eevee_depth_of_field_tiles_common",
"eevee_sampling_data")
-.uniform_buf(2, "DepthOfFieldData", "dof_buf")
+.uniform_buf(6, "DepthOfFieldData", "dof_buf")
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.sampler(1, ImageType::FLOAT_2D, "color_tx")
.sampler(2, ImageType::FLOAT_2D, "color_bg_tx")

View File

@@ -4,7 +4,7 @@
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(eevee_film)
.uniform_buf(4, "FilmData", "film_buf")
.uniform_buf(6, "FilmData", "film_buf")
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.sampler(1, ImageType::FLOAT_2D, "combined_tx")
.sampler(2, ImageType::FLOAT_2D, "normal_tx")

View File

@@ -8,10 +8,10 @@
* \{ */
GPU_SHADER_CREATE_INFO(eevee_light_data)
.storage_buf(0, Qualifier::READ, "LightCullingData", "light_cull_buf")
.storage_buf(1, Qualifier::READ, "LightData", "light_buf[]")
.storage_buf(2, Qualifier::READ, "uint", "light_zbin_buf[]")
.storage_buf(3, Qualifier::READ, "uint", "light_tile_buf[]");
.storage_buf(LIGHT_CULL_BUF_SLOT, Qualifier::READ, "LightCullingData", "light_cull_buf")
.storage_buf(LIGHT_BUF_SLOT, Qualifier::READ, "LightData", "light_buf[]")
.storage_buf(LIGHT_ZBIN_BUF_SLOT, Qualifier::READ, "uint", "light_zbin_buf[]")
.storage_buf(LIGHT_TILE_BUF_SLOT, Qualifier::READ, "uint", "light_tile_buf[]");
/** \} */
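The hard-coded slot indices above are replaced by shared LIGHT_*_SLOT defines so the create-info declarations and the C++ bind calls cannot drift apart. A sketch of the pattern, with a hypothetical slot value and illustrative buffer variable name:

#define LIGHT_CULL_BUF_SLOT 0 /* Hypothetical value; the real one lives in eevee_defines.hh. */
/* Create-info side: */
.storage_buf(LIGHT_CULL_BUF_SLOT, Qualifier::READ, "LightCullingData", "light_cull_buf")
/* C++ side binds the same slot: */
GPU_storagebuf_bind(light_cull_buf, LIGHT_CULL_BUF_SLOT);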

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -14,9 +15,10 @@ GPU_SHADER_CREATE_INFO(eevee_shared)
GPU_SHADER_CREATE_INFO(eevee_sampling_data)
.define("EEVEE_SAMPLING_DATA")
.additional_info("eevee_shared")
-.storage_buf(14, Qualifier::READ, "SamplingData", "sampling_buf");
+.storage_buf(6, Qualifier::READ, "SamplingData", "sampling_buf");
-GPU_SHADER_CREATE_INFO(eevee_utility_texture).sampler(8, ImageType::FLOAT_2D_ARRAY, "utility_tx");
+GPU_SHADER_CREATE_INFO(eevee_utility_texture)
+.sampler(RBUFS_UTILITY_TEX_SLOT, ImageType::FLOAT_2D_ARRAY, "utility_tx");
/** \} */
@@ -30,7 +32,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_mesh)
.vertex_in(0, Type::VEC3, "pos")
.vertex_in(1, Type::VEC3, "nor")
.vertex_source("eevee_geom_mesh_vert.glsl")
.additional_info("draw_mesh", "draw_resource_id_varying", "draw_resource_handle");
.additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view");
GPU_SHADER_CREATE_INFO(eevee_geom_gpencil)
.additional_info("eevee_shared")
@@ -52,7 +54,7 @@ GPU_SHADER_CREATE_INFO(eevee_geom_world)
.define("MAT_GEOM_WORLD")
.builtins(BuiltinBits::VERTEX_ID)
.vertex_source("eevee_geom_world_vert.glsl")
.additional_info("draw_modelmat", "draw_resource_id_varying", "draw_resource_handle");
.additional_info("draw_modelmat_new", "draw_resource_id_varying", "draw_view");
/** \} */
@@ -78,9 +80,17 @@ GPU_SHADER_INTERFACE_INFO(eevee_surf_iface, "interp")
GPU_SHADER_CREATE_INFO(eevee_aov_out)
.define("MAT_AOV_SUPPORT")
-.image_array_out(5, Qualifier::WRITE, GPU_RGBA16F, "aov_color_img")
-.image_array_out(6, Qualifier::WRITE, GPU_R16F, "aov_value_img")
-.storage_buf(7, Qualifier::READ, "AOVsInfoData", "aov_buf");
+.image_array_out(RBUFS_AOV_COLOR_SLOT, Qualifier::WRITE, GPU_RGBA16F, "aov_color_img")
+.image_array_out(RBUFS_AOV_VALUE_SLOT, Qualifier::WRITE, GPU_R16F, "aov_value_img")
+.storage_buf(RBUFS_AOV_BUF_SLOT, Qualifier::READ, "AOVsInfoData", "aov_buf");
+GPU_SHADER_CREATE_INFO(eevee_render_pass_out)
+.define("MAT_RENDER_PASS_SUPPORT")
+.image_out(RBUFS_NORMAL_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img")
+.image_array_out(RBUFS_LIGHT_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img")
+.image_out(RBUFS_DIFF_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img")
+.image_out(RBUFS_SPEC_COLOR_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img")
+.image_out(RBUFS_EMISSION_SLOT, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img");
GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
.vertex_out(eevee_surf_iface)
@@ -104,7 +114,6 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
;
GPU_SHADER_CREATE_INFO(eevee_surf_forward)
-.auto_resource_location(true)
.vertex_out(eevee_surf_iface)
/* Early fragment test is needed for render passes support for forward surfaces. */
/* NOTE: This removes the possibility of using gl_FragDepth. */
@@ -112,41 +121,27 @@ GPU_SHADER_CREATE_INFO(eevee_surf_forward)
.fragment_out(0, Type::VEC4, "out_radiance", DualBlend::SRC_0)
.fragment_out(0, Type::VEC4, "out_transmittance", DualBlend::SRC_1)
.fragment_source("eevee_surf_forward_frag.glsl")
-.image_out(0, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img")
-.image_array_out(1, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img")
-.image_out(2, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img")
-.image_out(3, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img")
-.image_out(4, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img")
-.additional_info("eevee_aov_out",
-"eevee_light_data",
-"eevee_utility_texture",
-"eevee_sampling_data"
-// "eevee_lightprobe_data",
+.additional_info("eevee_light_data", "eevee_utility_texture", "eevee_sampling_data"
+// "eevee_lightprobe_data",
// "eevee_shadow_data"
/* Optionally added depending on the material. */
// "eevee_raytrace_data",
// "eevee_transmittance_data",
// "eevee_shadow_data"
+// "eevee_aov_out",
+// "eevee_render_pass_out",
);
GPU_SHADER_CREATE_INFO(eevee_surf_depth)
.vertex_out(eevee_surf_iface)
.fragment_source("eevee_surf_depth_frag.glsl")
-// .additional_info("eevee_sampling_data", "eevee_utility_texture")
-;
+.additional_info("eevee_sampling_data", "eevee_utility_texture");
GPU_SHADER_CREATE_INFO(eevee_surf_world)
.vertex_out(eevee_surf_iface)
-.image_out(0, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_normal_img")
-.image_array_out(1, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_light_img")
-.image_out(2, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_diffuse_color_img")
-.image_out(3, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_specular_color_img")
-.image_out(4, Qualifier::READ_WRITE, GPU_RGBA16F, "rp_emission_img")
.push_constant(Type::FLOAT, "world_opacity_fade")
.fragment_out(0, Type::VEC4, "out_background")
.fragment_source("eevee_surf_world_frag.glsl")
.additional_info("eevee_aov_out"
//"eevee_utility_texture"
);
.additional_info("eevee_aov_out", "eevee_render_pass_out", "eevee_utility_texture");
#undef image_out
#undef image_array_out
@@ -188,10 +183,7 @@ GPU_SHADER_CREATE_INFO(eevee_volume_deferred)
GPU_SHADER_CREATE_INFO(eevee_material_stub).define("EEVEE_MATERIAL_STUBS");
# define EEVEE_MAT_FINAL_VARIATION(name, ...) \
-GPU_SHADER_CREATE_INFO(name) \
-.additional_info(__VA_ARGS__) \
-.auto_resource_location(true) \
-.do_static_compilation(true);
+GPU_SHADER_CREATE_INFO(name).additional_info(__VA_ARGS__).do_static_compilation(true);
# define EEVEE_MAT_GEOM_VARIATIONS(prefix, ...) \
EEVEE_MAT_FINAL_VARIATION(prefix##_world, "eevee_geom_world", __VA_ARGS__) \

View File

@@ -6,7 +6,7 @@
GPU_SHADER_CREATE_INFO(eevee_motion_blur_tiles_flatten)
.local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view", "eevee_velocity_camera")
.uniform_buf(4, "MotionBlurData", "motion_blur_buf")
.uniform_buf(6, "MotionBlurData", "motion_blur_buf")
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.image(1, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_tiles_img")
.compute_source("eevee_motion_blur_flatten_comp.glsl");
@@ -35,7 +35,7 @@ GPU_SHADER_CREATE_INFO(eevee_motion_blur_gather)
.do_static_compilation(true)
.local_group_size(MOTION_BLUR_GROUP_SIZE, MOTION_BLUR_GROUP_SIZE)
.additional_info("eevee_shared", "draw_view", "eevee_sampling_data")
.uniform_buf(4, "MotionBlurData", "motion_blur_buf")
.uniform_buf(6, "MotionBlurData", "motion_blur_buf")
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.sampler(1, ImageType::FLOAT_2D, "velocity_tx")
.sampler(2, ImageType::FLOAT_2D, "in_color_tx")

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -17,19 +18,20 @@ GPU_SHADER_INTERFACE_INFO(eevee_velocity_surface_iface, "motion")
GPU_SHADER_CREATE_INFO(eevee_velocity_camera)
.define("VELOCITY_CAMERA")
.uniform_buf(1, "CameraData", "camera_prev")
.uniform_buf(2, "CameraData", "camera_curr")
.uniform_buf(3, "CameraData", "camera_next");
.uniform_buf(VELOCITY_CAMERA_PREV_BUF, "CameraData", "camera_prev")
.uniform_buf(VELOCITY_CAMERA_CURR_BUF, "CameraData", "camera_curr")
.uniform_buf(VELOCITY_CAMERA_NEXT_BUF, "CameraData", "camera_next");
GPU_SHADER_CREATE_INFO(eevee_velocity_geom)
.define("MAT_VELOCITY")
-.auto_resource_location(true)
-.storage_buf(4, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]", Frequency::PASS)
-.storage_buf(5, Qualifier::READ, "mat4", "velocity_obj_next_buf[]", Frequency::PASS)
-.storage_buf(6, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]", Frequency::PASS)
-.storage_buf(7, Qualifier::READ, "vec4", "velocity_geo_next_buf[]", Frequency::PASS)
-.storage_buf(
-7, Qualifier::READ, "VelocityIndex", "velocity_indirection_buf[]", Frequency::PASS)
+.storage_buf(VELOCITY_OBJ_PREV_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_prev_buf[]")
+.storage_buf(VELOCITY_OBJ_NEXT_BUF_SLOT, Qualifier::READ, "mat4", "velocity_obj_next_buf[]")
+.storage_buf(VELOCITY_GEO_PREV_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_prev_buf[]")
+.storage_buf(VELOCITY_GEO_NEXT_BUF_SLOT, Qualifier::READ, "vec4", "velocity_geo_next_buf[]")
+.storage_buf(VELOCITY_INDIRECTION_BUF_SLOT,
+Qualifier::READ,
+"VelocityIndex",
+"velocity_indirection_buf[]")
.vertex_out(eevee_velocity_surface_iface)
.fragment_out(0, Type::VEC4, "out_velocity")
.additional_info("eevee_velocity_camera");

View File

@@ -238,6 +238,11 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable
GPU_storagebuf_clear_to_zero(ssbo_);
}
+void read()
+{
+GPU_storagebuf_read(ssbo_, this->data_);
+}
operator GPUStorageBuf *() const
{
return ssbo_;
@@ -850,6 +855,32 @@ class TextureFromPool : public Texture, NonMovable {
GPUTexture *stencil_view() = delete;
};
+/**
+ * Dummy type to bind texture as image.
+ * It is just a GPUTexture in disguise.
+ */
+class Image {};
+static inline Image *as_image(GPUTexture *tex)
+{
+return reinterpret_cast<Image *>(tex);
+}
+static inline Image **as_image(GPUTexture **tex)
+{
+return reinterpret_cast<Image **>(tex);
+}
+static inline GPUTexture *as_texture(Image *img)
+{
+return reinterpret_cast<GPUTexture *>(img);
+}
+static inline GPUTexture **as_texture(Image **img)
+{
+return reinterpret_cast<GPUTexture **>(img);
+}
/** \} */
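A usage sketch for these casts, pairing them with the ResourceBind constructors added in draw_command.hh further down in this patch (slot numbers arbitrary):

GPUTexture *tex = /* ... some pooled texture ... */ nullptr;
/* The GPUTexture overload records a sampler bind. */
command::ResourceBind sampler_bind(0, tex, GPU_SAMPLER_DEFAULT);
/* as_image() selects the image overload without introducing a real new type. */
command::ResourceBind image_bind(1, as_image(tex));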
/* -------------------------------------------------------------------- */

View File

@@ -41,6 +41,7 @@
#include "draw_debug.h"
#include "draw_manager_profiling.h"
#include "draw_state.h"
#include "draw_view_data.h"
#include "MEM_guardedalloc.h"
@@ -288,83 +289,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib);
/* Batches */
-/**
-* DRWState is a bit-mask that stores the current render state and the desired render state. Based
-* on the differences the minimum state changes can be invoked to setup the desired render state.
-*
-* The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive
-* therefore they aren't ordered as a bit mask.
-*/
-typedef enum {
-/** To be used for compute passes. */
-DRW_STATE_NO_DRAW = 0,
-/** Write mask */
-DRW_STATE_WRITE_DEPTH = (1 << 0),
-DRW_STATE_WRITE_COLOR = (1 << 1),
-/* Write Stencil. These options are mutual exclusive and packed into 2 bits */
-DRW_STATE_WRITE_STENCIL = (1 << 2),
-DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2),
-DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2),
-/** Depth test. These options are mutual exclusive and packed into 3 bits */
-DRW_STATE_DEPTH_ALWAYS = (1 << 4),
-DRW_STATE_DEPTH_LESS = (2 << 4),
-DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4),
-DRW_STATE_DEPTH_EQUAL = (4 << 4),
-DRW_STATE_DEPTH_GREATER = (5 << 4),
-DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4),
-/** Culling test */
-DRW_STATE_CULL_BACK = (1 << 7),
-DRW_STATE_CULL_FRONT = (1 << 8),
-/** Stencil test. These options are mutually exclusive and packed into 2 bits. */
-DRW_STATE_STENCIL_ALWAYS = (1 << 9),
-DRW_STATE_STENCIL_EQUAL = (2 << 9),
-DRW_STATE_STENCIL_NEQUAL = (3 << 9),
-/** Blend state. These options are mutual exclusive and packed into 4 bits */
-DRW_STATE_BLEND_ADD = (1 << 11),
-/** Same as additive but let alpha accumulate without pre-multiply. */
-DRW_STATE_BLEND_ADD_FULL = (2 << 11),
-/** Standard alpha blending. */
-DRW_STATE_BLEND_ALPHA = (3 << 11),
-/** Use that if color is already pre-multiply by alpha. */
-DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11),
-DRW_STATE_BLEND_BACKGROUND = (5 << 11),
-DRW_STATE_BLEND_OIT = (6 << 11),
-DRW_STATE_BLEND_MUL = (7 << 11),
-DRW_STATE_BLEND_SUB = (8 << 11),
-/** Use dual source blending. WARNING: Only one color buffer allowed. */
-DRW_STATE_BLEND_CUSTOM = (9 << 11),
-DRW_STATE_LOGIC_INVERT = (10 << 11),
-DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11),
-DRW_STATE_IN_FRONT_SELECT = (1 << 27),
-DRW_STATE_SHADOW_OFFSET = (1 << 28),
-DRW_STATE_CLIP_PLANES = (1 << 29),
-DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30),
-/** DO NOT USE. Assumed always enabled. Only used internally. */
-DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31),
-} DRWState;
-ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE);
-#define DRW_STATE_DEFAULT \
-(DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL)
-#define DRW_STATE_BLEND_ENABLED \
-(DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \
-DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \
-DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT)
-#define DRW_STATE_RASTERIZER_ENABLED \
-(DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \
-DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL)
-#define DRW_STATE_DEPTH_TEST_ENABLED \
-(DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \
-DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL)
-#define DRW_STATE_STENCIL_TEST_ENABLED \
-(DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL)
-#define DRW_STATE_WRITE_STENCIL_ENABLED \
-(DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \
-DRW_STATE_WRITE_STENCIL_SHADOW_FAIL)
typedef enum {
DRW_ATTR_INT,
DRW_ATTR_FLOAT,

View File

@@ -0,0 +1,596 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
/** \file
* \ingroup draw
*/
#include "GPU_batch.h"
#include "GPU_capabilities.h"
#include "GPU_compute.h"
#include "GPU_debug.h"
#include "draw_command.hh"
#include "draw_shader.h"
#include "draw_view.hh"
#include <bitset>
#include <sstream>
/* For debugging purposes. */
/* TODO: limit to GL < 4.6. */
#define WORKAROUND_RESOURCE_ID false
namespace blender::draw::command {
/* -------------------------------------------------------------------- */
/** \name Commands Execution
* \{ */
void ShaderBind::execute(RecordingState &state) const
{
if (assign_if_different(state.shader, shader)) {
GPU_shader_bind(shader);
}
}
void ResourceBind::execute() const
{
if (slot == -1) {
return;
}
switch (type) {
case ResourceBind::Type::Sampler:
GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false);
break;
case ResourceBind::Type::Image:
GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot);
break;
case ResourceBind::Type::UniformBuf:
GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot);
break;
case ResourceBind::Type::StorageBuf:
GPU_storagebuf_bind(is_reference ? *storage_buf_ref : storage_buf, slot);
break;
}
}
void PushConstant::execute(RecordingState &state) const
{
if (location == -1) {
return;
}
switch (type) {
case PushConstant::Type::IntValue:
GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value);
break;
case PushConstant::Type::IntReference:
GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref);
break;
case PushConstant::Type::FloatValue:
GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value);
break;
case PushConstant::Type::FloatReference:
GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref);
break;
}
}
void Draw::execute(RecordingState &state) const
{
state.front_facing_set(handle.has_inverted_handedness());
GPU_batch_set_shader(batch, state.shader);
GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len);
}
void DrawMulti::execute(RecordingState &state) const
{
DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_;
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
uint group_index = this->group_first;
while (group_index != (uint)-1) {
const DrawGroup &group = groups[group_index];
if (group.vertex_len > 0) {
GPU_batch_set_shader(group.gpu_batch, state.shader);
constexpr intptr_t stride = sizeof(DrawCommand);
/* We have 2 indirect commands reserved per draw group. */
intptr_t offset = stride * group_index * 2;
/* Draw negatively scaled geometry first. */
if (group.len - group.front_facing_len > 0) {
state.front_facing_set(true);
GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset);
}
if (group.front_facing_len > 0) {
state.front_facing_set(false);
GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride);
}
}
group_index = group.next;
}
}
void DrawIndirect::execute(RecordingState &state) const
{
state.front_facing_set(handle.has_inverted_handedness());
GPU_batch_draw_indirect(batch, *indirect_buf, 0);
}
void Dispatch::execute(RecordingState &state) const
{
if (is_reference) {
GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z);
}
else {
GPU_compute_dispatch(state.shader, size.x, size.y, size.z);
}
}
void DispatchIndirect::execute(RecordingState &state) const
{
GPU_compute_dispatch_indirect(state.shader, *indirect_buf);
}
void Barrier::execute() const
{
GPU_memory_barrier(type);
}
void Clear::execute() const
{
GPUFrameBuffer *fb = GPU_framebuffer_active_get();
GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil);
}
void StateSet::execute(RecordingState &recording_state) const
{
/**
* Does not support locked state for the moment and never should.
* Better implement a less hacky selection!
*/
BLI_assert(DST.state_lock == 0);
if (!assign_if_different(recording_state.pipeline_state, new_state)) {
return;
}
/* Keep old API working. Keep the state tracking in sync. */
/* TODO(fclem): Move at the end of a pass. */
DST.state = new_state;
GPU_state_set(to_write_mask(new_state),
to_blend(new_state),
to_face_cull_test(new_state),
to_depth_test(new_state),
to_stencil_test(new_state),
to_stencil_op(new_state),
to_provoking_vertex(new_state));
if (new_state & DRW_STATE_SHADOW_OFFSET) {
GPU_shadow_offset(true);
}
else {
GPU_shadow_offset(false);
}
/* TODO: this should be part of shader state. */
if (new_state & DRW_STATE_CLIP_PLANES) {
GPU_clip_distances(recording_state.view_clip_plane_count);
}
else {
GPU_clip_distances(0);
}
if (new_state & DRW_STATE_IN_FRONT_SELECT) {
/* XXX `GPU_depth_range` is not a perfect solution
* since very distant geometries can still be occluded.
* Also the depth test precision of these geometries is impaired.
* However, it solves the selection for the vast majority of cases. */
GPU_depth_range(0.0f, 0.01f);
}
else {
GPU_depth_range(0.0f, 1.0f);
}
if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) {
GPU_program_point_size(true);
}
else {
GPU_program_point_size(false);
}
}
void StencilSet::execute() const
{
GPU_stencil_write_mask_set(write_mask);
GPU_stencil_compare_mask_set(compare_mask);
GPU_stencil_reference_set(reference);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands Serialization for debugging
* \{ */
std::string ShaderBind::serialize() const
{
return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")";
}
std::string ResourceBind::serialize() const
{
switch (type) {
case Type::Sampler:
return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) +
(sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")";
case Type::Image:
return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::UniformBuf:
return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::StorageBuf:
return std::string(".bind_storage_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
default:
BLI_assert_unreachable();
return "";
}
}
std::string PushConstant::serialize() const
{
std::stringstream ss;
for (int i = 0; i < array_len; i++) {
switch (comp_len) {
case 1:
switch (type) {
case Type::IntValue:
ss << int1_value;
break;
case Type::IntReference:
ss << int_ref[i];
break;
case Type::FloatValue:
ss << float1_value;
break;
case Type::FloatReference:
ss << float_ref[i];
break;
}
break;
case 2:
switch (type) {
case Type::IntValue:
ss << int2_value;
break;
case Type::IntReference:
ss << int2_ref[i];
break;
case Type::FloatValue:
ss << float2_value;
break;
case Type::FloatReference:
ss << float2_ref[i];
break;
}
break;
case 3:
switch (type) {
case Type::IntValue:
ss << int3_value;
break;
case Type::IntReference:
ss << int3_ref[i];
break;
case Type::FloatValue:
ss << float3_value;
break;
case Type::FloatReference:
ss << float3_ref[i];
break;
}
break;
case 4:
switch (type) {
case Type::IntValue:
ss << int4_value;
break;
case Type::IntReference:
ss << int4_ref[i];
break;
case Type::FloatValue:
ss << float4_value;
break;
case Type::FloatReference:
ss << float4_ref[i];
break;
}
break;
case 16:
switch (type) {
case Type::IntValue:
case Type::IntReference:
BLI_assert_unreachable();
break;
case Type::FloatValue:
ss << *reinterpret_cast<const float4x4 *>(&float4_value);
break;
case Type::FloatReference:
ss << *float4x4_ref;
break;
}
break;
}
if (i < array_len - 1) {
ss << ", ";
}
}
return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")";
}
std::string Draw::serialize() const
{
std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len);
std::string vert_len = (vertex_len == (uint)-1) ? "from_batch" : std::to_string(vertex_len);
std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" :
std::to_string(vertex_first);
return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len +
", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) +
")";
}
std::string DrawMulti::serialize(std::string line_prefix) const
{
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(),
multi_draw_buf->prototype_count_);
/* This emulates the GPU sorting but without the unstable draw order. */
std::sort(
prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) {
return (a.group_id < b.group_id) ||
(a.group_id == b.group_id && a.resource_handle > b.resource_handle);
});
/* Compute prefix sum to have correct offsets. */
uint prefix_sum = 0u;
for (DrawGroup &group : groups) {
group.start = prefix_sum;
prefix_sum += group.front_proto_len + group.back_proto_len;
}
std::stringstream ss;
uint group_len = 0;
uint group_index = this->group_first;
while (group_index != (uint)-1) {
const DrawGroup &grp = groups[group_index];
ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")";
intptr_t offset = grp.start;
if (grp.back_proto_len > 0) {
for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) {
BLI_assert(proto.group_id == group_index);
ResourceHandle handle(proto.resource_handle);
BLI_assert(handle.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)";
}
offset += grp.back_proto_len;
}
if (grp.front_proto_len > 0) {
for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) {
BLI_assert(proto.group_id == group_index);
ResourceHandle handle(proto.resource_handle);
BLI_assert(!handle.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)";
}
}
group_index = grp.next;
group_len++;
}
ss << std::endl;
return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str();
}
std::string DrawIndirect::serialize() const
{
return std::string(".draw_indirect()");
}
std::string Dispatch::serialize() const
{
int3 sz = is_reference ? *size_ref : size;
return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) +
", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")";
}
std::string DispatchIndirect::serialize() const
{
return std::string(".dispatch_indirect()");
}
std::string Barrier::serialize() const
{
/* TODO(fclem): Better serialization... */
return std::string(".barrier(") + std::to_string(type) + ")";
}
std::string Clear::serialize() const
{
std::stringstream ss;
if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) {
ss << "color=" << color;
if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) {
ss << "depth=" << depth;
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << "stencil=0b" << std::bitset<8>(stencil) << ")";
}
return std::string(".clear(") + ss.str() + ")";
}
std::string StateSet::serialize() const
{
/* TODO(fclem): Better serialization... */
return std::string(".state_set(") + std::to_string(new_state) + ")";
}
std::string StencilSet::serialize() const
{
std::stringstream ss;
ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b"
<< std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference);
return ss.str();
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands buffers binding / command / resource ID generation
* \{ */
void DrawCommandBuf::bind(RecordingState &state,
Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands)
{
resource_id_count_ = 0;
for (const Header &header : headers) {
if (header.type != Type::Draw) {
continue;
}
Draw &cmd = commands[header.index].draw;
int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(
cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len);
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
if (cmd.vertex_len == (uint)-1) {
cmd.vertex_len = batch_vert_len;
}
if (cmd.handle.raw > 0) {
/* Save correct offset to start of resource_id buffer region for this draw. */
uint instance_first = resource_id_count_;
resource_id_count_ += cmd.instance_len;
/* Ensure the buffer is big enough. */
resource_id_buf_.get_or_resize(resource_id_count_ - 1);
/* Copy the resource id for all instances. */
uint index = cmd.handle.resource_index();
for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) {
resource_id_buf_[i] = index;
}
}
}
resource_id_buf_.push_update();
if (WORKAROUND_RESOURCE_ID) {
state.resource_id_buf = resource_id_buf_;
}
else {
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
}
}
void DrawMultiBuf::bind(RecordingState &state,
Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
VisibilityBuf &visibility_buf)
{
UNUSED_VARS(headers, commands);
GPU_debug_group_begin("DrawMultiBuf.bind");
resource_id_count_ = 0u;
for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) {
/* Compute prefix sum of all instances of previous groups. */
group.start = resource_id_count_;
resource_id_count_ += group.len;
int batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(group.gpu_batch,
&group.vertex_len,
&group.vertex_first,
&group.base_index,
&batch_inst_len);
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
UNUSED_VARS_NDEBUG(batch_inst_len);
/* Now that we have the batch info, we can set the counters to 0. */
group.total_counter = group.front_facing_counter = group.back_facing_counter = 0;
}
group_buf_.push_update();
prototype_buf_.push_update();
/* Allocate enough for the expansion pass. */
resource_id_buf_.get_or_resize(resource_id_count_);
/* Two commands per group. */
command_buf_.get_or_resize(group_count_ * 2);
if (prototype_count_ > 0) {
GPUShader *shader = DRW_shader_draw_command_generate_get();
GPU_shader_bind(shader);
GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_);
GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf"));
GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf"));
GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf"));
GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf"));
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1);
if (WORKAROUND_RESOURCE_ID) {
GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
state.resource_id_buf = resource_id_buf_;
}
else {
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
}
}
GPU_debug_group_end();
}
/** \} */
}; // namespace blender::draw::command

View File

@@ -0,0 +1,533 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*
* Passes record draw commands.
*/
#include "BKE_global.h"
#include "BLI_map.hh"
#include "DRW_gpu_wrapper.hh"
#include "draw_command_shared.hh"
#include "draw_handle.hh"
#include "draw_state.h"
#include "draw_view.hh"
namespace blender::draw::command {
class DrawCommandBuf;
class DrawMultiBuf;
/* -------------------------------------------------------------------- */
/** \name Recording State
* \{ */
/**
* Command recording state.
* Keep track of several states and avoid redundant state changes.
*/
struct RecordingState {
GPUShader *shader = nullptr;
bool front_facing = true;
bool inverted_view = false;
DRWState pipeline_state = DRW_STATE_NO_DRAW;
int view_clip_plane_count = 0;
/** Used for gl_BaseInstance workaround. */
GPUStorageBuf *resource_id_buf = nullptr;
void front_facing_set(bool facing)
{
/* Facing is inverted if view is not in expected handedness. */
facing = this->inverted_view == facing;
/* Remove redundant changes. */
if (assign_if_different(this->front_facing, facing)) {
GPU_front_facing(!facing);
}
}
void cleanup()
{
if (front_facing == false) {
GPU_front_facing(false);
}
if (G.debug & G_DEBUG_GPU) {
GPU_storagebuf_unbind_all();
GPU_texture_image_unbind_all();
GPU_texture_unbind_all();
GPU_uniformbuf_unbind_all();
}
}
};
/** \} */
/* -------------------------------------------------------------------- */
/** \name Regular Commands
* \{ */
enum class Type : uint8_t {
/**
* None Type commands are either uninitialized or are repurposed as data storage.
* They are skipped during submission.
*/
None = 0,
/** Commands stored as Undetermined in regular command buffer. */
Barrier,
Clear,
Dispatch,
DispatchIndirect,
Draw,
DrawIndirect,
PushConstant,
ResourceBind,
ShaderBind,
StateSet,
StencilSet,
/** Special commands stored in separate buffers. */
SubPass,
DrawMulti,
};
/**
* The index of the group is implicit since it is known by the one who wants to
* access it. This also allows having an indexed object to split the command
* stream.
*/
struct Header {
/** Command type. */
Type type;
/** Command index in command heap of this type. */
uint index;
};
struct ShaderBind {
GPUShader *shader;
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct ResourceBind {
eGPUSamplerState sampler;
int slot;
bool is_reference;
enum class Type : uint8_t {
Sampler = 0,
Image,
UniformBuf,
StorageBuf,
} type;
union {
/** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more
* debug info. */
GPUUniformBuf *uniform_buf;
GPUUniformBuf **uniform_buf_ref;
GPUStorageBuf *storage_buf;
GPUStorageBuf **storage_buf_ref;
/** NOTE: Texture is used for both Sampler and Image binds. */
GPUTexture *texture;
GPUTexture **texture_ref;
};
ResourceBind() = default;
ResourceBind(int slot_, GPUUniformBuf *res)
: slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){};
ResourceBind(int slot_, GPUUniformBuf **res)
: slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){};
ResourceBind(int slot_, GPUStorageBuf *res)
: slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){};
ResourceBind(int slot_, GPUStorageBuf **res)
: slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){};
ResourceBind(int slot_, draw::Image *res)
: slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){};
ResourceBind(int slot_, draw::Image **res)
: slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){};
ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state)
: sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), texture(res){};
ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state)
: sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){};
void execute() const;
std::string serialize() const;
};
struct PushConstant {
int location;
uint8_t array_len;
uint8_t comp_len;
enum class Type : uint8_t {
IntValue = 0,
FloatValue,
IntReference,
FloatReference,
} type;
/**
* IMPORTANT: Data is at the end of the struct as it can span over the next commands.
* These next commands are not real commands but just memory to hold the data and are not
* referenced by any Command::Header.
* This is a hack to support float4x4 copy.
*/
union {
int int1_value;
int2 int2_value;
int3 int3_value;
int4 int4_value;
float float1_value;
float2 float2_value;
float3 float3_value;
float4 float4_value;
const int *int_ref;
const int2 *int2_ref;
const int3 *int3_ref;
const int4 *int4_ref;
const float *float_ref;
const float2 *float2_ref;
const float3 *float3_ref;
const float4 *float4_ref;
const float4x4 *float4x4_ref;
};
PushConstant() = default;
PushConstant(int loc, const float &val)
: location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){};
PushConstant(int loc, const float2 &val)
: location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){};
PushConstant(int loc, const float3 &val)
: location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){};
PushConstant(int loc, const float4 &val)
: location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){};
PushConstant(int loc, const int &val)
: location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){};
PushConstant(int loc, const int2 &val)
: location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){};
PushConstant(int loc, const int3 &val)
: location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){};
PushConstant(int loc, const int4 &val)
: location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){};
PushConstant(int loc, const float *val, int arr)
: location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){};
PushConstant(int loc, const float2 *val, int arr)
: location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){};
PushConstant(int loc, const float3 *val, int arr)
: location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){};
PushConstant(int loc, const float4 *val, int arr)
: location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){};
PushConstant(int loc, const float4x4 *val)
: location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){};
PushConstant(int loc, const int *val, int arr)
: location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){};
PushConstant(int loc, const int2 *val, int arr)
: location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){};
PushConstant(int loc, const int3 *val, int arr)
: location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){};
PushConstant(int loc, const int4 *val, int arr)
: location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){};
void execute(RecordingState &state) const;
std::string serialize() const;
};
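A construction sketch for the value and reference flavors (the uniform locations are hypothetical):

PushConstant opacity(0, 0.5f);        /* FloatValue, comp_len = 1. */
PushConstant tint(1, float4(1.0f));   /* FloatValue, comp_len = 4. */
static float4x4 mat = float4x4::identity();
PushConstant winmat(2, &mat);         /* FloatReference, comp_len = 16; the pointee must outlive submission. */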
struct Draw {
GPUBatch *batch;
uint instance_len;
uint vertex_len;
uint vertex_first;
ResourceHandle handle;
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct DrawMulti {
GPUBatch *batch;
DrawMultiBuf *multi_draw_buf;
uint group_first;
uint uuid;
void execute(RecordingState &state) const;
std::string serialize(std::string line_prefix) const;
};
struct DrawIndirect {
GPUBatch *batch;
GPUStorageBuf **indirect_buf;
ResourceHandle handle;
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct Dispatch {
bool is_reference;
union {
int3 size;
int3 *size_ref;
};
Dispatch() = default;
Dispatch(int3 group_len) : is_reference(false), size(group_len){};
Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){};
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct DispatchIndirect {
GPUStorageBuf **indirect_buf;
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct Barrier {
eGPUBarrier type;
void execute() const;
std::string serialize() const;
};
struct Clear {
uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */
uint8_t stencil;
float depth;
float4 color;
void execute() const;
std::string serialize() const;
};
struct StateSet {
DRWState new_state;
void execute(RecordingState &state) const;
std::string serialize() const;
};
struct StencilSet {
uint write_mask;
uint compare_mask;
uint reference;
void execute() const;
std::string serialize() const;
};
union Undetermined {
ShaderBind shader_bind;
ResourceBind resource_bind;
PushConstant push_constant;
Draw draw;
DrawMulti draw_multi;
DrawIndirect draw_indirect;
Dispatch dispatch;
DispatchIndirect dispatch_indirect;
Barrier barrier;
Clear clear;
StateSet state_set;
StencilSet stencil_set;
};
/** Try to keep the command size as low as possible for performance. */
BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command types is too large.")
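/* Illustration of the PushConstant data hack described above: a by-value
 * float4x4 needs 64 bytes of payload, more than a single 24-byte Undetermined
 * slot can hold, so the copy runs into the next few command slots. Those extra
 * slots are never referenced by a Header and are skipped at submission, like
 * Type::None commands. */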
/** \} */
/* -------------------------------------------------------------------- */
/** \name Draw Commands
*
* A draw command buffer used to issue single draw commands without instance merging or any
* other optimizations.
* \{ */
class DrawCommandBuf {
friend Manager;
private:
using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>;
/** Array of resource IDs. One per instance. Generated on CPU and sent to GPU. */
ResourceIdBuf resource_id_buf_;
/** Used items in the resource_id_buf_. Not its allocated length. */
uint resource_id_count_ = 0;
public:
void clear(){};
void append_draw(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
GPUBatch *batch,
uint instance_len,
uint vertex_len,
uint vertex_first,
ResourceHandle handle)
{
vertex_first = vertex_first != -1 ? vertex_first : 0;
instance_len = instance_len != -1 ? instance_len : 1;
int64_t index = commands.append_and_get_index({});
headers.append({Type::Draw, static_cast<uint>(index)});
commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle};
}
void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands);
};
/** \} */
/* -------------------------------------------------------------------- */
/** \name Multi Draw Commands
*
* For efficient rendering of large scenes we strive to minimize the number of draw calls and
* state changes. This reduces the amount of work the CPU has to do. To this end, we group many
* rendering commands and sort them per render state using Command::MultiDraw as a container.
*
* We sort by Command::MultiDraw index using a prefix sum on CPU.
* Then we sort the MultiDrawUnit inside each MultiDraw by their drw_resource_id on GPU.
*
* For the sake of the example consider that:
* - Command1/2 are rendering with different shaders.
* - GPUBatch1/2 are two different mesh data block.
* - Each column is a MultiDrawUnit.
*
* +---------------------------------------------------------------+------------------------------+
* | CPU Timeline | Granularity |
* +---------------------------------------------------------------+------------------------------+
* | Command1 | Command2 | < Command::Header |
* | GPUBatch1 | GPUBatch2 | GPUBatch1 | < Command::MultiDraw |
* | 1 | 0 0 | 0 0 0 | 1 1 | < Front facing inverted |
* | MDI | MDI | MDI | MDI | < DrawIndirect emitted |
* +---------------------------------------------------------------+------------------------------+
* | GPU Timeline | Granularity |
* +---------------------------------------------------------------+------------------------------+
* | 4 | 2 5 | 1 3 4 | 6 7 | < Resource_id (sorted) |
* | 1 | 1 1 | 1 | 0 | 1 | 1 1 | < Visibility test result |
* | 4 | 2 + 5 | 1 + 4 | 6+7 | < DrawCommand (compacted) |
* +---------------------------------------------------------------+------------------------------+
*
* In the example above, we will issue 4 draw indirect calls.
*
* \{ */
class DrawMultiBuf {
friend Manager;
friend DrawMulti;
private:
using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>;
using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>;
using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>;
using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>;
using DrawGroupKey = std::pair<uint, GPUBatch *>;
using DrawGroupMap = Map<DrawGroupKey, uint>;
/** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */
DrawGroupMap group_ids_;
/** DrawGroup Command heap. Uploaded to GPU for sorting. */
DrawGroupBuf group_buf_ = {"DrawGroupBuf"};
/** Prototype commands. */
DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"};
/** Command list generated by the sorting / compaction steps. Lives on GPU. */
DrawCommandBuf command_buf_ = {"DrawCommandBuf"};
/** Array of resource id. One per instance. Lives on GPU. */
ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"};
/** Give unique ID to each header so we can use that as hash key. */
uint header_id_counter_ = 0;
/** Number of groups inside group_buf_. */
uint group_count_ = 0;
/** Number of prototype commands inside prototype_buf_. */
uint prototype_count_ = 0;
/** Used items in the resource_id_buf_. Not its allocated length. */
uint resource_id_count_ = 0;
public:
void clear()
{
header_id_counter_ = 0;
group_count_ = 0;
prototype_count_ = 0;
group_ids_.clear();
}
void append_draw(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
GPUBatch *batch,
uint instance_len,
uint vertex_len,
uint vertex_first,
ResourceHandle handle)
{
/* Unsupported for now. Use PassSimple. */
BLI_assert(vertex_first == 0 || vertex_first == -1);
BLI_assert(vertex_len == -1);
instance_len = instance_len != -1 ? instance_len : 1;
/* If there were state changes since the previous call, we have to create another command. */
if (headers.is_empty() || headers.last().type != Type::DrawMulti) {
uint index = commands.append_and_get_index({});
headers.append({Type::DrawMulti, index});
commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++};
}
DrawMulti &cmd = commands.last().draw_multi;
uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1);
bool inverted = handle.has_inverted_handedness();
if (group_id == (uint)-1) {
uint new_group_id = group_count_++;
DrawGroup &group = group_buf_.get_or_resize(new_group_id);
group.next = cmd.group_first;
group.len = instance_len;
group.front_facing_len = inverted ? 0 : instance_len;
group.gpu_batch = batch;
group.front_proto_len = 0;
group.back_proto_len = 0;
/* For serialization only. */
(inverted ? group.back_proto_len : group.front_proto_len)++;
/* Append to list. */
cmd.group_first = new_group_id;
group_id = new_group_id;
}
else {
DrawGroup &group = group_buf_[group_id];
group.len += instance_len;
group.front_facing_len += inverted ? 0 : instance_len;
/* For serialization only. */
(inverted ? group.back_proto_len : group.front_proto_len)++;
}
DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++);
draw.group_id = group_id;
draw.resource_handle = handle.raw;
draw.instance_len = instance_len;
}
void bind(RecordingState &state,
Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
VisibilityBuf &visibility_buf);
};
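A recording sketch under the assumptions of the asserts above (batch creation elided):

Vector<Header, 0> headers;
Vector<Undetermined, 0> commands;
DrawMultiBuf multi_draw_buf;
GPUBatch *batch = /* ... mesh batch from the draw cache ... */ nullptr;
/* Two instances of the same batch recorded under the same state merge into one DrawGroup. */
multi_draw_buf.append_draw(headers, commands, batch, 1, (uint)-1, (uint)-1, ResourceHandle(1, false));
multi_draw_buf.append_draw(headers, commands, batch, 1, (uint)-1, (uint)-1, ResourceHandle(2, true));
/* The second handle has inverted handedness, so its instance lands in the back-facing
 * sub-range and is emitted by the second indirect command of the group. */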
/** \} */
}; // namespace blender::draw::command

View File

@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
/** \file
* \ingroup draw
*/
#ifndef GPU_SHADER
# include "BLI_span.hh"
# include "GPU_shader_shared_utils.h"
namespace blender::draw::command {
struct RecordingState;
#endif
/* -------------------------------------------------------------------- */
/** \name Multi Draw
* \{ */
/**
* A DrawGroup allows splitting the command stream into batchable chunks of commands with
* the same render state.
*/
struct DrawGroup {
/** Index of next DrawGroup from the same header. */
uint next;
/** Index of the first instance after sorting. */
uint start;
/** Total number of instances (including inverted facing). Needed to issue the draw call. */
uint len;
/** Number of non inverted scaling instances in this Group. */
uint front_facing_len;
/** GPUBatch values to be copied to DrawCommand after sorting (if not overridden). */
int vertex_len;
int vertex_first;
int base_index;
/** Atomic counters used during command sorting. */
uint total_counter;
#ifndef GPU_SHADER
/* NOTE: Union just to make sure the struct always has the same size on all platforms. */
union {
struct {
/** For debug printing only. */
uint front_proto_len;
uint back_proto_len;
/** Needed to create the correct draw call. */
GPUBatch *gpu_batch;
};
struct {
#endif
uint front_facing_counter;
uint back_facing_counter;
uint _pad0, _pad1;
#ifndef GPU_SHADER
};
};
#endif
};
BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16)
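/* Illustration: on the CPU side the {front_proto_len, back_proto_len, gpu_batch}
 * setup/debug fields overlay the GPU-side {front_facing_counter, back_facing_counter,
 * _pad0, _pad1} atomic counters, so both compilation paths see the same
 * 16-byte-aligned layout checked by the assert above. */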
/**
* Representation of a future draw call inside a DrawGroup. This #DrawPrototype is then
* converted into #DrawCommand on GPU after visibility and compaction. Multiple
* #DrawPrototype might get merged into the same final #DrawCommand.
*/
struct DrawPrototype {
/* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */
uint group_id;
/* Resource handle associated with this call. Also reference visibility. */
uint resource_handle;
/* Number of instances. */
uint instance_len;
uint _pad0;
};
BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16)
/** \} */
#ifndef GPU_SHADER
}; // namespace blender::draw::command
#endif

View File

@@ -63,26 +63,26 @@ DebugDraw::DebugDraw()
void DebugDraw::init()
{
-cpu_print_buf_.command.v_count = 0;
-cpu_print_buf_.command.v_first = 0;
-cpu_print_buf_.command.i_count = 1;
-cpu_print_buf_.command.i_first = 0;
+cpu_print_buf_.command.vertex_len = 0;
+cpu_print_buf_.command.vertex_first = 0;
+cpu_print_buf_.command.instance_len = 1;
+cpu_print_buf_.command.instance_first_array = 0;
-cpu_draw_buf_.command.v_count = 0;
-cpu_draw_buf_.command.v_first = 0;
-cpu_draw_buf_.command.i_count = 1;
-cpu_draw_buf_.command.i_first = 0;
+cpu_draw_buf_.command.vertex_len = 0;
+cpu_draw_buf_.command.vertex_first = 0;
+cpu_draw_buf_.command.instance_len = 1;
+cpu_draw_buf_.command.instance_first_array = 0;
-gpu_print_buf_.command.v_count = 0;
-gpu_print_buf_.command.v_first = 0;
-gpu_print_buf_.command.i_count = 1;
-gpu_print_buf_.command.i_first = 0;
+gpu_print_buf_.command.vertex_len = 0;
+gpu_print_buf_.command.vertex_first = 0;
+gpu_print_buf_.command.instance_len = 1;
+gpu_print_buf_.command.instance_first_array = 0;
gpu_print_buf_used = false;
-gpu_draw_buf_.command.v_count = 0;
-gpu_draw_buf_.command.v_first = 0;
-gpu_draw_buf_.command.i_count = 1;
-gpu_draw_buf_.command.i_first = 0;
+gpu_draw_buf_.command.vertex_len = 0;
+gpu_draw_buf_.command.vertex_first = 0;
+gpu_draw_buf_.command.instance_len = 1;
+gpu_draw_buf_.command.instance_first_array = 0;
gpu_draw_buf_used = false;
modelmat_reset();
@@ -323,11 +323,11 @@ template<> void DebugDraw::print_value<uint4>(const uint4 &value)
void DebugDraw::draw_line(float3 v1, float3 v2, uint color)
{
DebugDrawBuf &buf = cpu_draw_buf_;
-uint index = buf.command.v_count;
+uint index = buf.command.vertex_len;
if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) {
buf.verts[index + 0] = vert_pack(model_mat_ * v1, color);
buf.verts[index + 1] = vert_pack(model_mat_ * v2, color);
-buf.command.v_count += 2;
+buf.command.vertex_len += 2;
}
}
@@ -356,7 +356,7 @@ DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color)
void DebugDraw::print_newline()
{
print_col_ = 0u;
-print_row_ = ++cpu_print_buf_.command.i_first;
+print_row_ = ++cpu_print_buf_.command.instance_first_array;
}
void DebugDraw::print_string_start(uint len)
@@ -406,7 +406,7 @@ void DebugDraw::print_char4(uint data)
break;
}
/* NOTE: Do not skip the header manually like in GPU. */
-uint cursor = cpu_print_buf_.command.v_count++;
+uint cursor = cpu_print_buf_.command.vertex_len++;
if (cursor < DRW_DEBUG_PRINT_MAX) {
/* For future usage. (i.e: Color) */
uint flags = 0u;
@@ -504,7 +504,7 @@ void DebugDraw::print_value_uint(uint value,
void DebugDraw::display_lines()
{
-if (cpu_draw_buf_.command.v_count == 0 && gpu_draw_buf_used == false) {
+if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) {
return;
}
GPU_debug_group_begin("Lines");
@@ -525,14 +525,14 @@ void DebugDraw::display_lines()
if (gpu_draw_buf_used) {
GPU_debug_group_begin("GPU");
GPU_storagebuf_bind(gpu_draw_buf_, slot);
-GPU_batch_draw_indirect(batch, gpu_draw_buf_);
+GPU_batch_draw_indirect(batch, gpu_draw_buf_, 0);
GPU_storagebuf_unbind(gpu_draw_buf_);
GPU_debug_group_end();
}
GPU_debug_group_begin("CPU");
GPU_storagebuf_bind(cpu_draw_buf_, slot);
-GPU_batch_draw_indirect(batch, cpu_draw_buf_);
+GPU_batch_draw_indirect(batch, cpu_draw_buf_, 0);
GPU_storagebuf_unbind(cpu_draw_buf_);
GPU_debug_group_end();
@@ -541,7 +541,7 @@ void DebugDraw::display_lines()
void DebugDraw::display_prints()
{
-if (cpu_print_buf_.command.v_count == 0 && gpu_print_buf_used == false) {
+if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) {
return;
}
GPU_debug_group_begin("Prints");
@@ -557,14 +557,14 @@ void DebugDraw::display_prints()
if (gpu_print_buf_used) {
GPU_debug_group_begin("GPU");
GPU_storagebuf_bind(gpu_print_buf_, slot);
-GPU_batch_draw_indirect(batch, gpu_print_buf_);
+GPU_batch_draw_indirect(batch, gpu_print_buf_, 0);
GPU_storagebuf_unbind(gpu_print_buf_);
GPU_debug_group_end();
}
GPU_debug_group_begin("CPU");
GPU_storagebuf_bind(cpu_print_buf_, slot);
-GPU_batch_draw_indirect(batch, cpu_print_buf_);
+GPU_batch_draw_indirect(batch, cpu_print_buf_, 0);
GPU_storagebuf_unbind(cpu_print_buf_);
GPU_debug_group_end();

View File

@@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation.
*/
/** \file
* \ingroup draw
*
* List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid
* dragging larger headers into the createInfo pipeline which would cause problems.
*/
#pragma once
#define DRW_VIEW_UBO_SLOT 0
#define DRW_RESOURCE_ID_SLOT 11
#define DRW_OBJ_MAT_SLOT 10
#define DRW_OBJ_INFOS_SLOT 9
#define DRW_OBJ_ATTR_SLOT 8
#define DRW_DEBUG_PRINT_SLOT 15
#define DRW_DEBUG_DRAW_SLOT 14
#define DRW_COMMAND_GROUP_SIZE 64
#define DRW_FINALIZE_GROUP_SIZE 64
/* Must be multiple of 32. Set to 32 for shader simplicity. */
#define DRW_VISIBILITY_GROUP_SIZE 32
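Both the shader create-info declarations and the C++ bind calls reference these defines, so the two sides stay in sync by construction. A sketch of the pattern (the type and buffer names here are illustrative):

/* Create-info side: */
.storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "matrix_buf[]")
/* Bind side, as done in Manager::submit() further down: */
GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT);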

View File

@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*
* A unique identifier for each object component.
* It is used to access each component's data such as matrices and object attributes.
* It is only valid for the current draw; it is not persistent.
*
* The most significant bit encodes whether the object needs to invert the front face winding
* because of its object matrix handedness. This is handy because sorting inside a
* #MultiDraw command will then put all inverted commands last.
*
* A default value of 0 points to a non-cullable object with a unit bounding box centered at
* the origin.
*/
#include "draw_shader_shared.h"
struct Object;
struct DupliObject;
namespace blender::draw {
struct ResourceHandle {
uint raw;
ResourceHandle() = default;
ResourceHandle(uint raw_) : raw(raw_){};
ResourceHandle(uint index, bool inverted_handedness)
{
raw = index;
SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u);
}
bool has_inverted_handedness() const
{
return (raw & 0x80000000u) != 0;
}
uint resource_index() const
{
return (raw & 0x7FFFFFFFu);
}
};
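A quick illustration of the bit packing:

ResourceHandle handle(5, true);
/* handle.raw == 0x80000005u */
BLI_assert(handle.resource_index() == 5 && handle.has_inverted_handedness());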
/* TODO(fclem): Move to somewhere more appropriate after cleaning up the header dependencies. */
struct ObjectRef {
Object *object;
/** Dupli object that corresponds to the current object. */
DupliObject *dupli_object;
/** Object that created the dupli-list the current object is part of. */
Object *dupli_parent;
};
}; // namespace blender::draw

View File

@@ -1001,6 +1001,8 @@ static void drw_engines_init(void)
static void drw_engines_cache_init(void)
{
+DRW_manager_begin_sync();
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
if (data->text_draw_cache) {
DRW_text_cache_destroy(data->text_draw_cache);
@@ -1072,6 +1074,8 @@ static void drw_engines_cache_finish(void)
engine->cache_finish(data);
}
}
+DRW_manager_end_sync();
}
static void drw_engines_draw_scene(void)

View File

@@ -0,0 +1,205 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
/** \file
* \ingroup draw
*/
#include "BKE_global.h"
#include "GPU_compute.h"
#include "draw_debug.hh"
#include "draw_defines.h"
#include "draw_manager.h"
#include "draw_manager.hh"
#include "draw_pass.hh"
#include "draw_shader.h"
namespace blender::draw {
Manager::~Manager()
{
for (GPUTexture *texture : acquired_textures) {
/* Decrease refcount and free if 0. */
GPU_texture_free(texture);
}
}
void Manager::begin_sync()
{
/* TODO: This means the reference is kept until the next redraw or manager teardown. Instead,
 * they should be released after each draw loop. For now this mimics the old DRW behavior. */
for (GPUTexture *texture : acquired_textures) {
/* Decrease refcount and free if 0. */
GPU_texture_free(texture);
}
acquired_textures.clear();
#ifdef DEBUG
/* Detect non-init data. */
memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data()));
memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data()));
memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data()));
#endif
resource_len_ = 0;
/* TODO(fclem): Resize buffers if too big, but with a hysteresis threshold. */
object_active = DST.draw_ctx.obact;
/* Init the 0 resource. */
resource_handle(float4x4::identity());
}
void Manager::end_sync()
{
GPU_debug_group_begin("Manager.end_sync");
matrix_buf.push_update();
bounds_buf.push_update();
infos_buf.push_update();
debug_bind();
/* Dispatch compute to finalize the resources on GPU. Saves a bit of CPU time. */
uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE);
GPUShader *shader = DRW_shader_draw_resource_finalize_get();
GPU_shader_bind(shader);
GPU_shader_uniform_1i(shader, "resource_len", resource_len_);
GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf"));
GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf"));
GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf"));
GPU_compute_dispatch(shader, thread_groups, 1, 1);
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
GPU_debug_group_end();
}
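/* Worked example (illustrative): with DRW_FINALIZE_GROUP_SIZE = 64, syncing e.g. 1000 resources
 * dispatches divide_ceil_u(1000, 64) = 16 thread groups: 15 full groups plus 1 partial one. The
 * `resource_len` uniform set above lets the shader skip out-of-range threads in the last group. */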
void Manager::debug_bind()
{
#ifdef DEBUG
if (DST.debug == nullptr) {
return;
}
GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT);
GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT);
# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER
/* Add a barrier to allow multiple shaders to write to the same buffer. */
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
# endif
#endif
}
void Manager::submit(PassSimple &pass, View &view)
{
view.bind();
debug_bind();
command::RecordingState state;
state.inverted_view = view.is_inverted();
pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_);
GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT);
GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT);
// GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */
pass.submit(state);
state.cleanup();
}
void Manager::submit(PassMain &pass, View &view)
{
view.bind();
debug_bind();
bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d &&
(DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0);
view.compute_visibility(bounds_buf, resource_len_, freeze_culling);
command::RecordingState state;
state.inverted_view = view.is_inverted();
pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_);
GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT);
GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT);
// GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */
pass.submit(state);
state.cleanup();
}
void Manager::submit(PassSortable &pass, View &view)
{
pass.sort();
this->submit(static_cast<PassMain &>(pass), view);
}
void Manager::submit(PassSimple &pass)
{
debug_bind();
command::RecordingState state;
pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_);
GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT);
GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT);
// GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */
pass.submit(state);
state.cleanup();
}
Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view)
{
submit(pass, view);
pass.draw_commands_buf_.resource_id_buf_.read();
Manager::SubmitDebugOutput output;
output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(),
pass.draw_commands_buf_.resource_id_count_};
/* There is no visibility data for PassSimple. */
output.visibility = {(uint *)view.visibility_buf_.data(), 0};
return output;
}
Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view)
{
submit(pass, view);
GPU_finish();
pass.draw_commands_buf_.resource_id_buf_.read();
view.visibility_buf_.read();
Manager::SubmitDebugOutput output;
output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(),
pass.draw_commands_buf_.resource_id_count_};
output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)};
return output;
}
Manager::DataDebugOutput Manager::data_debug()
{
matrix_buf.read();
bounds_buf.read();
infos_buf.read();
Manager::DataDebugOutput output;
output.matrices = {matrix_buf.data(), resource_len_};
output.bounds = {bounds_buf.data(), resource_len_};
output.infos = {infos_buf.data(), resource_len_};
return output;
}
} // namespace blender::draw

View File

@@ -694,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_
void drw_engine_data_cache_release(GPUViewport *viewport);
void drw_engine_data_free(GPUViewport *viewport);
void DRW_manager_begin_sync(void);
void DRW_manager_end_sync(void);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,187 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*
*/
#include "BLI_sys_types.h"
#include "draw_resource.hh"
#include "draw_view.hh"
#include <string>
namespace blender::draw {
/* Forward declarations. */
namespace detail {
template<typename T> class Pass;
} // namespace detail
namespace command {
class DrawCommandBuf;
class DrawMultiBuf;
} // namespace command
using PassSimple = detail::Pass<command::DrawCommandBuf>;
using PassMain = detail::Pass<command::DrawMultiBuf>;
class PassSortable;
class Manager {
using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>;
using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>;
using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>;
public:
struct SubmitDebugOutput {
/** Indexed by resource id. */
Span<uint32_t> visibility;
/** Indexed by drawn instance. */
Span<uint32_t> resource_id;
};
struct DataDebugOutput {
/** Indexed by resource id. */
Span<ObjectMatrices> matrices;
/** Indexed by resource id. */
Span<ObjectBounds> bounds;
/** Indexed by resource id. */
Span<ObjectInfos> infos;
};
/**
* Buffers containing all object data. Referenced by resource index.
* Exposed as public members for shader access after sync.
*/
ObjectMatricesBuf matrix_buf;
ObjectBoundsBuf bounds_buf;
ObjectInfosBuf infos_buf;
/** List of textures coming from Image data-blocks. They need to be refcounted in order to avoid
* being freed in another thread. */
Vector<GPUTexture *> acquired_textures;
private:
uint resource_len_ = 0;
Object *object = nullptr;
Object *object_active = nullptr;
public:
Manager(){};
~Manager();
/**
* Create a new resource handle for the given object. Can be called multiple times with the
* same object **successively** without duplicating the data.
*/
ResourceHandle resource_handle(const ObjectRef ref);
/**
* Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled
* and there won't be any associated object info / bounds. Assumes correct handedness / winding.
*/
ResourceHandle resource_handle(const float4x4 &model_matrix);
/**
* Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will
* be culled, but there won't be any associated object info. Assumes correct handedness /
* winding.
*/
ResourceHandle resource_handle(const float4x4 &model_matrix,
const float3 &bounds_center,
const float3 &bounds_half_extent);
/**
* Populate additional per resource data on demand.
*/
void extract_object_attributes(ResourceHandle handle,
Object &object,
Span<GPUMaterial *> materials);
/**
* Submit a pass for drawing. All resource references will be dereferenced and commands will be
* sent to the GPU.
*/
void submit(PassSimple &pass, View &view);
void submit(PassMain &pass, View &view);
void submit(PassSortable &pass, View &view);
/**
* Variant without any view. Must not contain any shader using `draw_view` create info.
*/
void submit(PassSimple &pass);
/**
* Submit a pass for drawing but read back all data buffers for inspection.
*/
SubmitDebugOutput submit_debug(PassSimple &pass, View &view);
SubmitDebugOutput submit_debug(PassMain &pass, View &view);
/**
* Check data buffers of the draw manager. Only to be used after end_sync().
*/
DataDebugOutput data_debug();
/**
* Will acquire the texture using ref counting and release it after drawing. To be used for
* textures coming from Blender Image data-blocks.
*/
void acquire_texture(GPUTexture *texture)
{
GPU_texture_ref(texture);
acquired_textures.append(texture);
}
/** TODO(fclem): The following should become private at some point. */
void begin_sync();
void end_sync();
void debug_bind();
};
inline ResourceHandle Manager::resource_handle(const ObjectRef ref)
{
bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active;
matrix_buf.get_or_resize(resource_len_).sync(*ref.object);
bounds_buf.get_or_resize(resource_len_).sync(*ref.object);
infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object);
return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0);
}
inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix)
{
matrix_buf.get_or_resize(resource_len_).sync(model_matrix);
bounds_buf.get_or_resize(resource_len_).sync();
infos_buf.get_or_resize(resource_len_).sync();
return ResourceHandle(resource_len_++, false);
}
inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix,
const float3 &bounds_center,
const float3 &bounds_half_extent)
{
matrix_buf.get_or_resize(resource_len_).sync(model_matrix);
bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent);
infos_buf.get_or_resize(resource_len_).sync();
return ResourceHandle(resource_len_++, false);
}
inline void Manager::extract_object_attributes(ResourceHandle handle,
Object &object,
Span<GPUMaterial *> materials)
{
/* TODO */
(void)handle;
(void)object;
(void)materials;
}
} // namespace blender::draw
/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the
* callbacks. */
blender::draw::Manager *DRW_manager_get();
blender::draw::ObjectRef DRW_object_ref_get(Object *object);
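/* Putting the API together: a hypothetical engine frame under this patch set (a sketch; `ob`,
 * `main_pass` and `view` are placeholders, not part of the original file): */
// Manager &manager = *DRW_manager_get();
// manager.begin_sync();
// ResourceHandle handle = manager.resource_handle(DRW_object_ref_get(ob));
// manager.end_sync();              /* Uploads and finalizes the resource buffers on GPU. */
// manager.submit(main_pass, view); /* Culls against `view`, then issues the GPU commands. */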

View File

@@ -891,7 +891,7 @@ static void draw_call_indirect(DRWShadingGroup *shgroup,
}
GPU_batch_set_shader(batch, shgroup->shader);
GPU_batch_draw_indirect(batch, indirect_buf);
GPU_batch_draw_indirect(batch, indirect_buf, 0);
}
static void draw_call_batching_start(DRWCommandsState *state)

View File

@@ -0,0 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
/** \file
* \ingroup draw
*/
#include "draw_pass.hh"
namespace blender::draw {
/* -------------------------------------------------------------------- */
/** \name Pass Submission
* \{ */
/** \} */
} // namespace blender::draw

View File

@@ -0,0 +1,996 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*
* Passes record draw commands. Different pass types exist for different purposes, but they only
* differ in resource load (memory & CPU usage). They can be swapped without any functional
* change.
*
* `PassMain`:
* Should be used for heavy-load passes such as ones that may contain scene objects. Draw-call
* submission is optimized for a large number of draw-calls, but there is a significant overhead
* per #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow grouping
* of commands.
*
* `Pass(Main|Simple)::Sub`:
* A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub()
* and is auto-managed. This means it can be created, filled and thrown away. A #PassSub reference
* is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub
* are inserted, at submission, into the parent #Pass where the sub-pass was created.
*
* `PassSimple`:
* Does not have the overhead of #PassMain but does not have the culling and batching optimization.
*
* NOTE: A pass can be recorded once and resubmitted any number of times. This can be a good
* optimization for passes that are always the same for each frame. The only thing to be aware of
* is the lifetime of external resources. If a pass contains draw-calls with a non-default
* ResourceHandle (not 0) or a reference to any non-static resources (GPUBatch, PushConstant ref,
* ResourceBind ref), it will have to be re-recorded if any of these references becomes invalid.
*/
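/* For illustration (not part of the original file), a minimal recording sketch; `shader`,
 * `batch` and `handle` are placeholders:
 *
 *   PassSimple pass = {"MyPass"};
 *   pass.init();
 *   pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS_EQUAL);
 *   pass.shader_set(shader);
 *   pass.push_constant("color", float4(1.0f));
 *   pass.draw(batch, handle);
 *
 * The same recording can then be resubmitted every frame as long as the referenced resources
 * stay valid. */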
#include "BKE_image.h"
#include "BLI_vector.hh"
#include "DRW_gpu_wrapper.hh"
#include "GPU_debug.h"
#include "GPU_material.h"
#include "draw_command.hh"
#include "draw_handle.hh"
#include "draw_manager.hh"
#include "draw_pass.hh"
#include "draw_shader_shared.h"
#include "draw_state.h"
#include "intern/gpu_codegen.h"
namespace blender::draw {
using namespace blender::draw;
using namespace blender::draw::command;
class Manager;
/* -------------------------------------------------------------------- */
/** \name Pass API
* \{ */
namespace detail {
/**
* Special container that never moves allocated items and has fast indexing.
*/
template<typename T,
/** Number of elements of type T to allocate together. */
int64_t block_size = 16>
class SubPassVector {
private:
Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_;
public:
void clear()
{
blocks_.clear();
}
int64_t append_and_get_index(T &&elem)
{
/* Do not go over the inline size so that existing members never move. */
if (blocks_.is_empty() || blocks_.last()->size() == block_size) {
blocks_.append(std::make_unique<Vector<T, block_size>>());
}
return blocks_.last()->append_and_get_index(std::move(elem)) + (blocks_.size() - 1) * block_size;
}
T &operator[](int64_t index)
{
return (*blocks_[index / block_size])[index % block_size];
}
const T &operator[](int64_t index) const
{
return (*blocks_[index / block_size])[index % block_size];
}
};
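/* Indexing example (illustrative): with the default block_size of 16, index 19 resolves to
 * (*blocks_[19 / 16])[19 % 16], i.e. block 1, slot 3. Because blocks are heap-allocated and
 * never grown past block_size, a reference to an element stays valid while new sub-passes are
 * appended. */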
/**
* Public API of a draw pass.
*/
template<
/** Type of command buffer used to create the draw calls. */
typename DrawCommandBufType>
class PassBase {
friend Manager;
/** Will use texture own sampler state. */
static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX;
protected:
/** Highest level of the command stream. Split command stream in different command types. */
Vector<command::Header, 0> headers_;
/** Commands referenced by headers (which contains their types). */
Vector<command::Undetermined, 0> commands_;
/* Reference to draw commands buffer. Either own or from parent pass. */
DrawCommandBufType &draw_commands_buf_;
/* Reference to sub-pass commands buffer. Either own or from parent pass. */
SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_;
/** Currently bound shader. Used for interface queries. */
GPUShader *shader_;
public:
const char *debug_name;
PassBase(const char *name,
DrawCommandBufType &draw_command_buf,
SubPassVector<PassBase<DrawCommandBufType>> &sub_passes,
GPUShader *shader = nullptr)
: draw_commands_buf_(draw_command_buf),
sub_passes_(sub_passes),
shader_(shader),
debug_name(name){};
/**
* Reset the pass command pool.
* NOTE: Implemented in derived class. Not a virtual function to avoid indirection. Listed here
* only for API readability.
*/
void init();
/**
* Create a sub-pass inside this pass.
*/
PassBase<DrawCommandBufType> &sub(const char *name);
/**
* Changes the fixed function pipeline state.
* Starts as DRW_STATE_NO_DRAW at the start of a Pass submission.
* Sub-passes inherit the previous state.
*
* IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil()
* to ensure correct behavior of stencil-aware draws.
*/
void state_set(DRWState state);
/**
* Clear the current frame-buffer.
*/
void clear_color(float4 color);
void clear_depth(float depth);
void clear_stencil(uint8_t stencil);
void clear_depth_stencil(float depth, uint8_t stencil);
void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil);
/**
* Reminders:
* - (compare_mask & reference) is what is tested against (compare_mask & stencil_value)
* stencil_value being the value stored in the stencil buffer.
* - (write-mask & reference) is what gets written if the test condition is fulfilled.
*/
void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask);
/**
* Bind a shader. Any following bind() or push_constant() call will use its interface.
*/
void shader_set(GPUShader *shader);
/**
* Bind a material shader along with its associated resources. Any following bind() or
* push_constant() call will use its interface.
* IMPORTANT: Assumes material is compiled and can be used (no compilation error).
*/
void material_set(Manager &manager, GPUMaterial *material);
/**
* Record a draw call.
* NOTE: Setting the count or first to -1 will use the values from the batch.
* NOTE: An instance or vertex count of 0 will discard the draw call. It will not be recorded.
*/
void draw(GPUBatch *batch,
uint instance_len = -1,
uint vertex_len = -1,
uint vertex_first = -1,
ResourceHandle handle = {0});
/**
* Shorter version for the common case.
* NOTE: Implemented in derived class. Not a virtual function to avoid indirection.
*/
void draw(GPUBatch *batch, ResourceHandle handle);
/**
* Record a procedural draw call. Geometry is **NOT** sourced from a GPUBatch.
* NOTE: An instance or vertex count of 0 will discard the draw call. It will not be recorded.
*/
void draw_procedural(GPUPrimType primitive,
uint instance_len,
uint vertex_len,
uint vertex_first = -1,
ResourceHandle handle = {0});
/**
* Indirect variants.
* NOTE: If needed, the resource id also needs to be set accordingly in the DrawCommand.
*/
void draw_indirect(GPUBatch *batch,
StorageBuffer<DrawCommand, true> &indirect_buffer,
ResourceHandle handle = {0});
void draw_procedural_indirect(GPUPrimType primitive,
StorageBuffer<DrawCommand, true> &indirect_buffer,
ResourceHandle handle = {0});
/**
* Record a compute dispatch call.
*/
void dispatch(int3 group_len);
void dispatch(int3 *group_len);
void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer);
/**
* Record a barrier call to synchronize arbitrary load/store operation between draw calls.
*/
void barrier(eGPUBarrier type);
/**
* Bind a shader resource.
*
* Reference versions are to be used when the resource might be resized / reallocated or even
* changed between the time it is referenced and the time it is dereferenced for drawing.
*
* IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure the
* data is still alive until pass submission.
*
* NOTE: Variations using slot will not query a shader interface and can be used before
* binding a shader.
*/
void bind_image(const char *name, GPUTexture *image);
void bind_image(const char *name, GPUTexture **image);
void bind_image(int slot, GPUTexture *image);
void bind_image(int slot, GPUTexture **image);
void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto);
void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto);
void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto);
void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto);
void bind_ssbo(const char *name, GPUStorageBuf *buffer);
void bind_ssbo(const char *name, GPUStorageBuf **buffer);
void bind_ssbo(int slot, GPUStorageBuf *buffer);
void bind_ssbo(int slot, GPUStorageBuf **buffer);
void bind_ubo(const char *name, GPUUniformBuf *buffer);
void bind_ubo(const char *name, GPUUniformBuf **buffer);
void bind_ubo(int slot, GPUUniformBuf *buffer);
void bind_ubo(int slot, GPUUniformBuf **buffer);
/**
* Update a shader constant.
*
* Reference versions are to be used when the resource might change between the time it is
* referenced and the time it is dereferenced for drawing.
*
* IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure the
* data is still alive until pass submission.
*
* NOTE: bool reference version is expected to take bool1 reference which is aliased to int.
*/
void push_constant(const char *name, const float &data);
void push_constant(const char *name, const float2 &data);
void push_constant(const char *name, const float3 &data);
void push_constant(const char *name, const float4 &data);
void push_constant(const char *name, const int &data);
void push_constant(const char *name, const int2 &data);
void push_constant(const char *name, const int3 &data);
void push_constant(const char *name, const int4 &data);
void push_constant(const char *name, const bool &data);
void push_constant(const char *name, const float4x4 &data);
void push_constant(const char *name, const float *data, int array_len = 1);
void push_constant(const char *name, const float2 *data, int array_len = 1);
void push_constant(const char *name, const float3 *data, int array_len = 1);
void push_constant(const char *name, const float4 *data, int array_len = 1);
void push_constant(const char *name, const int *data, int array_len = 1);
void push_constant(const char *name, const int2 *data, int array_len = 1);
void push_constant(const char *name, const int3 *data, int array_len = 1);
void push_constant(const char *name, const int4 *data, int array_len = 1);
void push_constant(const char *name, const float4x4 *data);
/**
* Turn the pass into a string for inspection.
*/
std::string serialize(std::string line_prefix = "") const;
friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass)
{
return stream << pass.serialize();
}
protected:
/**
* Internal Helpers
*/
int push_constant_offset(const char *name);
void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil);
GPUBatch *procedural_batch_get(GPUPrimType primitive);
/**
* Return a new command recorded with the given type.
*/
command::Undetermined &create_command(command::Type type);
void submit(command::RecordingState &state) const;
};
template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> {
public:
using Sub = detail::PassBase<DrawCommandBufType>;
private:
/** Sub-passes referenced by headers. */
SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_;
/** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */
DrawCommandBufType draw_commands_buf_main_;
public:
Pass(const char *name)
: detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){};
void init()
{
this->headers_.clear();
this->commands_.clear();
this->sub_passes_.clear();
this->draw_commands_buf_.clear();
}
};
} // namespace detail
/** \} */
/* -------------------------------------------------------------------- */
/** \name Pass types
* \{ */
/**
* Normal pass type. No visibility or draw-call optimization.
*/
// using PassSimple = detail::Pass<DrawCommandBuf>;
/**
* Main pass type.
* Optimized for many draw calls and sub-pass.
*
* IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially
* high overhead due to batching and culling optimizations.
*/
// using PassMain = detail::Pass<DrawMultiBuf>;
/**
* Special pass type for rendering transparent objects.
* The base level can only be composed of sub-passes that will be ordered by a special value.
*/
class PassSortable : public PassMain {
friend Manager;
private:
/** Sorting value associated with each sub pass. */
Vector<float> sorting_values_;
bool sorted_ = false;
public:
PassSortable(const char *name_) : PassMain(name_){};
void init()
{
sorting_values_.clear();
sorted_ = false;
PassMain::init();
}
PassMain::Sub &sub(const char *name, float sorting_value)
{
int64_t index = sub_passes_.append_and_get_index(
PassBase(name, draw_commands_buf_, sub_passes_, shader_));
headers_.append({Type::SubPass, static_cast<uint>(index)});
sorting_values_.append(sorting_value);
return sub_passes_[index];
}
std::string serialize(std::string line_prefix = "") const
{
if (sorted_ == false) {
const_cast<PassSortable *>(this)->sort();
}
return PassMain::serialize(line_prefix);
}
protected:
void sort()
{
if (sorted_ == false) {
std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) {
BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass);
float a_val = sorting_values_[a.index];
float b_val = sorting_values_[b.index];
return a_val < b_val || (a_val == b_val && a.index < b.index);
});
sorted_ = true;
}
}
};
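/* Usage sketch (illustrative; `shader`, `batch`, `handle` and `z_dist` are placeholders, not
 * part of the original file):
 *
 *   PassSortable transparent_ps = {"Transparency"};
 *   transparent_ps.init();
 *   PassMain::Sub &sub = transparent_ps.sub("Object", z_dist);
 *   sub.shader_set(shader);
 *   sub.draw(batch, handle);
 *
 * Sub-passes are drawn in ascending order of their sorting value; Manager::submit() on a
 * PassSortable calls sort() before submission. */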
/** \} */
namespace detail {
/* -------------------------------------------------------------------- */
/** \name PassBase Implementation
* \{ */
template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type)
{
int64_t index = commands_.append_and_get_index({});
headers_.append({type, static_cast<uint>(index)});
return commands_[index];
}
template<class T>
inline void PassBase<T>::clear(eGPUFrameBufferBits planes,
float4 color,
float depth,
uint8_t stencil)
{
create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color};
}
template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive)
{
switch (primitive) {
case GPU_PRIM_POINTS:
return drw_cache_procedural_points_get();
case GPU_PRIM_LINES:
return drw_cache_procedural_lines_get();
case GPU_PRIM_TRIS:
return drw_cache_procedural_triangles_get();
case GPU_PRIM_TRI_STRIP:
return drw_cache_procedural_triangle_strips_get();
default:
/* Add new one as needed. */
BLI_assert_unreachable();
return nullptr;
}
}
template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name)
{
int64_t index = sub_passes_.append_and_get_index(
PassBase(name, draw_commands_buf_, sub_passes_, shader_));
headers_.append({command::Type::SubPass, static_cast<uint>(index)});
return sub_passes_[index];
}
template<class T> void PassBase<T>::submit(command::RecordingState &state) const
{
GPU_debug_group_begin(debug_name);
for (const command::Header &header : headers_) {
switch (header.type) {
default:
case Type::None:
break;
case Type::SubPass:
sub_passes_[header.index].submit(state);
break;
case command::Type::ShaderBind:
commands_[header.index].shader_bind.execute(state);
break;
case command::Type::ResourceBind:
commands_[header.index].resource_bind.execute();
break;
case command::Type::PushConstant:
commands_[header.index].push_constant.execute(state);
break;
case command::Type::Draw:
commands_[header.index].draw.execute(state);
break;
case command::Type::DrawMulti:
commands_[header.index].draw_multi.execute(state);
break;
case command::Type::DrawIndirect:
commands_[header.index].draw_indirect.execute(state);
break;
case command::Type::Dispatch:
commands_[header.index].dispatch.execute(state);
break;
case command::Type::DispatchIndirect:
commands_[header.index].dispatch_indirect.execute(state);
break;
case command::Type::Barrier:
commands_[header.index].barrier.execute();
break;
case command::Type::Clear:
commands_[header.index].clear.execute();
break;
case command::Type::StateSet:
commands_[header.index].state_set.execute(state);
break;
case command::Type::StencilSet:
commands_[header.index].stencil_set.execute();
break;
}
}
GPU_debug_group_end();
}
template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const
{
std::stringstream ss;
ss << line_prefix << "." << debug_name << std::endl;
line_prefix += " ";
for (const command::Header &header : headers_) {
switch (header.type) {
default:
case Type::None:
break;
case Type::SubPass:
ss << sub_passes_[header.index].serialize(line_prefix);
break;
case Type::ShaderBind:
ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl;
break;
case Type::ResourceBind:
ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl;
break;
case Type::PushConstant:
ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl;
break;
case Type::Draw:
ss << line_prefix << commands_[header.index].draw.serialize() << std::endl;
break;
case Type::DrawMulti:
ss << commands_[header.index].draw_multi.serialize(line_prefix);
break;
case Type::DrawIndirect:
ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl;
break;
case Type::Dispatch:
ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl;
break;
case Type::DispatchIndirect:
ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl;
break;
case Type::Barrier:
ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl;
break;
case Type::Clear:
ss << line_prefix << commands_[header.index].clear.serialize() << std::endl;
break;
case Type::StateSet:
ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl;
break;
case Type::StencilSet:
ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl;
break;
}
}
return ss.str();
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Draw calls
* \{ */
template<class T>
inline void PassBase<T>::draw(
GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle)
{
if (instance_len == 0 || vertex_len == 0) {
return;
}
BLI_assert(shader_);
draw_commands_buf_.append_draw(
headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle);
}
template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle)
{
draw(batch, -1, -1, -1, handle);
}
template<class T>
inline void PassBase<T>::draw_procedural(GPUPrimType primitive,
uint instance_len,
uint vertex_len,
uint vertex_first,
ResourceHandle handle)
{
draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Indirect draw calls
* \{ */
template<class T>
inline void PassBase<T>::draw_indirect(GPUBatch *batch,
StorageBuffer<DrawCommand, true> &indirect_buffer,
ResourceHandle handle)
{
BLI_assert(shader_);
create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle};
}
template<class T>
inline void PassBase<T>::draw_procedural_indirect(
GPUPrimType primitive,
StorageBuffer<DrawCommand, true> &indirect_buffer,
ResourceHandle handle)
{
draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compute Dispatch Implementation
* \{ */
template<class T> inline void PassBase<T>::dispatch(int3 group_len)
{
BLI_assert(shader_);
create_command(Type::Dispatch).dispatch = {group_len};
}
template<class T> inline void PassBase<T>::dispatch(int3 *group_len)
{
BLI_assert(shader_);
create_command(Type::Dispatch).dispatch = {group_len};
}
template<class T>
inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer)
{
BLI_assert(shader_);
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer};
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Clear Implementation
* \{ */
template<class T> inline void PassBase<T>::clear_color(float4 color)
{
clear(GPU_COLOR_BIT, color, 0.0f, 0);
}
template<class T> inline void PassBase<T>::clear_depth(float depth)
{
clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0);
}
template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil)
{
clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil);
}
template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil)
{
clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil);
}
template<class T>
inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil)
{
clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Barrier Implementation
* \{ */
template<class T> inline void PassBase<T>::barrier(eGPUBarrier type)
{
create_command(Type::Barrier).barrier = {type};
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name State Implementation
* \{ */
template<class T> inline void PassBase<T>::state_set(DRWState state)
{
create_command(Type::StateSet).state_set = {state};
}
template<class T>
inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask)
{
create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask};
}
template<class T> inline void PassBase<T>::shader_set(GPUShader *shader)
{
shader_ = shader;
create_command(Type::ShaderBind).shader_bind = {shader};
}
template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial *material)
{
GPUPass *gpupass = GPU_material_get_pass(material);
shader_set(GPU_pass_shader_get(gpupass));
/* Bind all textures needed by the material. */
ListBase textures = GPU_material_textures(material);
for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) {
if (tex->ima) {
/* Image */
ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr;
if (tex->tiled_mapping_name[0]) {
GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr);
manager.acquire_texture(tiles);
bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state);
GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr);
manager.acquire_texture(tile_map);
bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state);
}
else {
GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr);
manager.acquire_texture(texture);
bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state);
}
}
else if (tex->colorband) {
/* Color Ramp */
bind_texture(tex->sampler_name, *tex->colorband);
}
}
GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material);
if (ubo != nullptr) {
bind_ubo(GPU_UBO_BLOCK_NAME, ubo);
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Resource bind Implementation
* \{ */
template<class T> inline int PassBase<T>::push_constant_offset(const char *name)
{
return GPU_shader_get_uniform(shader_, name);
}
template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer)
{
bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer);
}
template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer)
{
bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer);
}
template<class T>
inline void PassBase<T>::bind_texture(const char *name,
GPUTexture *texture,
eGPUSamplerState state)
{
bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state);
}
template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image)
{
bind_texture(GPU_shader_get_texture_binding(shader_, name), image);
}
template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer)
{
create_command(Type::ResourceBind).resource_bind = {slot, buffer};
}
template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer)
{
create_command(Type::ResourceBind).resource_bind = {slot, buffer};
}
template<class T>
inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state)
{
create_command(Type::ResourceBind).resource_bind = {slot, texture, state};
}
template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image)
{
create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)};
}
template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer)
{
bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer);
}
template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer)
{
bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer);
}
template<class T>
inline void PassBase<T>::bind_texture(const char *name,
GPUTexture **texture,
eGPUSamplerState state)
{
bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state);
}
template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image)
{
bind_image(GPU_shader_get_texture_binding(shader_, name), image);
}
template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer)
{
create_command(Type::ResourceBind).resource_bind = {slot, buffer};
}
template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf **buffer)
{
create_command(Type::ResourceBind).resource_bind = {slot, buffer};
}
template<class T>
inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state)
{
create_command(Type::ResourceBind).resource_bind = {slot, texture, state};
}
template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image)
{
create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)};
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Push Constant Implementation
* \{ */
template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const int4 &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T>
inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data)
{
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data};
}
template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data)
{
/* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4.
* This assumes that all commands are always stored in a flat array of memory. */
Undetermined commands[3];
PushConstant &cmd = commands[0].push_constant;
cmd.location = push_constant_offset(name);
cmd.array_len = 1;
cmd.comp_len = 16;
cmd.type = PushConstant::Type::FloatValue;
/* The copy overwrites the next 2 commands. We append them as Type::None so they are not evaluated. */
*reinterpret_cast<float4x4 *>(&cmd.float4_value) = data;
create_command(Type::PushConstant) = commands[0];
create_command(Type::None) = commands[1];
create_command(Type::None) = commands[2];
}
/** \} */
} // namespace detail
} // namespace blender::draw

View File

@@ -0,0 +1,199 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*
* Component / object-level resources like object attributes, matrices, visibility, etc.
* Each of them is referenced by a resource index (#ResourceHandle).
*/
#include "BKE_curve.h"
#include "BKE_duplilist.h"
#include "BKE_mesh.h"
#include "BKE_object.h"
#include "BKE_volume.h"
#include "BLI_hash.h"
#include "DNA_curve_types.h"
#include "DNA_layer_types.h"
#include "DNA_meta_types.h"
#include "DNA_object_types.h"
#include "draw_handle.hh"
#include "draw_manager.hh"
#include "draw_shader_shared.h"
/* -------------------------------------------------------------------- */
/** \name ObjectMatrices
* \{ */
inline void ObjectMatrices::sync(const Object &object)
{
model = object.obmat;
model_inverse = object.imat;
}
inline void ObjectMatrices::sync(const float4x4 &model_matrix)
{
model = model_matrix;
model_inverse = model_matrix.inverted();
}
inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices)
{
stream << "ObjectMatrices(" << std::endl;
stream << "model=" << matrices.model << ", " << std::endl;
stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl;
return stream;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name ObjectInfos
* \{ */
ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE)
inline void ObjectInfos::sync()
{
flag = eObjectInfoFlag::OBJECT_NO_INFO;
}
inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object)
{
color = ref.object->color;
index = ref.object->index;
SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE);
SET_FLAG_FROM_TEST(
flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED);
SET_FLAG_FROM_TEST(
flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI);
SET_FLAG_FROM_TEST(
flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET);
SET_FLAG_FROM_TEST(
flag, ref.object->transflag & OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE);
if (ref.dupli_object == nullptr) {
/* TODO(fclem): this is rather costly to do at draw time. Maybe we can
* put it in ob->runtime and make depsgraph ensure it is up to date. */
random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * (1.0f / 0xFFFFFFFF);
}
else {
random = ref.dupli_object->random_id * (1.0f / 0xFFFFFFFF);
}
/* Default values. Set if needed. NOTE: `random` is already set in both branches above, so it
 * must not be reset here (doing so would discard the computed value). */
if (ref.object->data == nullptr) {
orco_add = float3(0.0f);
orco_mul = float3(1.0f);
return;
}
switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) {
case ID_VO: {
BoundBox &bbox = *BKE_volume_boundbox_get(ref.object);
orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */
orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */
break;
}
case ID_ME: {
BKE_mesh_texspace_get((Mesh *)ref.object->data, orco_add, orco_mul);
break;
}
case ID_CU_LEGACY: {
Curve &cu = *(Curve *)ref.object->data;
BKE_curve_texspace_ensure(&cu);
orco_add = cu.loc;
orco_mul = cu.size;
break;
}
case ID_MB: {
MetaBall &mb = *(MetaBall *)ref.object->data;
orco_add = mb.loc;
orco_mul = mb.size;
break;
}
default:
orco_add = float3(0.0f);
orco_mul = float3(1.0f);
break;
}
}
inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos)
{
stream << "ObjectInfos(";
if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) {
stream << "skipped)" << std::endl;
return stream;
}
stream << "orco_add=" << infos.orco_add << ", ";
stream << "orco_mul=" << infos.orco_mul << ", ";
stream << "color=" << infos.color << ", ";
stream << "index=" << infos.index << ", ";
stream << "random=" << infos.random << ", ";
stream << "flag=" << infos.flag << ")" << std::endl;
return stream;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name ObjectBounds
* \{ */
inline void ObjectBounds::sync()
{
bounding_sphere.w = -1.0f; /* Disable test. */
}
inline void ObjectBounds::sync(Object &ob)
{
const BoundBox *bbox = BKE_object_boundbox_get(&ob);
if (bbox == nullptr) {
bounding_sphere.w = -1.0f; /* Disable test. */
return;
}
*reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0];
*reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4];
*reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3];
*reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1];
bounding_sphere.w = 0.0f; /* Enable test. */
}
inline void ObjectBounds::sync(const float3 &center, const float3 &size)
{
*reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size;
*reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z);
*reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z);
*reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z);
bounding_sphere.w = 0.0f; /* Enable test. */
}
inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds)
{
stream << "ObjectBounds(";
if (bounds.bounding_sphere.w == -1.0f) {
stream << "skipped)" << std::endl;
return stream;
}
stream << std::endl;
stream << ".bounding_corners[0]"
<< *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl;
stream << ".bounding_corners[1]"
<< *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl;
stream << ".bounding_corners[2]"
<< *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl;
stream << ".bounding_corners[3]"
<< *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl;
stream << ".sphere=(pos=" << float3(bounds.bounding_sphere)
<< ", rad=" << bounds.bounding_sphere.w << std::endl;
stream << ")" << std::endl;
return stream;
}
/** \} */

View File

@@ -17,15 +17,15 @@
#include "draw_shader.h"
extern "C" char datatoc_common_hair_lib_glsl[];
extern "C" char datatoc_common_hair_refine_vert_glsl[];
extern "C" char datatoc_common_hair_refine_comp_glsl[];
extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[];
static struct {
struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER];
struct GPUShader *debug_print_display_sh;
struct GPUShader *debug_draw_display_sh;
struct GPUShader *draw_visibility_compute_sh;
struct GPUShader *draw_resource_finalize_sh;
struct GPUShader *draw_command_generate_sh;
} e_data = {{nullptr}};
/* -------------------------------------------------------------------- */
@@ -127,6 +127,31 @@ GPUShader *DRW_shader_debug_draw_display_get()
return e_data.debug_draw_display_sh;
}
GPUShader *DRW_shader_draw_visibility_compute_get()
{
if (e_data.draw_visibility_compute_sh == nullptr) {
e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name(
"draw_visibility_compute");
}
return e_data.draw_visibility_compute_sh;
}
GPUShader *DRW_shader_draw_resource_finalize_get()
{
if (e_data.draw_resource_finalize_sh == nullptr) {
e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize");
}
return e_data.draw_resource_finalize_sh;
}
GPUShader *DRW_shader_draw_command_generate_get()
{
if (e_data.draw_command_generate_sh == nullptr) {
e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate");
}
return e_data.draw_command_generate_sh;
}
/** \} */
void DRW_shaders_free()
@@ -136,4 +161,7 @@ void DRW_shaders_free()
}
DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh);
DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh);
DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh);
DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh);
DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh);
}

View File

@@ -32,6 +32,9 @@ struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type,
struct GPUShader *DRW_shader_debug_print_display_get(void);
struct GPUShader *DRW_shader_debug_draw_display_get(void);
struct GPUShader *DRW_shader_draw_visibility_compute_get(void);
struct GPUShader *DRW_shader_draw_resource_finalize_get(void);
struct GPUShader *DRW_shader_draw_command_generate_get(void);
void DRW_shaders_free(void);

View File

@@ -5,18 +5,35 @@
# include "GPU_shader.h"
# include "GPU_shader_shared_utils.h"
# include "draw_defines.h"
typedef struct ViewInfos ViewInfos;
typedef struct ObjectMatrices ObjectMatrices;
typedef struct ObjectInfos ObjectInfos;
typedef struct ObjectBounds ObjectBounds;
typedef struct VolumeInfos VolumeInfos;
typedef struct CurvesInfos CurvesInfos;
typedef struct DrawCommand DrawCommand;
typedef struct DrawCommandIndexed DrawCommandIndexed;
typedef struct DispatchCommand DispatchCommand;
typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer;
typedef struct DRWDebugVert DRWDebugVert;
typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer;
# ifdef __cplusplus
/* C++ only forward declarations. */
struct Object;
namespace blender::draw {
struct ObjectRef;
} // namespace blender::draw
# else /* __cplusplus */
/* C only forward declarations. */
typedef enum eObjectInfoFlag eObjectInfoFlag;
# endif
#endif
#define DRW_SHADER_SHARED_H
@@ -48,15 +65,17 @@ struct ViewInfos {
float2 viewport_size_inverse;
/** Frustum culling data. */
/** NOTE: vec3 arrays are padded to vec4. */
float4 frustum_corners[8];
float4 frustum_corners[8]; /** NOTE: vec3 array padded to vec4. */
float4 frustum_planes[6];
float4 frustum_bound_sphere;
/** For debugging purpose */
/* Mouse pixel. */
int2 mouse_pixel;
int2 _pad0;
/** True if facing needs to be inverted. */
bool1 is_inverted;
int _pad0;
};
BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16)
@@ -74,18 +93,84 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16)
# define CameraTexCoFactors drw_view.viewcamtexcofac
#endif
/** \} */
/* -------------------------------------------------------------------- */
/** \name Debug draw shapes
* \{ */
struct ObjectMatrices {
float4x4 drw_modelMatrix;
float4x4 drw_modelMatrixInverse;
float4x4 model;
float4x4 model_inverse;
#if !defined(GPU_SHADER) && defined(__cplusplus)
void sync(const Object &object);
void sync(const float4x4 &model_matrix);
#endif
};
BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16)
enum eObjectInfoFlag {
OBJECT_SELECTED = (1u << 0u),
OBJECT_FROM_DUPLI = (1u << 1u),
OBJECT_FROM_SET = (1u << 2u),
OBJECT_ACTIVE = (1u << 3u),
OBJECT_NEGATIVE_SCALE = (1u << 4u),
/* Avoid a skipped info changing the culling result. */
OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE
};
BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16)
struct ObjectInfos {
float4 drw_OrcoTexCoFactors[2];
float4 drw_ObjectColor;
float4 drw_Infos;
#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER)
/* TODO: Rename to struct member names for GLSL too. */
float4 orco_mul_bias[2];
float4 color;
float4 infos;
#else
/** Uploaded as center + size. Converted to mul+bias to local coord. */
float3 orco_add;
float _pad0;
float3 orco_mul;
float _pad1;
float4 color;
uint index;
uint _pad2;
float random;
eObjectInfoFlag flag;
#endif
#if !defined(GPU_SHADER) && defined(__cplusplus)
void sync();
void sync(const blender::draw::ObjectRef ref, bool is_active_object);
#endif
};
BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16)
BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16)
struct ObjectBounds {
/**
* Uploaded as vertices (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order.
* Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling.
*/
float4 bounding_corners[4];
/** Bounding sphere derived from the bounding corners. Computed on GPU. */
float4 bounding_sphere;
/** Radius of the inscribed sphere derived from the bounding corners. Computed on GPU. */
#define _inner_sphere_radius bounding_corners[3].w
#if !defined(GPU_SHADER) && defined(__cplusplus)
void sync();
void sync(Object &ob);
void sync(const float3 &center, const float3 &size);
#endif
};
BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16)
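/* Corner convention sketch (matches ObjectBounds::sync(center, size) in this patch set): for a
 * box of center `c` and half-extent `s`:
 *   bounding_corners[0] = c - s                    (vertex 0)
 *   bounding_corners[1] = c + (+s.x, -s.y, -s.z)   (vertex 4)
 *   bounding_corners[2] = c + (-s.x, +s.y, -s.z)   (vertex 3)
 *   bounding_corners[3] = c + (-s.x, -s.y, +s.z)   (vertex 1)
 * The GPU finalize pass then rewrites corners 1..3 as edge vectors (4-0, 3-0, 1-0). */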
/** \} */
/* -------------------------------------------------------------------- */
/** \name Object attributes
* \{ */
struct VolumeInfos {
/* Object to grid-space. */
@@ -107,32 +192,35 @@ struct CurvesInfos {
};
BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16)
#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors)
#define ObjectInfo (drw_infos[resource_id].drw_Infos)
#define ObjectColor (drw_infos[resource_id].drw_ObjectColor)
/** \} */
/* Indirect commands structures. */
/* -------------------------------------------------------------------- */
/** \name Indirect commands structures.
* \{ */
struct DrawCommand {
uint v_count;
uint i_count;
uint v_first;
uint i_first;
/* TODO(fclem): Rename */
uint vertex_len;
uint instance_len;
uint vertex_first;
#if defined(GPU_SHADER)
uint base_index;
/* NOTE: base_index is i_first for non-indexed draw-calls. */
# define _instance_first_array base_index
#else
union {
uint base_index;
/* Use this instead of instance_first_indexed for non-indexed draw-calls. */
uint instance_first_array;
};
#endif
uint instance_first_indexed;
uint _pad0, _pad1, _pad2;
};
BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16)
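/* Fill example for a non-indexed draw on the CPU side (illustrative, not part of the original
 * file): */
// DrawCommand cmd = {};
// cmd.vertex_len = 36;          /* E.g. a cube as 12 triangles. */
// cmd.instance_len = 10;
// cmd.vertex_first = 0;
// cmd.instance_first_array = 0; /* Union alias of base_index for non-indexed draw-calls. */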
struct DrawCommandIndexed {
uint v_count;
uint i_count;
uint v_first;
uint base_index;
uint i_first;
uint _pad0;
uint _pad1;
uint _pad2;
};
BLI_STATIC_ASSERT_ALIGN(DrawCommandIndexed, 16)
struct DispatchCommand {
uint num_groups_x;
uint num_groups_y;
@@ -141,6 +229,8 @@ struct DispatchCommand {
};
BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Debug print
* \{ */

View File

@@ -0,0 +1,225 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/** \file
* \ingroup draw
*
* Internal Pipeline State tracking. It is higher level than GPU state as everything fits in a
* single enum.
*/
/**
* DRWState is a bit-mask that stores the current render state and the desired render state. Based
* on the differences, the minimum state changes can be invoked to set up the desired render state.
*
* The Write Stencil, Stencil test, Depth test and Blend state options are mutually exclusive,
* therefore they aren't ordered as a bit mask.
*/
typedef enum {
/** To be used for compute passes. */
DRW_STATE_NO_DRAW = 0,
/** Write mask */
DRW_STATE_WRITE_DEPTH = (1 << 0),
DRW_STATE_WRITE_COLOR = (1 << 1),
/* Write Stencil. These options are mutually exclusive and packed into 2 bits. */
DRW_STATE_WRITE_STENCIL = (1 << 2),
DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2),
DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2),
/** Depth test. These options are mutually exclusive and packed into 3 bits. */
DRW_STATE_DEPTH_ALWAYS = (1 << 4),
DRW_STATE_DEPTH_LESS = (2 << 4),
DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4),
DRW_STATE_DEPTH_EQUAL = (4 << 4),
DRW_STATE_DEPTH_GREATER = (5 << 4),
DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4),
/** Culling test */
DRW_STATE_CULL_BACK = (1 << 7),
DRW_STATE_CULL_FRONT = (1 << 8),
/** Stencil test. These options are mutually exclusive and packed into 2 bits. */
DRW_STATE_STENCIL_ALWAYS = (1 << 9),
DRW_STATE_STENCIL_EQUAL = (2 << 9),
DRW_STATE_STENCIL_NEQUAL = (3 << 9),
/** Blend state. These options are mutually exclusive and packed into 4 bits. */
DRW_STATE_BLEND_ADD = (1 << 11),
/** Same as additive but lets alpha accumulate without pre-multiply. */
DRW_STATE_BLEND_ADD_FULL = (2 << 11),
/** Standard alpha blending. */
DRW_STATE_BLEND_ALPHA = (3 << 11),
/** Use this if the color is already pre-multiplied by alpha. */
DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11),
DRW_STATE_BLEND_BACKGROUND = (5 << 11),
DRW_STATE_BLEND_OIT = (6 << 11),
DRW_STATE_BLEND_MUL = (7 << 11),
DRW_STATE_BLEND_SUB = (8 << 11),
/** Use dual source blending. WARNING: Only one color buffer allowed. */
DRW_STATE_BLEND_CUSTOM = (9 << 11),
DRW_STATE_LOGIC_INVERT = (10 << 11),
DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11),
DRW_STATE_IN_FRONT_SELECT = (1 << 27),
DRW_STATE_SHADOW_OFFSET = (1 << 28),
DRW_STATE_CLIP_PLANES = (1 << 29),
DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30),
/** DO NOT USE. Assumed always enabled. Only used internally. */
DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31),
} DRWState;
ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE);
#define DRW_STATE_DEFAULT \
(DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL)
#define DRW_STATE_BLEND_ENABLED \
(DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \
DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \
DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT)
#define DRW_STATE_RASTERIZER_ENABLED \
(DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \
DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL)
#define DRW_STATE_DEPTH_TEST_ENABLED \
(DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \
DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL)
#define DRW_STATE_STENCIL_TEST_ENABLED \
(DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL)
#define DRW_STATE_WRITE_STENCIL_ENABLED \
(DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \
DRW_STATE_WRITE_STENCIL_SHADOW_FAIL)
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
namespace blender::draw {
/* -------------------------------------------------------------------- */
/** \name DRWState to GPU state conversion
* \{ */
static inline eGPUWriteMask to_write_mask(DRWState state)
{
eGPUWriteMask write_mask = GPU_WRITE_NONE;
if (state & DRW_STATE_WRITE_DEPTH) {
write_mask |= GPU_WRITE_DEPTH;
}
if (state & DRW_STATE_WRITE_COLOR) {
write_mask |= GPU_WRITE_COLOR;
}
if (state & DRW_STATE_WRITE_STENCIL_ENABLED) {
write_mask |= GPU_WRITE_STENCIL;
}
return write_mask;
}
static inline eGPUFaceCullTest to_face_cull_test(DRWState state)
{
switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) {
case DRW_STATE_CULL_BACK:
return GPU_CULL_BACK;
case DRW_STATE_CULL_FRONT:
return GPU_CULL_FRONT;
default:
return GPU_CULL_NONE;
}
}
static inline eGPUDepthTest to_depth_test(DRWState state)
{
switch (state & DRW_STATE_DEPTH_TEST_ENABLED) {
case DRW_STATE_DEPTH_LESS:
return GPU_DEPTH_LESS;
case DRW_STATE_DEPTH_LESS_EQUAL:
return GPU_DEPTH_LESS_EQUAL;
case DRW_STATE_DEPTH_EQUAL:
return GPU_DEPTH_EQUAL;
case DRW_STATE_DEPTH_GREATER:
return GPU_DEPTH_GREATER;
case DRW_STATE_DEPTH_GREATER_EQUAL:
return GPU_DEPTH_GREATER_EQUAL;
case DRW_STATE_DEPTH_ALWAYS:
return GPU_DEPTH_ALWAYS;
default:
return GPU_DEPTH_NONE;
}
}
static inline eGPUStencilOp to_stencil_op(DRWState state)
{
switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) {
case DRW_STATE_WRITE_STENCIL:
return GPU_STENCIL_OP_REPLACE;
case DRW_STATE_WRITE_STENCIL_SHADOW_PASS:
return GPU_STENCIL_OP_COUNT_DEPTH_PASS;
case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL:
return GPU_STENCIL_OP_COUNT_DEPTH_FAIL;
default:
return GPU_STENCIL_OP_NONE;
}
}
static inline eGPUStencilTest to_stencil_test(DRWState state)
{
switch (state & DRW_STATE_STENCIL_TEST_ENABLED) {
case DRW_STATE_STENCIL_ALWAYS:
return GPU_STENCIL_ALWAYS;
case DRW_STATE_STENCIL_EQUAL:
return GPU_STENCIL_EQUAL;
case DRW_STATE_STENCIL_NEQUAL:
return GPU_STENCIL_NEQUAL;
default:
return GPU_STENCIL_NONE;
}
}
static inline eGPUBlend to_blend(DRWState state)
{
switch (state & DRW_STATE_BLEND_ENABLED) {
case DRW_STATE_BLEND_ADD:
return GPU_BLEND_ADDITIVE;
case DRW_STATE_BLEND_ADD_FULL:
return GPU_BLEND_ADDITIVE_PREMULT;
case DRW_STATE_BLEND_ALPHA:
return GPU_BLEND_ALPHA;
case DRW_STATE_BLEND_ALPHA_PREMUL:
return GPU_BLEND_ALPHA_PREMULT;
case DRW_STATE_BLEND_BACKGROUND:
return GPU_BLEND_BACKGROUND;
case DRW_STATE_BLEND_OIT:
return GPU_BLEND_OIT;
case DRW_STATE_BLEND_MUL:
return GPU_BLEND_MULTIPLY;
case DRW_STATE_BLEND_SUB:
return GPU_BLEND_SUBTRACT;
case DRW_STATE_BLEND_CUSTOM:
return GPU_BLEND_CUSTOM;
case DRW_STATE_LOGIC_INVERT:
return GPU_BLEND_INVERT;
case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL:
return GPU_BLEND_ALPHA_UNDER_PREMUL;
default:
return GPU_BLEND_NONE;
}
}
static inline eGPUProvokingVertex to_provoking_vertex(DRWState state)
{
switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) {
case DRW_STATE_FIRST_VERTEX_CONVENTION:
return GPU_VERTEX_FIRST;
default:
return GPU_VERTEX_LAST;
}
}
/** \} */
}  // namespace blender::draw
#endif
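/* Usage sketch (not part of the patch): a combined state decomposes through the helpers
* above. The state value below is illustrative only.
*
* DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL | DRW_STATE_BLEND_ALPHA;
* to_write_mask(state); --> GPU_WRITE_COLOR
* to_depth_test(state); --> GPU_DEPTH_LESS_EQUAL
* to_blend(state); --> GPU_BLEND_ALPHA
* to_stencil_test(state); --> GPU_STENCIL_NONE (no stencil bits set)
*/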

View File

@@ -0,0 +1,332 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
/** \file
* \ingroup draw
*/
#include "BLI_math_geom.h"
#include "GPU_compute.h"
#include "GPU_debug.h"
#include "draw_debug.hh"
#include "draw_shader.h"
#include "draw_view.hh"
namespace blender::draw {
void View::sync(const float4x4 &view_mat, const float4x4 &win_mat)
{
data_.viewmat = view_mat;
data_.viewinv = view_mat.inverted();
data_.winmat = win_mat;
data_.wininv = win_mat.inverted();
data_.persmat = data_.winmat * data_.viewmat;
data_.persinv = data_.persmat.inverted();
/* Should not be used anymore. */
data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f);
data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr()));
update_view_vectors();
BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners);
BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere);
frustum_boundbox_calc(bound_box);
frustum_culling_planes_calc();
frustum_culling_sphere_calc(bound_box, bound_sphere);
dirty_ = true;
}
void View::frustum_boundbox_calc(BoundBox &bbox)
{
/* Extract the 8 corners from a Projection Matrix. */
#if 0 /* Equivalent to this but it has accuracy problems. */
BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f), float3(1.0f));
for (int i = 0; i < 8; i++) {
mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]);
}
#endif
float left, right, bottom, top, near, far;
bool is_persp = data_.winmat[3][3] == 0.0f;
projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far);
bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near;
bbox.vec[0][0] = bbox.vec[3][0] = left;
bbox.vec[4][0] = bbox.vec[7][0] = right;
bbox.vec[0][1] = bbox.vec[4][1] = bottom;
bbox.vec[7][1] = bbox.vec[3][1] = top;
/* Get the coordinates of the far plane. */
if (is_persp) {
float sca_far = far / near;
left *= sca_far;
right *= sca_far;
bottom *= sca_far;
top *= sca_far;
}
bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far;
bbox.vec[1][0] = bbox.vec[2][0] = left;
bbox.vec[6][0] = bbox.vec[5][0] = right;
bbox.vec[1][1] = bbox.vec[5][1] = bottom;
bbox.vec[2][1] = bbox.vec[6][1] = top;
/* Transform into world space. */
for (int i = 0; i < 8; i++) {
mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]);
}
}
void View::frustum_culling_planes_calc()
{
planes_from_projmat(data_.persmat.ptr(),
data_.frustum_planes[0],
data_.frustum_planes[5],
data_.frustum_planes[1],
data_.frustum_planes[3],
data_.frustum_planes[4],
data_.frustum_planes[2]);
/* Normalize. */
for (int p = 0; p < 6; p++) {
data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]);
}
}
void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere)
{
/* Extract Bounding Sphere */
if (data_.winmat[3][3] != 0.0f) {
/* Orthographic */
/* The most extreme points on the near and far plane. (normalized device coords). */
const float *nearpoint = bbox.vec[0];
const float *farpoint = bbox.vec[6];
/* just use median point */
mid_v3_v3v3(bsphere.center, farpoint, nearpoint);
bsphere.radius = len_v3v3(bsphere.center, farpoint);
}
else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) {
/* Perspective with symmetrical frustum. */
/* We obtain the center and radius of the circumscribed circle of the
* isosceles trapezoid formed by the diagonals of the near and far clipping planes. */
/* center of each clipping plane */
float mid_min[3], mid_max[3];
mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]);
mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]);
/* square length of the diagonals of each clipping plane */
float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]);
float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]);
/* distance squared between clipping planes */
float h_sq = len_squared_v3v3(mid_min, mid_max);
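/* Derivation: with the center at fraction `fac` from `mid_min` towards `mid_max`, equating
* the squared distances to a near corner and a far corner gives
* (fac * h)^2 + a_sq / 4 = ((1 - fac) * h)^2 + b_sq / 4
* which solves to fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq). */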
float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq);
/* The goal is to get the smallest sphere,
* not the sphere that passes through each corner */
CLAMP(fac, 0.0f, 1.0f);
interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac);
/* distance from the center to one of the points of the far plane (1, 2, 5, 6) */
bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]);
}
else {
/* Perspective with asymmetrical frustum. */
/* We put the sphere center on the line that goes from origin
* to the center of the far clipping plane. */
/* Detect which corner of the far clipping plane is the farthest from the origin */
float nfar[4]; /* most extreme far point in NDC space */
float farxy[2]; /* far-point projection onto the near plane */
float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */
float nearpoint[3]; /* most extreme near point in camera coordinate */
float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */
float F = -1.0f, N; /* square distance of far and near point to origin */
float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */
float e, s; /* far and near clipping distance (<0) */
float c; /* slope of center line = distance of far clipping center
* to z axis / far clipping distance. */
float z; /* projection of sphere center on z axis (<0) */
/* Find farthest corner and center of far clip plane. */
float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */
for (int i = 0; i < 4; i++) {
float point[3];
mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner);
float len = len_squared_v3(point);
if (len > F) {
copy_v3_v3(nfar, corner);
copy_v3_v3(farpoint, point);
F = len;
}
add_v3_v3(farcenter, point);
/* rotate by 90 degrees to walk through the 4 points of the far clip plane */
float tmp = corner[0];
corner[0] = -corner[1];
corner[1] = tmp;
}
/* the far center is the average of the far clipping points */
mul_v3_fl(farcenter, 0.25f);
/* the extreme near point is the opposite point on the near clipping plane */
copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f);
mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar);
/* this is a frustum projection */
N = len_squared_v3(nearpoint);
e = farpoint[2];
s = nearpoint[2];
/* distance to view Z axis */
f = len_v2(farpoint);
/* get corresponding point on the near plane */
mul_v2_v2fl(farxy, farpoint, s / e);
/* this formula preserves the sign of n */
sub_v2_v2(nearpoint, farxy);
n = f * s / e - len_v2(nearpoint);
c = len_v2(farcenter) / e;
/* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */
z = (F - N) / (2.0f * (e - s + c * (f - n)));
bsphere.center[0] = farcenter[0] * z / e;
bsphere.center[1] = farcenter[1] * z / e;
bsphere.center[2] = z;
/* For XR, the view matrix may contain a scale factor. Then, transforming only the center
* into world space after calculating the radius will result in incorrect behavior. */
mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */
mul_m4_v3(data_.viewinv.ptr(), farpoint);
bsphere.radius = len_v3v3(bsphere.center, farpoint);
}
}
void View::set_clip_planes(Span<float4> planes)
{
BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes));
int i = 0;
for (const auto &plane : planes) {
data_.clip_planes[i++] = plane;
}
}
void View::update_viewport_size()
{
float4 viewport;
GPU_viewport_size_get_f(viewport);
float2 viewport_size = float2(viewport.z, viewport.w);
if (assign_if_different(data_.viewport_size, viewport_size)) {
dirty_ = true;
}
}
void View::update_view_vectors()
{
bool is_persp = data_.winmat[3][3] == 0.0f;
/* Near clip distance. */
data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) :
-(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2];
/* Far clip distance. */
data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) :
-(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2];
/* View vectors for the corners of the view frustum.
* Can be used to recreate the world space position easily */
float3 view_vecs[4] = {
{-1.0f, -1.0f, -1.0f},
{1.0f, -1.0f, -1.0f},
{-1.0f, 1.0f, -1.0f},
{-1.0f, -1.0f, 1.0f},
};
/* Convert the view vectors to view space */
for (int i = 0; i < 4; i++) {
mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]);
/* Normalized trick see:
* http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */
if (is_persp) {
view_vecs[i].x /= view_vecs[i].z;
view_vecs[i].y /= view_vecs[i].z;
}
}
/**
* If ortho : view_vecs[0] is the near-bottom-left corner of the frustum and
* view_vecs[1] is the vector going from the near-bottom-left corner to
* the far-top-right corner.
* If Persp : view_vecs[0].xy is the bottom-left corner of the frustum slice at Z = 1 and
* view_vecs[1].xy is the size of that slice at Z = 1.
* view_vecs[0].z is the near clip distance and view_vecs[1].z is the (signed)
* distance from the near plane to the far clip plane.
*/
copy_v3_v3(data_.viewvecs[0], view_vecs[0]);
/* we need to store the differences */
data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0];
data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1];
data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2];
}
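/* Sketch (not part of the patch): intended consumption of this layout for the orthographic
* case. A view-space position is recreated from hypothetical screen-space `uv` and buffer
* `depth` inputs (both in [0..1]) with a single MADD per component. */
static float3 view_position_from_depth_ortho(float2 uv, float depth, const float4 viewvecs[2])
{
return float3(viewvecs[0].x + uv.x * viewvecs[1].x,
viewvecs[0].y + uv.y * viewvecs[1].y,
viewvecs[0].z + depth * viewvecs[1].z);
}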
void View::bind()
{
update_viewport_size();
if (dirty_) {
dirty_ = false;
data_.push_update();
}
GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT);
}
void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze)
{
if (debug_freeze && frozen_ == false) {
data_freeze_ = static_cast<ViewInfos>(data_);
data_freeze_.push_update();
}
#ifdef DEBUG
if (debug_freeze) {
drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1));
}
#endif
frozen_ = debug_freeze;
GPU_debug_group_begin("View.compute_visibility");
/* TODO(fclem): Early out if visibility hasn't changed. */
/* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. */
visibility_buf_.resize(divide_ceil_u(resource_len, 128));
uint32_t data = 0xFFFFFFFFu;
GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data);
if (do_visibility_) {
GPUShader *shader = DRW_shader_draw_visibility_compute_get();
GPU_shader_bind(shader);
GPU_shader_uniform_1i(shader, "resource_len", resource_len);
GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf"));
GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf"));
GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT);
GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1);
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
}
if (frozen_) {
/* Bind back the non-frozen data. */
GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT);
}
GPU_debug_group_end();
}
} // namespace blender::draw
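/* Consumer-side sketch (not part of the patch): the visibility result packs one bit per
* resource ID, 32 bits per `uint` (hence the `divide_ceil_u(resource_len, 128)` above for
* the `uint4`-typed storage). A CPU-side test of one bit would look like: */
static inline bool is_resource_visible(const uint32_t *visibility, uint32_t resource_index)
{
/* Mirrors the shader-side test in the visibility and command generation shaders. */
return (visibility[resource_index / 32u] & (1u << (resource_index % 32u))) != 0u;
}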

View File

@@ -0,0 +1,94 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2022 Blender Foundation. */
#pragma once
/** \file
* \ingroup draw
*/
#include "DRW_gpu_wrapper.hh"
#include "DRW_render.h"
#include "draw_shader_shared.h"
namespace blender::draw {
class Manager;
/* TODO deduplicate. */
using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>;
/* NOTE: Using uint4 for declaration but bound as uint. */
using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>;
class View {
friend Manager;
private:
UniformBuffer<ViewInfos> data_;
/** Frozen version of data_ used for debugging culling. */
UniformBuffer<ViewInfos> data_freeze_;
/** Result of the visibility computation. 1 bit per resource ID. */
VisibilityBuf visibility_buf_;
const char *debug_name_;
bool do_visibility_ = true;
bool dirty_ = true;
bool frozen_ = false;
public:
View(const char *name) : visibility_buf_(name), debug_name_(name) {}
/* For compatibility with old system. Will be removed at some point. */
View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name)
{
float4x4 view_mat, win_mat;
DRW_view_viewmat_get(view, view_mat.ptr(), false);
DRW_view_winmat_get(view, win_mat.ptr(), false);
this->sync(view_mat, win_mat);
}
void set_clip_planes(Span<float4> planes);
void sync(const float4x4 &view_mat, const float4x4 &win_mat);
bool is_persp() const
{
return data_.winmat[3][3] == 0.0f;
}
bool is_inverted() const
{
return data_.is_inverted;
}
float far_clip() const
{
if (is_persp()) {
return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f);
}
return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2];
}
float near_clip() const
{
if (is_persp()) {
return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f);
}
return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2];
}
private:
/** Called from draw manager. */
void bind();
void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze);
void update_view_vectors();
void update_viewport_size();
void frustum_boundbox_calc(BoundBox &bbox);
void frustum_culling_planes_calc();
void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere);
};
} // namespace blender::draw

View File

@@ -7,6 +7,7 @@
#include "BLI_vector.hh"
#include "GPU_capabilities.h"
#include "GPU_viewport.h"
#include "DRW_render.h"
@@ -16,6 +17,7 @@
#include "draw_manager_text.h"
#include "draw_manager.h"
#include "draw_manager.hh"
#include "draw_view_data.h"
using namespace blender;
@@ -33,6 +35,22 @@ struct DRWViewData {
Vector<ViewportEngineData> engines;
Vector<ViewportEngineData *> enabled_engines;
/** New per view/viewport manager. Null if not supported by current hardware. */
draw::Manager *manager = nullptr;
DRWViewData()
{
/* Only for GL >= 4.3 implementations for now. */
if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) {
manager = new draw::Manager();
}
};
~DRWViewData()
{
delete manager;
};
};
DRWViewData *DRW_view_data_create(ListBase *engine_types)
@@ -227,3 +245,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it
ViewportEngineData *engine = iterator->engines[iterator->id++];
return engine;
}
draw::Manager *DRW_manager_get()
{
BLI_assert(DST.view_data_active->manager);
return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager);
}
draw::ObjectRef DRW_object_ref_get(Object *object)
{
BLI_assert(DST.view_data_active->manager);
return {object, DST.dupli_source, DST.dupli_parent};
}
void DRW_manager_begin_sync()
{
if (DST.view_data_active->manager == nullptr) {
return;
}
reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync();
}
void DRW_manager_end_sync()
{
if (DST.view_data_active->manager == nullptr) {
return;
}
reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync();
}

View File

@@ -18,7 +18,7 @@ uint drw_debug_start_draw(uint v_needed)
{
uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed);
/* NOTE: Skip the header manually. */
vertid += 1;
vertid += 2;
return vertid;
}

View File

@@ -72,7 +72,7 @@ void drw_print_char4(uint data)
}
uint cursor = atomicAdd(drw_debug_print_cursor, 1u);
/* NOTE: Skip the header manually. */
cursor += 4;
cursor += 8;
if (cursor < DRW_DEBUG_PRINT_MAX) {
/* For future usage. (i.e: Color) */
uint flags = 0u;

View File

@@ -70,6 +70,30 @@ IsectBox isect_data_setup(Box shape)
return data;
}
/* Construct box from 1 corner point + 3 side vectors. */
IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z)
{
IsectBox data;
data.corners[0] = origin;
data.corners[1] = origin + side_x;
data.corners[2] = origin + side_y + side_x;
data.corners[3] = origin + side_y;
data.corners[4] = data.corners[0] + side_z;
data.corners[5] = data.corners[1] + side_z;
data.corners[6] = data.corners[2] + side_z;
data.corners[7] = data.corners[3] + side_z;
data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z);
data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y);
data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x);
/* Assumes that the box is actually a box! */
data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6]));
data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6]));
data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6]));
return data;
}
struct IsectFrustum {
vec3 corners[8];
vec4 planes[6];
@@ -194,6 +218,50 @@ bool intersect_view(Box box)
return intersects;
}
bool intersect_view(IsectBox i_box)
{
bool intersects = true;
/* Do Box vertices vs Frustum planes. */
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
}
}
bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side;
if (all_vertex_on_negative_side) {
intersects = false;
break;
}
}
if (!intersects) {
return intersects;
}
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
}
}
bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side;
if (all_vertex_on_negative_side) {
intersects = false;
break;
}
}
return intersects;
}
bool intersect_view(Sphere sphere)
{
bool intersects = true;

View File

@@ -155,7 +155,11 @@ uniform int drw_ResourceID;
# define PASS_RESOURCE_ID
# elif defined(GPU_VERTEX_SHADER)
# define resource_id gpu_InstanceIndex
# if defined(UNIFORM_RESOURCE_ID_NEW)
# define resource_id drw_ResourceID
# else
# define resource_id gpu_InstanceIndex
# endif
# define PASS_RESOURCE_ID drw_ResourceID_iface.resource_index = resource_id;
# elif defined(GPU_GEOMETRY_SHADER)
@@ -203,8 +207,8 @@ flat in int resourceIDFrag;
# ifndef DRW_SHADER_SHARED_H
struct ObjectMatrices {
mat4 drw_modelMatrix;
mat4 drw_modelMatrixInverse;
mat4 model;
mat4 model_inverse;
};
# endif /* DRW_SHADER_SHARED_H */
@@ -214,8 +218,8 @@ layout(std140) uniform modelBlock
ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN];
};
# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix)
# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse)
# define ModelMatrix (drw_matrices[resource_id].model)
# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse)
# endif /* USE_GPU_SHADER_CREATE_INFO */
#else /* GPU_INTEL */

View File

@@ -0,0 +1,84 @@
/**
* Convert DrawPrototype into draw commands.
*/
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val)
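/* NOTE: `atomicAdd()` returns the value *before* the addition, so adding `val` back yields
* the post-add value. Exactly one thread, the one whose addition completes the group's
* total, therefore observes the final count and emits the draw call. */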
/* This is only called by the last thread executed over the group's prototype draws. */
void write_draw_call(DrawGroup group, uint group_id)
{
DrawCommand cmd;
cmd.vertex_len = group.vertex_len;
cmd.vertex_first = group.vertex_first;
if (group.base_index != -1) {
cmd.base_index = group.base_index;
cmd.instance_first_indexed = group.start;
}
else {
cmd._instance_first_array = group.start;
}
/* Back-facing command. */
cmd.instance_len = group_buf[group_id].back_facing_counter;
command_buf[group_id * 2 + 0] = cmd;
/* Front-facing command. */
cmd.instance_len = group_buf[group_id].front_facing_counter;
command_buf[group_id * 2 + 1] = cmd;
/* Reset the counters for the next command generation dispatch. Avoids re-sending the whole
* data just for this purpose. Only the last thread will execute this, so it is thread-safe. */
group_buf[group_id].front_facing_counter = 0u;
group_buf[group_id].back_facing_counter = 0u;
group_buf[group_id].total_counter = 0u;
}
void main()
{
uint proto_id = gl_GlobalInvocationID.x;
if (proto_id >= prototype_len) {
return;
}
DrawPrototype proto = prototype_buf[proto_id];
uint group_id = proto.group_id;
bool is_inverted = (proto.resource_handle & 0x80000000u) != 0;
uint resource_index = (proto.resource_handle & 0x7FFFFFFFu);
/* Visibility test result. */
bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0;
DrawGroup group = group_buf[group_id];
if (!is_visible) {
/* Skip the draw but still count towards the completion. */
if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) {
write_draw_call(group, group_id);
}
return;
}
uint back_facing_len = group.len - group.front_facing_len;
uint front_facing_len = group.front_facing_len;
uint dst_index = group.start;
if (is_inverted) {
uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len);
dst_index += offset;
if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) {
write_draw_call(group, group_id);
}
}
else {
uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len);
dst_index += back_facing_len + offset;
if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) {
write_draw_call(group, group_id);
}
}
for (uint i = dst_index; i < dst_index + proto.instance_len; i++) {
/* Fill resource_id buffer for each instance of this draw */
resource_id_buf[i] = resource_index;
}
}

View File

@@ -6,7 +6,7 @@
void main()
{
/* Skip the first vertex containing header data. */
DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 1];
DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2];
vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2));
vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0;

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "draw_defines.h"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -10,7 +11,7 @@
GPU_SHADER_CREATE_INFO(draw_debug_print)
.typedef_source("draw_shader_shared.h")
.storage_buf(7, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]");
.storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]");
GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index");
@@ -34,7 +35,10 @@ GPU_SHADER_CREATE_INFO(draw_debug_print_display)
GPU_SHADER_CREATE_INFO(draw_debug_draw)
.typedef_source("draw_shader_shared.h")
.storage_buf(6, Qualifier::READ_WRITE, "DRWDebugVert", "drw_debug_verts_buf[]");
.storage_buf(DRW_DEBUG_DRAW_SLOT,
Qualifier::READ_WRITE,
"DRWDebugVert",
"drw_debug_verts_buf[]");
GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color");

View File

@@ -8,7 +8,7 @@
void main()
{
/* Skip first 4 chars containing header data. */
uint char_data = drw_debug_print_buf[gl_VertexID + 4];
uint char_data = drw_debug_print_buf[gl_VertexID + 8];
char_index = (char_data & 0xFFu) - 0x20u;
/* Discard invalid chars. */

View File

@@ -1,10 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "draw_defines.h"
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(draw_object_infos)
.typedef_source("draw_shader_shared.h")
.define("OBINFO_LIB")
.define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)")
.define("ObjectInfo", "(drw_infos[resource_id].infos)")
.define("ObjectColor", "(drw_infos[resource_id].color)")
.uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH);
GPU_SHADER_CREATE_INFO(draw_volume_infos)
@@ -14,3 +18,11 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos)
GPU_SHADER_CREATE_INFO(draw_curves_infos)
.typedef_source("draw_shader_shared.h")
.uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH);
GPU_SHADER_CREATE_INFO(draw_object_infos_new)
.typedef_source("draw_shader_shared.h")
.define("OBINFO_LIB")
.define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)")
.define("ObjectInfo", "(drw_infos[resource_id].infos)")
.define("ObjectColor", "(drw_infos[resource_id].color)")
.storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]");

View File

@@ -0,0 +1,64 @@
/**
* Finish computation of a few draw resources after sync.
*/
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
void main()
{
uint resource_id = gl_GlobalInvocationID.x;
if (resource_id >= resource_len) {
return;
}
mat4 model_mat = matrix_buf[resource_id].model;
ObjectInfos infos = infos_buf[resource_id];
ObjectBounds bounds = bounds_buf[resource_id];
if (bounds.bounding_sphere.w != -1.0) {
/* Convert corners to origin + sides in world space. */
vec3 p0 = bounds.bounding_corners[0].xyz;
vec3 p01 = bounds.bounding_corners[1].xyz - p0;
vec3 p02 = bounds.bounding_corners[2].xyz - p0;
vec3 p03 = bounds.bounding_corners[3].xyz - p0;
/* Avoid flat box. */
p01.x = max(p01.x, 1e-4);
p02.y = max(p02.y, 1e-4);
p03.z = max(p03.z, 1e-4);
vec3 diagonal = p01 + p02 + p03;
vec3 center = p0 + diagonal * 0.5;
float min_axis = min_v3(abs(diagonal));
bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center);
/* We have to apply scaling to the diagonal. */
bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) *
0.5;
bounds_buf[resource_id]._inner_sphere_radius = min_axis;
bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0);
bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01);
bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02);
bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03);
/* Always keep correct handedness in the corner vectors. */
if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) {
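/* Shift the origin to the opposite corner along this axis and negate the side vector:
* the box is unchanged but the corner basis keeps a consistent handedness. */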
bounds_buf[resource_id].bounding_corners[0].xyz +=
bounds_buf[resource_id].bounding_corners[1].xyz;
bounds_buf[resource_id].bounding_corners[1].xyz =
-bounds_buf[resource_id].bounding_corners[1].xyz;
}
/* TODO: Bypass test for very large objects (see T67319). */
if (bounds_buf[resource_id].bounding_sphere.w > 1e12) {
bounds_buf[resource_id].bounding_sphere.w = -1.0;
}
}
vec3 loc = infos.orco_add; /* Box center. */
vec3 size = infos.orco_mul; /* Box half-extent. */
/* This is what the original computation looks like.
* Simplify to a nice MADD in shading code. */
// orco = (pos - loc) / size;
// orco = pos * (1.0 / size) + (-loc / size);
vec3 size_inv = safe_rcp(size);
infos_buf[resource_id].orco_add = -loc * size_inv;
infos_buf[resource_id].orco_mul = size_inv;
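/* Shading code can then recreate the original mapping with a single MADD (sketch):
* `orco = pos * orco_mul + orco_add;` */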
}

View File

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "draw_defines.h"
#include "gpu_shader_create_info.hh"
/* -------------------------------------------------------------------- */
@@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle)
* \{ */
GPU_SHADER_CREATE_INFO(draw_view)
.uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS)
.uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS)
.typedef_source("draw_shader_shared.h");
GPU_SHADER_CREATE_INFO(draw_modelmat)
.uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH)
.define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)")
.define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)")
.define("ModelMatrix", "(drw_matrices[resource_id].model)")
.define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)")
.additional_info("draw_view");
GPU_SHADER_CREATE_INFO(draw_modelmat_legacy)
@@ -136,3 +137,75 @@ GPU_SHADER_CREATE_INFO(draw_gpencil)
.additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Internal Draw Manager usage
* \{ */
GPU_SHADER_CREATE_INFO(draw_resource_finalize)
.do_static_compilation(true)
.typedef_source("draw_shader_shared.h")
.define("DRAW_FINALIZE_SHADER")
.local_group_size(DRW_FINALIZE_GROUP_SIZE)
.storage_buf(0, Qualifier::READ, "ObjectMatrices", "matrix_buf[]")
.storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]")
.storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]")
.push_constant(Type::INT, "resource_len")
.compute_source("draw_resource_finalize_comp.glsl");
GPU_SHADER_CREATE_INFO(draw_visibility_compute)
.do_static_compilation(true)
.local_group_size(DRW_VISIBILITY_GROUP_SIZE)
.storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]")
.storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]")
.push_constant(Type::INT, "resource_len")
.compute_source("draw_visibility_comp.glsl")
.additional_info("draw_view");
GPU_SHADER_CREATE_INFO(draw_command_generate)
.do_static_compilation(true)
.typedef_source("draw_shader_shared.h")
.typedef_source("draw_command_shared.hh")
.local_group_size(DRW_COMMAND_GROUP_SIZE)
.storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]")
.storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]")
.storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]")
.storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]")
.storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]")
.push_constant(Type::INT, "prototype_len")
.compute_source("draw_command_generate_comp.glsl");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Draw Resource ID
* New implementation using gl_BaseInstance and storage buffers.
* \{ */
GPU_SHADER_CREATE_INFO(draw_resource_id_new)
.define("UNIFORM_RESOURCE_ID_NEW")
.storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]")
.define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]");
/**
* Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf.
*/
GPU_SHADER_CREATE_INFO(draw_resource_id_fallback).vertex_in(15, Type::UINT, "drw_ResourceID");
/** \} */
/* -------------------------------------------------------------------- */
/** \name Draw Object Resources
* \{ */
GPU_SHADER_CREATE_INFO(draw_modelmat_new)
.typedef_source("draw_shader_shared.h")
.storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]")
.define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse")
.define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model")
/* TODO For compatibility with old shaders. To be removed. */
.define("ModelMatrixInverse", "drw_ModelMatrixInverse")
.define("ModelMatrix", "drw_ModelMatrix")
.additional_info("draw_resource_id_new");
/** \} */

View File

@@ -0,0 +1,46 @@
/**
* Compute visibility of each resource's bounds for a given view.
*/
/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. */
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(common_intersect_lib.glsl)
shared uint shared_result;
void mask_visibility_bit()
{
uint bit = 1u << gl_LocalInvocationID.x;
atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit);
}
void main()
{
if (gl_GlobalInvocationID.x >= resource_len) {
return;
}
ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x];
if (bounds.bounding_sphere.w != -1.0) {
IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz,
bounds.bounding_corners[1].xyz,
bounds.bounding_corners[2].xyz,
bounds.bounding_corners[3].xyz);
Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius);
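/* Cheapest tests first: the inscribed sphere gives an early accept, the bounding sphere an
* early reject; only the remaining ambiguous cases pay for the full box test. */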
if (intersect_view(inscribed_sphere) == true) {
/* Visible. */
}
else if (intersect_view(bounding_sphere) == false) {
/* Not visible. */
mask_visibility_bit();
}
else if (intersect_view(box) == false) {
/* Not visible. */
mask_visibility_bit();
}
}
}

View File

@@ -0,0 +1,441 @@
/* SPDX-License-Identifier: Apache-2.0 */
#include "testing/testing.h"
#include "draw_manager.hh"
#include "draw_pass.hh"
#include "draw_shader.h"
#include "draw_testing.hh"
#include <bitset>
namespace blender::draw {
static void test_draw_pass_all_commands()
{
Texture tex;
tex.ensure_2d(GPU_RGBA16, int2(1));
UniformBuffer<uint4> ubo;
ubo.push_update();
StorageBuffer<uint4> ssbo;
ssbo.push_update();
float alpha = 0.0f;
int3 dispatch_size(1);
PassSimple pass = {"test.all_commands"};
pass.init();
pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL);
pass.clear_color_depth_stencil(float4(0.25f, 0.5f, 100.0f, -2000.0f), 0.5f, 0xF0);
pass.state_stencil(0x80, 0x0F, 0x8F);
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.bind_texture("image", tex);
pass.bind_texture("image", &tex);
pass.bind_image("missing_image", tex); /* Should not crash. */
pass.bind_image("missing_image", &tex); /* Should not crash. */
pass.bind_ubo("missing_ubo", ubo); /* Should not crash. */
pass.bind_ubo("missing_ubo", &ubo); /* Should not crash. */
pass.bind_ssbo("missing_ssbo", ssbo); /* Should not crash. */
pass.bind_ssbo("missing_ssbo", &ssbo); /* Should not crash. */
pass.push_constant("alpha", alpha);
pass.push_constant("alpha", &alpha);
pass.push_constant("ModelViewProjectionMatrix", float4x4::identity());
pass.draw_procedural(GPU_PRIM_TRIS, 1, 3);
/* Should not crash even if the shader is not a compute shader, because we only serialize. */
/* TODO(fclem): Use real compute shader. */
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.dispatch(dispatch_size);
pass.dispatch(&dispatch_size);
pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
/* Change references. */
alpha = 1.0f;
dispatch_size = int3(2);
std::string result = pass.serialize();
std::stringstream expected;
expected << ".test.all_commands" << std::endl;
expected << " .state_set(6)" << std::endl;
expected << " .clear(color=(0.25, 0.5, 100, -2000), depth=0.5, stencil=0b11110000))"
<< std::endl;
expected << " .stencil_set(write_mask=0b10000000, compare_mask=0b00001111, reference=0b10001111"
<< std::endl;
expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
expected << " .bind_texture(0)" << std::endl;
expected << " .bind_texture_ref(0)" << std::endl;
expected << " .bind_image(-1)" << std::endl;
expected << " .bind_image_ref(-1)" << std::endl;
expected << " .bind_uniform_buf(-1)" << std::endl;
expected << " .bind_uniform_buf_ref(-1)" << std::endl;
expected << " .bind_storage_buf(-1)" << std::endl;
expected << " .bind_storage_buf_ref(-1)" << std::endl;
expected << " .push_constant(2, data=0)" << std::endl;
expected << " .push_constant(2, data=1)" << std::endl;
expected << " .push_constant(0, data=(" << std::endl;
expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ")" << std::endl;
expected << " .draw(inst_len=1, vert_len=3, vert_first=0, res_id=0)" << std::endl;
expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
expected << " .dispatch(1, 1, 1)" << std::endl;
expected << " .dispatch_ref(2, 2, 2)" << std::endl;
expected << " .barrier(4)" << std::endl;
EXPECT_EQ(result, expected.str());
DRW_shape_cache_free();
}
DRAW_TEST(draw_pass_all_commands)
static void test_draw_pass_sub_ordering()
{
PassSimple pass = {"test.sub_ordering"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.push_constant("test_pass", 1);
PassSimple::Sub &sub1 = pass.sub("Sub1");
sub1.push_constant("test_sub1", 11);
PassSimple::Sub &sub2 = pass.sub("Sub2");
sub2.push_constant("test_sub2", 21);
/* Will execute after both sub. */
pass.push_constant("test_pass", 2);
/* Will execute after sub1. */
sub2.push_constant("test_sub2", 22);
/* Will execute before sub2. */
sub1.push_constant("test_sub1", 12);
/* Will execute before end of pass. */
sub2.push_constant("test_sub2", 23);
std::string result = pass.serialize();
std::stringstream expected;
expected << ".test.sub_ordering" << std::endl;
expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
expected << " .push_constant(-1, data=1)" << std::endl;
expected << " .Sub1" << std::endl;
expected << " .push_constant(-1, data=11)" << std::endl;
expected << " .push_constant(-1, data=12)" << std::endl;
expected << " .Sub2" << std::endl;
expected << " .push_constant(-1, data=21)" << std::endl;
expected << " .push_constant(-1, data=22)" << std::endl;
expected << " .push_constant(-1, data=23)" << std::endl;
expected << " .push_constant(-1, data=2)" << std::endl;
EXPECT_EQ(result, expected.str());
}
DRAW_TEST(draw_pass_sub_ordering)
static void test_draw_pass_simple_draw()
{
PassSimple pass = {"test.simple_draw"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
/* Each draw procedural type uses a different batch. Groups are drawn in correct order. */
pass.draw_procedural(GPU_PRIM_TRIS, 1, 10, 1, {1});
pass.draw_procedural(GPU_PRIM_POINTS, 4, 20, 2, {2});
pass.draw_procedural(GPU_PRIM_TRIS, 2, 30, 3, {3});
pass.draw_procedural(GPU_PRIM_POINTS, 5, 40, 4, ResourceHandle(4, true));
pass.draw_procedural(GPU_PRIM_LINES, 1, 50, 5, {5});
pass.draw_procedural(GPU_PRIM_POINTS, 6, 60, 6, {5});
pass.draw_procedural(GPU_PRIM_TRIS, 3, 70, 7, {6});
std::string result = pass.serialize();
std::stringstream expected;
expected << ".test.simple_draw" << std::endl;
expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
expected << " .draw(inst_len=1, vert_len=10, vert_first=1, res_id=1)" << std::endl;
expected << " .draw(inst_len=4, vert_len=20, vert_first=2, res_id=2)" << std::endl;
expected << " .draw(inst_len=2, vert_len=30, vert_first=3, res_id=3)" << std::endl;
expected << " .draw(inst_len=5, vert_len=40, vert_first=4, res_id=4)" << std::endl;
expected << " .draw(inst_len=1, vert_len=50, vert_first=5, res_id=5)" << std::endl;
expected << " .draw(inst_len=6, vert_len=60, vert_first=6, res_id=5)" << std::endl;
expected << " .draw(inst_len=3, vert_len=70, vert_first=7, res_id=6)" << std::endl;
EXPECT_EQ(result, expected.str());
DRW_shape_cache_free();
}
DRAW_TEST(draw_pass_simple_draw)
static void test_draw_pass_multi_draw()
{
PassMain pass = {"test.multi_draw"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
/* Each draw procedural type uses a different batch. Groups are drawn in reverse order. */
pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1});
pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2});
pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3});
pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true));
pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5});
pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5});
pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6});
std::string result = pass.serialize();
std::stringstream expected;
expected << ".test.multi_draw" << std::endl;
expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
expected << " .draw_multi(3)" << std::endl;
expected << " .group(id=2, len=1)" << std::endl;
expected << " .proto(instance_len=1, resource_id=5, front_face)" << std::endl;
expected << " .group(id=1, len=15)" << std::endl;
expected << " .proto(instance_len=5, resource_id=4, back_face)" << std::endl;
expected << " .proto(instance_len=6, resource_id=5, front_face)" << std::endl;
expected << " .proto(instance_len=4, resource_id=2, front_face)" << std::endl;
expected << " .group(id=0, len=6)" << std::endl;
expected << " .proto(instance_len=3, resource_id=6, front_face)" << std::endl;
expected << " .proto(instance_len=2, resource_id=3, front_face)" << std::endl;
expected << " .proto(instance_len=1, resource_id=1, front_face)" << std::endl;
EXPECT_EQ(result, expected.str());
DRW_shape_cache_free();
}
DRAW_TEST(draw_pass_multi_draw)
static void test_draw_pass_sortable()
{
PassSortable pass = {"test.sortable"};
pass.init();
pass.sub("Sub3", 3.0f);
pass.sub("Sub2", 2.0f);
pass.sub("Sub5", 4.0f);
pass.sub("Sub4", 3.0f);
pass.sub("Sub1", 1.0f);
std::string result = pass.serialize();
std::stringstream expected;
expected << ".test.sortable" << std::endl;
expected << " .Sub1" << std::endl;
expected << " .Sub2" << std::endl;
expected << " .Sub3" << std::endl;
expected << " .Sub4" << std::endl;
expected << " .Sub5" << std::endl;
EXPECT_EQ(result, expected.str());
DRW_shape_cache_free();
}
DRAW_TEST(draw_pass_sortable)
static void test_draw_resource_id_gen()
{
float4x4 win_mat;
orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1);
View view("test_view");
view.sync(float4x4::identity(), win_mat);
Manager drw;
float4x4 obmat_1 = float4x4::identity();
float4x4 obmat_2 = float4x4::identity();
obmat_1.apply_scale(-0.5f);
obmat_2.apply_scale(0.5f);
drw.begin_sync();
ResourceHandle handle1 = drw.resource_handle(obmat_1);
ResourceHandle handle2 = drw.resource_handle(obmat_1);
ResourceHandle handle3 = drw.resource_handle(obmat_2);
drw.resource_handle(obmat_2, float3(2), float3(1));
drw.end_sync();
StringRefNull expected = "2 1 1 1 1 3 3 1 1 1 1 1 3 2 2 2 2 2 2 1 1 1 ";
{
/* Computed on CPU. */
PassSimple pass = {"test.resource_id"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2);
pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1);
pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3);
pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1);
pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3);
pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2);
pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1);
Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view);
std::stringstream result;
for (auto val : debug.resource_id) {
result << val << " ";
}
EXPECT_EQ(result.str(), expected);
}
{
/* Same thing with PassMain (computed on GPU) */
PassSimple pass = {"test.resource_id"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2);
pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1);
pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3);
pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1);
pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3);
pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2);
pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1);
Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view);
std::stringstream result;
for (auto val : debug.resource_id) {
result << val << " ";
}
EXPECT_EQ(result.str(), expected);
}
DRW_shape_cache_free();
DRW_shaders_free();
}
DRAW_TEST(draw_resource_id_gen)
static void test_draw_visibility()
{
float4x4 win_mat;
orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1);
View view("test_view");
view.sync(float4x4::identity(), win_mat);
Manager drw;
float4x4 obmat_1 = float4x4::identity();
float4x4 obmat_2 = float4x4::identity();
obmat_1.apply_scale(-0.5f);
obmat_2.apply_scale(0.5f);
drw.begin_sync(); /* Default {0} always visible. */
drw.resource_handle(obmat_1); /* No bounds, always visible. */
drw.resource_handle(obmat_1, float3(3), float3(1)); /* Out of view. */
drw.resource_handle(obmat_2, float3(0), float3(1)); /* Inside view. */
drw.end_sync();
PassMain pass = {"test.visibility"};
pass.init();
pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
pass.draw_procedural(GPU_PRIM_TRIS, 1, -1);
Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view);
Vector<uint32_t> expected_visibility = {0};
std::stringstream result;
for (auto val : debug.visibility) {
result << std::bitset<32>(val);
}
EXPECT_EQ(result.str(), "11111111111111111111111111111011");
DRW_shape_cache_free();
DRW_shaders_free();
}
DRAW_TEST(draw_visibility)
static void test_draw_manager_sync()
{
float4x4 obmat_1 = float4x4::identity();
float4x4 obmat_2 = float4x4::identity();
obmat_1.apply_scale(-0.5f);
obmat_2.apply_scale(0.5f);
/* TODO: Find a way to create a minimal object to test resource handle creation on it. */
Manager drw;
drw.begin_sync();
drw.resource_handle(obmat_1);
drw.resource_handle(obmat_2, float3(2), float3(1));
drw.end_sync();
Manager::DataDebugOutput debug = drw.data_debug();
std::stringstream result;
for (const auto &val : debug.matrices) {
result << val;
}
for (const auto &val : debug.bounds) {
result << val;
}
for (const auto &val : debug.infos) {
result << val;
}
std::stringstream expected;
expected << "ObjectMatrices(" << std::endl;
expected << "model=(" << std::endl;
expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ", " << std::endl;
expected << "model_inverse=(" << std::endl;
expected << "( 1.000000, -0.000000, 0.000000, -0.000000)" << std::endl;
expected << "( -0.000000, 1.000000, -0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, -0.000000, 1.000000, -0.000000)" << std::endl;
expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ")" << std::endl;
expected << "ObjectMatrices(" << std::endl;
expected << "model=(" << std::endl;
expected << "( -0.500000, -0.000000, -0.000000, 0.000000)" << std::endl;
expected << "( -0.000000, -0.500000, -0.000000, 0.000000)" << std::endl;
expected << "( -0.000000, -0.000000, -0.500000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ", " << std::endl;
expected << "model_inverse=(" << std::endl;
expected << "( -2.000000, 0.000000, -0.000000, -0.000000)" << std::endl;
expected << "( 0.000000, -2.000000, 0.000000, 0.000000)" << std::endl;
expected << "( -0.000000, 0.000000, -2.000000, 0.000000)" << std::endl;
expected << "( -0.000000, -0.000000, 0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ")" << std::endl;
expected << "ObjectMatrices(" << std::endl;
expected << "model=(" << std::endl;
expected << "( 0.500000, 0.000000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.500000, 0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 0.500000, 0.000000)" << std::endl;
expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ", " << std::endl;
expected << "model_inverse=(" << std::endl;
expected << "( 2.000000, -0.000000, 0.000000, -0.000000)" << std::endl;
expected << "( -0.000000, 2.000000, -0.000000, 0.000000)" << std::endl;
expected << "( 0.000000, -0.000000, 2.000000, -0.000000)" << std::endl;
expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl;
expected << ")" << std::endl;
expected << ")" << std::endl;
expected << "ObjectBounds(skipped)" << std::endl;
expected << "ObjectBounds(skipped)" << std::endl;
expected << "ObjectBounds(" << std::endl;
expected << ".bounding_corners[0](0.5, 0.5, 0.5)" << std::endl;
expected << ".bounding_corners[1](1, 0, 0)" << std::endl;
expected << ".bounding_corners[2](0, 1, 0)" << std::endl;
expected << ".bounding_corners[3](0, 0, 1)" << std::endl;
expected << ".sphere=(pos=(1, 1, 1), rad=0.866025" << std::endl;
expected << ")" << std::endl;
expected << "ObjectInfos(skipped)" << std::endl;
expected << "ObjectInfos(skipped)" << std::endl;
expected << "ObjectInfos(skipped)" << std::endl;
EXPECT_EQ(result.str(), expected.str());
DRW_shaders_free();
}
DRAW_TEST(draw_manager_sync)
} // namespace blender::draw

View File

@@ -27,6 +27,7 @@ set(INC
# For *_info.hh includes.
../draw/engines/eevee_next
../draw/intern
# For node muting stuff.
../nodes

View File

@@ -164,6 +164,13 @@ void GPU_batch_program_set_builtin_with_config(GPUBatch *batch,
#define GPU_batch_texture_bind(batch, name, tex) \
GPU_texture_bind(tex, GPU_shader_get_texture_binding((batch)->shader, name));
/**
* Return indirect draw call parameters for this batch.
* NOTE: r_base_index is set to -1 if not using an index buffer.
*/
void GPU_batch_draw_parameter_get(
GPUBatch *batch, int *r_v_count, int *r_v_first, int *r_base_index, int *r_i_count);
void GPU_batch_draw(GPUBatch *batch);
void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count);
/**
@@ -180,7 +187,9 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
* Issue a draw call using GPU-computed arguments. The arguments are expected to be valid for
* the type of geometry drawn (indexed or non-indexed).
*/
void GPU_batch_draw_indirect(GPUBatch *batch, GPUStorageBuf *indirect_buf);
void GPU_batch_draw_indirect(GPUBatch *batch, GPUStorageBuf *indirect_buf, intptr_t offset);
void GPU_batch_multi_draw_indirect(
GPUBatch *batch, GPUStorageBuf *indirect_buf, int count, intptr_t offset, intptr_t stride);
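/* Usage sketch (hypothetical caller code, not part of this header): fetch the parameters
* once, write them into a `DrawCommand` inside `indirect_buf` (on CPU or from a compute
* shader), then submit:
*
* int v_count, v_first, base_index, i_count;
* GPU_batch_draw_parameter_get(batch, &v_count, &v_first, &base_index, &i_count);
* ...
* GPU_batch_draw_indirect(batch, indirect_buf, 0);
*/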
#if 0 /* future plans */

View File

@@ -228,6 +228,7 @@ void GPU_materials_free(struct Main *bmain);
struct Scene *GPU_material_scene(GPUMaterial *material);
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
/**
* Return can be NULL if it's a world material.
*/

View File

@@ -47,6 +47,13 @@ void GPU_storagebuf_clear(GPUStorageBuf *ssbo,
void *data);
void GPU_storagebuf_clear_to_zero(GPUStorageBuf *ssbo);
/**
* Read back content of the buffer to CPU for inspection.
* Slow! Only use for inspection / debugging.
* NOTE: Not synchronized. Use appropriate barrier before reading.
*/
void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data);
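/* Usage sketch (assuming a prior compute dispatch wrote into `ssbo`):
*
* GPU_compute_dispatch(shader, groups_x, 1, 1);
* GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
* GPU_storagebuf_read(ssbo, cpu_data);
*/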
/**
* \brief Copy a part of a vertex buffer to a storage buffer.
*

View File

@@ -49,7 +49,12 @@ typedef enum eGPUSamplerState {
* #GPU_SAMPLER_MAX is not a valid enum value, but only a limit.
* It also creates a bad mask for the `NOT` operator in #ENUM_OPERATORS.
*/
#ifdef __cplusplus
static constexpr eGPUSamplerState GPU_SAMPLER_MAX = eGPUSamplerState(GPU_SAMPLER_ICON + 1);
#else
static const int GPU_SAMPLER_MAX = (GPU_SAMPLER_ICON + 1);
#endif
ENUM_OPERATORS(eGPUSamplerState, GPU_SAMPLER_ICON)
#ifdef __cplusplus

View File

@@ -220,6 +220,30 @@ void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader)
/** \name Drawing / Drawcall functions
* \{ */
void GPU_batch_draw_parameter_get(
GPUBatch *gpu_batch, int *r_v_count, int *r_v_first, int *r_base_index, int *r_i_count)
{
Batch *batch = static_cast<Batch *>(gpu_batch);
if (batch->elem) {
*r_v_count = batch->elem_()->index_len_get();
*r_v_first = batch->elem_()->index_start_get();
*r_base_index = batch->elem_()->index_base_get();
}
else {
*r_v_count = batch->verts_(0)->vertex_len;
*r_v_first = 0;
*r_base_index = -1;
}
int i_count = (batch->inst[0]) ? batch->inst_(0)->vertex_len : 1;
/* Meh. This is to be able to use different numbers of verts in instance VBOs. */
if (batch->inst[1] != nullptr) {
i_count = min_ii(i_count, batch->inst_(1)->vertex_len);
}
*r_i_count = i_count;
}
void GPU_batch_draw(GPUBatch *batch)
{
GPU_shader_bind(batch->shader);
@@ -270,13 +294,23 @@ void GPU_batch_draw_advanced(
batch->draw(v_first, v_count, i_first, i_count);
}
void GPU_batch_draw_indirect(GPUBatch *gpu_batch, GPUStorageBuf *indirect_buf)
void GPU_batch_draw_indirect(GPUBatch *gpu_batch, GPUStorageBuf *indirect_buf, intptr_t offset)
{
BLI_assert(Context::get()->shader != nullptr);
BLI_assert(indirect_buf != nullptr);
Batch *batch = static_cast<Batch *>(gpu_batch);
batch->draw_indirect(indirect_buf);
batch->draw_indirect(indirect_buf, offset);
}
void GPU_batch_multi_draw_indirect(
GPUBatch *gpu_batch, GPUStorageBuf *indirect_buf, int count, intptr_t offset, intptr_t stride)
{
BLI_assert(Context::get()->shader != nullptr);
BLI_assert(indirect_buf != nullptr);
Batch *batch = static_cast<Batch *>(gpu_batch);
batch->multi_draw_indirect(indirect_buf, count, offset, stride);
}
/** \} */

View File

@@ -29,7 +29,11 @@ class Batch : public GPUBatch {
virtual ~Batch() = default;
virtual void draw(int v_first, int v_count, int i_first, int i_count) = 0;
virtual void draw_indirect(GPUStorageBuf *indirect_buf) = 0;
virtual void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) = 0;
virtual void multi_draw_indirect(GPUStorageBuf *indirect_buf,
int count,
intptr_t offset,
intptr_t stride) = 0;
/* Convenience casts. */
IndexBuf *elem_() const

View File

@@ -199,8 +199,7 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
}
/* Trick type to change overload and keep a somewhat nice syntax. */
struct GPUConstant : public GPUInput {
};
struct GPUConstant : public GPUInput {};
/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
@@ -355,21 +354,22 @@ void GPUCodegen::generate_resources()
std::stringstream ss;
/* Textures. */
int slot = 0;
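/* NOTE: Bind slots are now assigned explicitly and sequentially instead of relying on
* automatic resource locations being generated for each sampler. */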
LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) {
if (tex->colorband) {
const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
info.sampler(0, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH);
info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH);
}
else if (tex->tiled_mapping_name[0] != '\0') {
const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
info.sampler(0, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);
info.sampler(slot++, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);
const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name);
info.sampler(0, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH);
info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH);
}
else {
const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
info.sampler(0, ImageType::FLOAT_2D, name, Frequency::BATCH);
info.sampler(slot++, ImageType::FLOAT_2D, name, Frequency::BATCH);
}
}
@@ -382,7 +382,7 @@ void GPUCodegen::generate_resources()
}
ss << "};\n\n";
info.uniform_buf(0, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH);
info.uniform_buf(1, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH);
}
if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) {
@@ -394,7 +394,7 @@ void GPUCodegen::generate_resources()
/* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */
/* DRW_RESOURCE_CHUNK_LEN = 512 */
info.uniform_buf(0, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
}
info.typedef_source_generated = ss.str();
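The net effect is a deterministic binding layout instead of driver-assigned locations: samplers take slots 0..N-1 in graph order, the NodeTree UBO sits at 1, and UniformAttrs at 2. A sketch of a shared slot definition an engine could now rely on (the enum, and the idea that slot 0 stays reserved for engine resources, are assumptions):

/* Hypothetical shared slot constants, for illustration only. */
enum MaterialUboSlot : int {
  MAT_UBO_NODE_TREE = 1,     /* Matches info.uniform_buf(1, "NodeTree", ...). */
  MAT_UBO_UNIFORM_ATTRS = 2, /* Matches info.uniform_buf(2, "UniformAttrs", ...). */
};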


@@ -70,6 +70,14 @@ class IndexBuf {
* They can lead to graphical glitches on some systems. (See T96892) */
return is_empty_ ? 0 : index_len_;
}
uint32_t index_start_get() const
{
return index_start_;
}
uint32_t index_base_get() const
{
return index_base_;
}
/* Return size in bytes of the drawable data buffer range. The actual buffer size might be bigger. */
size_t size_get() const
{


@@ -91,6 +91,8 @@ struct GPUMaterial {
#ifndef NDEBUG
char name[64];
#else
char name[16];
#endif
};
@@ -193,6 +195,11 @@ GPUShader *GPU_material_get_shader(GPUMaterial *material)
return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name;
}
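One plausible consumer of the exposed name, sketched on the assumption that it feeds GPU debug groups (the surrounding draw submission is not shown):

/* Sketch: label GPU work with the material name for frame-debugger captures. */
GPU_debug_group_begin(GPU_material_get_name(material));
/* ... submit the material's draw calls ... */
GPU_debug_group_end();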
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->ma;
@@ -205,12 +212,7 @@ GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
#ifndef NDEBUG
const char *name = material->name;
#else
const char *name = "Material";
#endif
material->ubo = GPU_uniformbuf_create_from_list(inputs, name);
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name);
}
ListBase GPU_material_attributes(GPUMaterial *material)
@@ -672,11 +674,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
mat->refcount = 1;
#ifndef NDEBUG
STRNCPY(mat->name, name);
#else
UNUSED_VARS(name);
#endif
if (is_lookdev) {
mat->flag |= GPU_MATFLAG_LOOKDEV_HACK;
}


@@ -109,4 +109,9 @@ void GPU_storagebuf_copy_sub_from_vertbuf(
unwrap(ssbo)->copy_sub(unwrap(src), dst_offset, src_offset, copy_size);
}
void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data)
{
unwrap(ssbo)->read(data);
}
/** \} */


@@ -44,6 +44,7 @@ class StorageBuf {
eGPUDataFormat data_format,
void *data) = 0;
virtual void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) = 0;
virtual void read(void *data) = 0;
};
/* Syntactic sugar. */


@@ -327,12 +327,13 @@ void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
}
}
void GLBatch::draw_indirect(GPUStorageBuf *indirect_buf)
void GLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
GL_CHECK_RESOURCES("Batch");
this->bind(0);
/* TODO(fclem): Make the barrier and binding optional if consecutive draws are issued. */
dynamic_cast<GLStorageBuf *>(unwrap(indirect_buf))->bind_as(GL_DRAW_INDIRECT_BUFFER);
/* This barrier needs to be here as it only works on the currently bound indirect buffer. */
glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
@@ -341,10 +342,37 @@ void GLBatch::draw_indirect(GPUStorageBuf *indirect_buf)
if (elem) {
const GLIndexBuf *el = this->elem_();
GLenum index_type = to_gl(el->index_type_);
glDrawElementsIndirect(gl_type, index_type, (GLvoid *)nullptr);
glDrawElementsIndirect(gl_type, index_type, (GLvoid *)offset);
}
else {
glDrawArraysIndirect(gl_type, (GLvoid *)nullptr);
glDrawArraysIndirect(gl_type, (GLvoid *)offset);
}
/* Unbind. */
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
}
void GLBatch::multi_draw_indirect(GPUStorageBuf *indirect_buf,
int count,
intptr_t offset,
intptr_t stride)
{
GL_CHECK_RESOURCES("Batch");
this->bind(0);
/* TODO(fclem): Make the barrier and binding optional if consecutive draws are issued. */
dynamic_cast<GLStorageBuf *>(unwrap(indirect_buf))->bind_as(GL_DRAW_INDIRECT_BUFFER);
/* This barrier needs to be here as it only works on the currently bound indirect buffer. */
glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
GLenum gl_type = to_gl(prim_type);
if (elem) {
const GLIndexBuf *el = this->elem_();
GLenum index_type = to_gl(el->index_type_);
glMultiDrawElementsIndirect(gl_type, index_type, (GLvoid *)offset, count, stride);
}
else {
glMultiDrawArraysIndirect(gl_type, (GLvoid *)offset, count, stride);
}
/* Unbind. */
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);


@@ -91,7 +91,11 @@ class GLBatch : public Batch {
public:
void draw(int v_first, int v_count, int i_first, int i_count) override;
void draw_indirect(GPUStorageBuf *indirect_buf) override;
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override;
void multi_draw_indirect(GPUStorageBuf *indirect_buf,
int count,
intptr_t offset,
intptr_t stride) override;
void bind(int i_first);
/* Convenience getters. */


@@ -166,6 +166,23 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin
}
}
void GLStorageBuf::read(void *data)
{
if (ssbo_id_ == 0) {
this->init();
}
if (GLContext::direct_state_access_support) {
glGetNamedBufferSubData(ssbo_id_, 0, size_in_bytes_, data);
}
else {
/* Fallback without direct state access: bind, read back, then unbind. */
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
}
/** \} */
} // namespace blender::gpu


@@ -35,6 +35,7 @@ class GLStorageBuf : public StorageBuf {
void unbind() override;
void clear(eGPUTextureFormat internal_format, eGPUDataFormat data_format, void *data) override;
void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
void read(void *data) override;
/* Special internal function to bind SSBOs to indirect argument targets. */
void bind_as(GLenum target);


@@ -640,8 +640,8 @@ typedef struct UserDef_Experimental {
char use_cycles_debug;
char show_asset_debug_info;
char no_asset_indexing;
char use_viewport_debug;
char SANITIZE_AFTER_HERE;
char _pad0;
/* The following options are automatically sanitized (set to 0)
* when the release cycle is not alpha. */
char use_new_curves_tools;


@@ -296,7 +296,9 @@ typedef struct View3D {
char _pad6[2];
int layact DNA_DEPRECATED;
unsigned short local_collections_uuid;
short _pad7[3];
short _pad7[2];
short debug_flag;
/** Optional bool for 3d cursor to define center. */
short ob_center_cursor;
@@ -489,6 +491,11 @@ enum {
V3D_SHADING_COMPOSITOR = (1 << 15),
};
/** #View3D.debug_flag */
enum {
V3D_DEBUG_FREEZE_CULLING = (1 << 0),
};
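A sketch of how a draw engine might consume the flag; the view-sync helper and matrix names are assumptions, not part of this diff:

/* Sketch: keep the previously captured culling data while the freeze is on. */
const bool freeze_culling = (v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0;
if (!freeze_culling) {
  culling_view_sync(view_matrix, window_matrix);
}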
#define V3D_USES_SCENE_LIGHTS(v3d) \
((((v3d)->shading.type == OB_MATERIAL) && ((v3d)->shading.flag & V3D_SHADING_SCENE_LIGHTS)) || \
(((v3d)->shading.type == OB_RENDER) && \


@@ -4735,6 +4735,13 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna)
RNA_def_property_range(prop, 0.0f, 1.0f);
RNA_def_property_ui_text(prop, "Opacity", "Vertex Paint mix factor");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, "rna_GPencil_update");
/* Developer debug overlay. */
prop = RNA_def_property(srna, "use_debug_freeze_view_culling", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "debug_flag", V3D_DEBUG_FREEZE_CULLING);
RNA_def_property_ui_text(prop, "Freeze Culling", "Freeze view culling bounds");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
}
static void rna_def_space_view3d(BlenderRNA *brna)


@@ -6371,6 +6371,14 @@ static void rna_def_userdef_experimental(BlenderRNA *brna)
prop = RNA_def_property(srna, "enable_eevee_next", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "enable_eevee_next", 1);
RNA_def_property_ui_text(prop, "EEVEE Next", "Enable the new EEVEE codebase, requires restart");
prop = RNA_def_property(srna, "use_viewport_debug", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "use_viewport_debug", 1);
RNA_def_property_ui_text(prop,
"Viewport Debug",
"Enable viewport debugging options for developpers in the overlays "
"pop-over");
RNA_def_property_update(prop, 0, "rna_userdef_ui_update");
}
static void rna_def_userdef_addon_collection(BlenderRNA *brna, PropertyRNA *cprop)