BMesh: Optimize copying attributes from many elements at once #115824

Merged
Hans Goudey merged 9 commits from HooglyBoogly/blender:bmesh-cd-copy-performance-fix into main 2023-12-09 05:37:47 +01:00
31 changed files with 543 additions and 164 deletions
Showing only changes of commit ed3170c1ba

View File

@ -674,7 +674,6 @@ if(NOT OpenImageIO_FOUND)
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
endif()
add_definitions(-DOIIO_NO_SSE=1)
if(WITH_LLVM)
set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH "Path to the LLVM installation")

View File

@ -37,7 +37,7 @@ void SplitOperation::execute_pixel_sampled(float output[4],
{
int perc = x_split_ ? split_percentage_ * this->get_width() / 100.0f :
split_percentage_ * this->get_height() / 100.0f;
bool image1 = x_split_ ? x > perc : y > perc;
bool image1 = x_split_ ? x >= perc : y >= perc;
if (image1) {
image1Input_->read_sampled(output, x, y, PixelSampler::Nearest);
}
@ -64,7 +64,7 @@ void SplitOperation::update_memory_buffer_partial(MemoryBuffer *output,
split_percentage_ * this->get_height() / 100.0f;
const size_t elem_bytes = COM_data_type_bytes_len(get_output_socket()->get_data_type());
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
const bool is_image1 = x_split_ ? it.x > percent : it.y > percent;
const bool is_image1 = x_split_ ? it.x >= percent : it.y >= percent;
memcpy(it.out, it.in(is_image1 ? 0 : 1), elem_bytes);
}
}
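
The `>` to `>=` change above (mirrored in the GLSL split shader further down) moves the pixel sitting exactly on the split boundary onto the first input. A small standalone sketch of the boundary behavior, in plain C++ with made-up values rather than compositor code:

// Sketch only: which input the boundary pixel takes before and after the change.
#include <cstdio>

int main()
{
  const int width = 100;
  const float split_percentage = 0.0f;   /* Hypothetical: split pushed fully to one side. */
  const int perc = int(split_percentage * width / 100.0f);
  const int x = perc;                    /* Pixel exactly on the split boundary. */
  const bool old_is_image1 = x > perc;   /* false: boundary pixel samples the second input. */
  const bool new_is_image1 = x >= perc;  /* true: boundary pixel samples the first input. */
  std::printf("old=%d new=%d\n", old_is_image1, new_is_image1);
  return 0;
}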

View File

@ -41,7 +41,8 @@ using namespace nodes::derived_node_tree_types;
ShaderOperation::ShaderOperation(Context &context, ShaderCompileUnit &compile_unit)
: Operation(context), compile_unit_(compile_unit)
{
material_ = GPU_material_from_callbacks(&construct_material, &generate_code, this);
material_ = GPU_material_from_callbacks(
GPU_MAT_COMPOSITOR, &construct_material, &generate_code, this);
GPU_material_status_set(material_, GPU_MAT_QUEUED);
GPU_material_compile(material_);
}

View File

@ -9,9 +9,9 @@ void main()
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 output_size = imageSize(output_img);
#if defined(SPLIT_HORIZONTAL)
bool condition = (output_size.x * split_ratio) < texel.x;
bool condition = (output_size.x * split_ratio) <= texel.x;
#elif defined(SPLIT_VERTICAL)
bool condition = (output_size.y * split_ratio) < texel.y;
bool condition = (output_size.y * split_ratio) <= texel.y;
#endif
vec4 color = condition ? texture_load(first_image_tx, texel) :
texture_load(second_image_tx, texel);

View File

@ -477,6 +477,10 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl
engines/eevee_next/shaders/eevee_deferred_combine_frag.glsl
engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl
engines/eevee_next/shaders/eevee_deferred_tile_classify_frag.glsl
engines/eevee_next/shaders/eevee_deferred_tile_compact_vert.glsl
engines/eevee_next/shaders/eevee_deferred_tile_stencil_frag.glsl
engines/eevee_next/shaders/eevee_deferred_tile_stencil_vert.glsl
engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl

View File

@ -1384,11 +1384,13 @@ static GPUMaterial *eevee_material_get_ex(
if (ma) {
bNodeTree *ntree = !is_default ? ma->nodetree : EEVEE_shader_default_surface_nodetree(ma);
mat = DRW_shader_from_material(ma, ntree, options, is_volume, deferred, cbfn, nullptr);
mat = DRW_shader_from_material(
ma, ntree, GPU_MAT_EEVEE_LEGACY, options, is_volume, deferred, cbfn, nullptr);
}
else {
bNodeTree *ntree = !is_default ? wo->nodetree : EEVEE_shader_default_world_nodetree(wo);
mat = DRW_shader_from_world(wo, ntree, options, is_volume, deferred, cbfn, nullptr);
mat = DRW_shader_from_world(
wo, ntree, GPU_MAT_EEVEE_LEGACY, options, is_volume, deferred, cbfn, nullptr);
}
return mat;
}

View File

@ -98,6 +98,10 @@
#define SHADOW_MAX_RAY 4
#define SHADOW_ROG_ID 0
/* Deferred Lighting. */
#define DEFERRED_RADIANCE_FORMAT GPU_R11F_G11F_B10F
#define DEFERRED_GBUFFER_ROG_ID 0
/* Ray-tracing. */
#define RAYTRACE_GROUP_SIZE 8
/* Keep this as a define to avoid shader variations. */

View File

@ -238,7 +238,7 @@ float Light::point_radiance_get(const ::Light *la)
void Light::debug_draw()
{
#ifndef NDEBUG
drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f));
drw_debug_sphere(float3(_position), influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f));
#endif
}

View File

@ -445,27 +445,27 @@ void DeferredLayer::begin_sync()
}
{
gbuffer_ps_.init();
gbuffer_ps_.subpass_transition(GPU_ATTACHEMENT_WRITE,
{GPU_ATTACHEMENT_WRITE,
GPU_ATTACHEMENT_WRITE,
GPU_ATTACHEMENT_WRITE,
GPU_ATTACHEMENT_WRITE});
/* G-buffer. */
gbuffer_ps_.bind_image(GBUF_CLOSURE_SLOT, &inst_.gbuffer.closure_img_tx);
gbuffer_ps_.bind_image(GBUF_COLOR_SLOT, &inst_.gbuffer.color_img_tx);
/* RenderPasses & AOVs. */
gbuffer_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
/* Cryptomatte. */
gbuffer_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx);
/* Storage Buffer. */
/* Textures. */
gbuffer_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
{
/* Common resources. */
/* G-buffer. */
gbuffer_ps_.bind_image(GBUF_CLOSURE_SLOT, &inst_.gbuffer.closure_img_tx);
gbuffer_ps_.bind_image(GBUF_COLOR_SLOT, &inst_.gbuffer.color_img_tx);
/* RenderPasses & AOVs. */
gbuffer_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
/* Cryptomatte. */
gbuffer_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx);
/* Storage Buffer. */
/* Textures. */
gbuffer_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
inst_.bind_uniform_data(&gbuffer_ps_);
inst_.sampling.bind_resources(gbuffer_ps_);
inst_.hiz_buffer.bind_resources(gbuffer_ps_);
inst_.cryptomatte.bind_resources(gbuffer_ps_);
}
inst_.bind_uniform_data(&gbuffer_ps_);
inst_.sampling.bind_resources(gbuffer_ps_);
inst_.hiz_buffer.bind_resources(gbuffer_ps_);
inst_.cryptomatte.bind_resources(gbuffer_ps_);
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
@ -483,27 +483,85 @@ void DeferredLayer::end_sync()
{
eClosureBits evaluated_closures = CLOSURE_DIFFUSE | CLOSURE_REFLECTION | CLOSURE_REFRACTION;
if (closure_bits_ & evaluated_closures) {
/* First add the tile classification step at the end of the GBuffer pass. */
{
/* Fill the tile mask texture with the closures present in each tile. */
PassMain::Sub &sub = gbuffer_ps_.sub("TileClassify");
sub.subpass_transition(GPU_ATTACHEMENT_WRITE, /* Needed for depth test. */
{GPU_ATTACHEMENT_IGNORE,
GPU_ATTACHEMENT_READ, /* Header. */
GPU_ATTACHEMENT_IGNORE,
GPU_ATTACHEMENT_IGNORE});
/* Use depth test to reject background pixels. */
/* WORKAROUND: Avoid rasterizer discard, but the shaders actually use no fragment output. */
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_DEPTH_GREATER);
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_CLASSIFY));
sub.bind_image("tile_mask_img", &tile_mask_tx_);
sub.push_constant("closure_tile_size_shift", &closure_tile_size_shift_);
sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
{
PassMain::Sub &sub = gbuffer_ps_.sub("TileCompaction");
/* Use rasterizer discard. This processes the tile data to create tile command lists. */
sub.state_set(DRW_STATE_NO_DRAW);
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_COMPACT));
sub.bind_texture("tile_mask_tx", &tile_mask_tx_);
sub.bind_ssbo("closure_single_tile_buf", &closure_bufs_[0].tile_buf_);
sub.bind_ssbo("closure_single_draw_buf", &closure_bufs_[0].draw_buf_);
sub.bind_ssbo("closure_double_tile_buf", &closure_bufs_[1].tile_buf_);
sub.bind_ssbo("closure_double_draw_buf", &closure_bufs_[1].draw_buf_);
sub.bind_ssbo("closure_triple_tile_buf", &closure_bufs_[2].tile_buf_);
sub.bind_ssbo("closure_triple_draw_buf", &closure_bufs_[2].draw_buf_);
sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
sub.draw_procedural(GPU_PRIM_POINTS, 1, max_lighting_tile_count_);
}
{
PassSimple &pass = eval_light_ps_;
pass.init();
/* Use depth test to reject background pixels. */
/* WORKAROUND: Avoid rasterizer discard, but the shaders actually use no fragment output. */
pass.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_DEPTH_GREATER);
pass.shader_set(inst_.shaders.static_shader_get(DEFERRED_LIGHT));
pass.bind_image("direct_diffuse_img", &direct_diffuse_tx_);
pass.bind_image("direct_reflect_img", &direct_reflect_tx_);
pass.bind_image("direct_refract_img", &direct_refract_tx_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
pass.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
pass.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
inst_.bind_uniform_data(&pass);
inst_.gbuffer.bind_resources(pass);
inst_.lights.bind_resources(pass);
inst_.shadows.bind_resources(pass);
inst_.sampling.bind_resources(pass);
inst_.hiz_buffer.bind_resources(pass);
pass.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
pass.draw_procedural(GPU_PRIM_TRIS, 1, 3);
{
PassSimple::Sub &sub = pass.sub("StencilSet");
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS |
DRW_STATE_DEPTH_GREATER);
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_STENCIL));
sub.push_constant("closure_tile_size_shift", &closure_tile_size_shift_);
sub.bind_texture("direct_radiance_tx", &direct_radiance_txs_[0]);
/* Set stencil value for each tile complexity level. */
for (int i = 0; i < ARRAY_SIZE(closure_bufs_); i++) {
sub.bind_ssbo("closure_tile_buf", &closure_bufs_[i].tile_buf_);
sub.state_stencil(0xFFu, 1u << i, 0xFFu);
sub.draw_procedural_indirect(GPU_PRIM_TRIS, closure_bufs_[i].draw_buf_);
}
}
{
PassSimple::Sub &sub = pass.sub("Eval");
/* Use depth test to reject background pixels which have not been stencil cleared. */
/* WORKAROUND: Avoid rasterizer discard by enabling stencil write, but the shaders actually
* use no fragment output. */
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_EQUAL | DRW_STATE_DEPTH_GREATER);
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
sub.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
sub.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
/* Submit the more costly ones first to avoid long tail in occupancy.
* See page 78 of "Siggraph 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */
for (int i = ARRAY_SIZE(closure_bufs_) - 1; i >= 0; i--) {
sub.shader_set(inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)));
sub.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
sub.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
sub.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);
inst_.bind_uniform_data(&sub);
inst_.gbuffer.bind_resources(sub);
inst_.lights.bind_resources(sub);
inst_.shadows.bind_resources(sub);
inst_.sampling.bind_resources(sub);
inst_.hiz_buffer.bind_resources(sub);
sub.state_stencil(0xFFu, 1u << i, 0xFFu);
sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
}
}
{
PassSimple &pass = combine_ps_;
@ -511,9 +569,9 @@ void DeferredLayer::end_sync()
/* Use depth test to reject background pixels. */
pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_GREATER | DRW_STATE_BLEND_ADD_FULL);
pass.shader_set(inst_.shaders.static_shader_get(DEFERRED_COMBINE));
pass.bind_image("direct_diffuse_img", &direct_diffuse_tx_);
pass.bind_image("direct_reflect_img", &direct_reflect_tx_);
pass.bind_image("direct_refract_img", &direct_refract_tx_);
pass.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
pass.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
pass.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);
pass.bind_image("indirect_diffuse_img", &indirect_diffuse_tx_);
pass.bind_image("indirect_reflect_img", &indirect_reflect_tx_);
pass.bind_image("indirect_refract_img", &indirect_refract_tx_);
@ -566,6 +624,7 @@ void DeferredLayer::render(View &main_view,
* environment. So in this case, disable tracing and fallback to probe. */
bool do_screen_space_refraction = !is_first_pass && (closure_bits_ & CLOSURE_REFRACTION);
bool do_screen_space_reflection = (closure_bits_ & CLOSURE_REFLECTION);
eGPUTextureUsage usage_rw = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
if (do_screen_space_reflection) {
/* TODO(fclem): Verify if GPU_TEXTURE_USAGE_ATTACHMENT is needed for the copy and the clear. */
@ -609,14 +668,33 @@ void DeferredLayer::render(View &main_view,
}
}
if (/* FIXME(fclem): Metal doesn't clear the whole framebuffer correctly. */
GPU_backend_get_type() == GPU_BACKEND_METAL ||
/* FIXME(fclem): Vulkan doesn't implement load / store config yet. */
if (/* FIXME(fclem): Vulkan doesn't implement load / store config yet. */
GPU_backend_get_type() == GPU_BACKEND_VULKAN)
{
inst_.gbuffer.header_tx.clear(int4(0));
}
int2 tile_mask_size;
int tile_count;
closure_tile_size_shift_ = 4;
/* Increase the tile size until the tile count fits the budget. */
for (int i = 0; i < 4; i++, closure_tile_size_shift_++) {
tile_mask_size = math::divide_ceil(extent, int2(1u << closure_tile_size_shift_));
tile_count = tile_mask_size.x * tile_mask_size.y;
if (tile_count <= max_lighting_tile_count_) {
break;
}
}
int target_count = power_of_2_max_u(tile_count);
for (int i = 0; i < ARRAY_SIZE(closure_bufs_); i++) {
closure_bufs_[i].tile_buf_.resize(target_count);
closure_bufs_[i].draw_buf_.clear_to_zero();
}
tile_mask_tx_.ensure_2d_array(GPU_R8UI, tile_mask_size, 4, usage_rw);
tile_mask_tx_.clear(uint4(0));
GPU_framebuffer_bind_ex(gbuffer_fb,
{
{GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE}, /* Depth */
@ -646,11 +724,10 @@ void DeferredLayer::render(View &main_view,
inst_.shadows.set_view(render_view);
{
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
direct_diffuse_tx_.acquire(extent, GPU_RGBA16F, usage);
direct_reflect_tx_.acquire(extent, GPU_RGBA16F, usage);
direct_refract_tx_.acquire(extent, GPU_RGBA16F, usage);
int closure_count = count_bits_i(closure_bits_ & (CLOSURE_REFLECTION | CLOSURE_DIFFUSE));
for (int i = 0; i < ARRAY_SIZE(direct_radiance_txs_); i++) {
direct_radiance_txs_[i].acquire(
(closure_count > 1) ? extent : int2(1), GPU_R11F_G11F_B10F, usage_rw);
}
GPU_framebuffer_bind(combined_fb);
@ -676,7 +753,8 @@ void DeferredLayer::render(View &main_view,
indirect_reflect_tx_ = reflect_result.get();
indirect_refract_tx_ = refract_result.get();
inst_.subsurface.render(direct_diffuse_tx_, indirect_diffuse_tx_, closure_bits_, render_view);
inst_.subsurface.render(
direct_radiance_txs_[0], indirect_diffuse_tx_, closure_bits_, render_view);
GPU_framebuffer_bind(combined_fb);
inst_.manager->submit(combine_ps_);
@ -685,9 +763,9 @@ void DeferredLayer::render(View &main_view,
refract_result.release();
reflect_result.release();
direct_diffuse_tx_.release();
direct_reflect_tx_.release();
direct_refract_tx_.release();
for (int i = 0; i < ARRAY_SIZE(direct_radiance_txs_); i++) {
direct_radiance_txs_[i].release();
}
if (do_screen_space_reflection) {
GPU_texture_copy(radiance_feedback_tx_, rb.combined_tx);
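
The tile-budget loop above starts closure_tile_size_shift_ at 4 and doubles the tile size until the tile count fits max_lighting_tile_count_ (declared as 128 * 128 in the header hunk below). A self-contained sketch of that search; divide_ceil here is a local stand-in assumed to behave like math::divide_ceil:

// Sketch only: pick the smallest power-of-two tile size whose tile count fits the budget.
#include <cstdio>

struct int2 { int x, y; };

static int2 divide_ceil(int2 a, int b)  /* Stand-in for math::divide_ceil (assumed semantics). */
{
  return {(a.x + b - 1) / b, (a.y + b - 1) / b};
}

int main()
{
  const int2 extent = {1920, 1080};      /* Hypothetical render extent. */
  const int max_tile_count = 128 * 128;  /* Budget, as in max_lighting_tile_count_. */
  int shift = 4;                         /* Start with 16x16 pixel tiles. */
  int2 tile_mask_size = {0, 0};
  int tile_count = 0;
  for (int i = 0; i < 4; i++, shift++) {
    tile_mask_size = divide_ceil(extent, 1 << shift);
    tile_count = tile_mask_size.x * tile_mask_size.y;
    if (tile_count <= max_tile_count) {
      break;  /* Tile count fits the budget; keep this shift. */
    }
  }
  std::printf("tile size %d px -> %d x %d tiles (%d)\n",
              1 << shift, tile_mask_size.x, tile_mask_size.y, tile_count);
  return 0;
}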

View File

@ -204,6 +204,8 @@ class DeferredLayer : DeferredLayerBase {
private:
Instance &inst_;
static constexpr int max_lighting_tile_count_ = 128 * 128;
/* Evaluate all light objects contribution. */
PassSimple eval_light_ps_ = {"EvalLights"};
/* Combine direct and indirect light contributions and apply BSDF color. */
@ -216,15 +218,28 @@ class DeferredLayer : DeferredLayerBase {
* BSDF color and do additive blending for each of the lighting step.
*
* NOTE: Not to be confused with the render passes.
* NOTE: Using an array of textures instead of a texture array, to allow the use of TextureFromPool.
*/
TextureFromPool direct_diffuse_tx_ = {"direct_diffuse_tx"};
TextureFromPool direct_reflect_tx_ = {"direct_reflect_tx"};
TextureFromPool direct_refract_tx_ = {"direct_refract_tx"};
TextureFromPool direct_radiance_txs_[3] = {
{"direct_radiance_1"}, {"direct_radiance_2"}, {"direct_radiance_3"}};
/* Reference to ray-tracing result. */
GPUTexture *indirect_diffuse_tx_ = nullptr;
GPUTexture *indirect_reflect_tx_ = nullptr;
GPUTexture *indirect_refract_tx_ = nullptr;
/* Parameters for the light evaluation pass. */
int closure_tile_size_shift_ = 0;
/* Tile buffers for different lighting complexity levels. */
struct {
DrawIndirectBuf draw_buf_ = {"DrawIndirectBuf"};
ClosureTileBuf tile_buf_ = {"ClosureTileBuf"};
} closure_bufs_[3];
/**
* Tile texture containing several bools per tile, indicating the presence of each feature.
* It is used to select a specialized shader for each tile.
*/
Texture tile_mask_tx_ = {"tile_mask_tx_"};
/* TODO(fclem): This should be a TextureFromPool. */
Texture radiance_behind_tx_ = {"radiance_behind_tx"};
/* TODO(fclem): This shouldn't be part of the pipeline but of the view. */

View File

@ -94,12 +94,22 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_film_cryptomatte_post";
case DEFERRED_COMBINE:
return "eevee_deferred_combine";
case DEFERRED_LIGHT:
return "eevee_deferred_light";
case DEFERRED_LIGHT_SINGLE:
return "eevee_deferred_light_single";
case DEFERRED_LIGHT_DOUBLE:
return "eevee_deferred_light_double";
case DEFERRED_LIGHT_TRIPLE:
return "eevee_deferred_light_triple";
case DEFERRED_CAPTURE_EVAL:
return "eevee_deferred_capture_eval";
case DEFERRED_PLANAR_EVAL:
return "eevee_deferred_planar_eval";
case DEFERRED_TILE_CLASSIFY:
return "eevee_deferred_tile_classify";
case DEFERRED_TILE_COMPACT:
return "eevee_deferred_tile_compact";
case DEFERRED_TILE_STENCIL:
return "eevee_deferred_tile_stencil";
case HIZ_DEBUG:
return "eevee_hiz_debug";
case HIZ_UPDATE:
@ -668,8 +678,14 @@ GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
uint64_t shader_uuid = shader_uuid_from_material_type(
pipeline_type, geometry_type, displacement_type, blender_mat->blend_flag);
return DRW_shader_from_material(
blender_mat, nodetree, shader_uuid, is_volume, deferred_compilation, codegen_callback, this);
return DRW_shader_from_material(blender_mat,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
deferred_compilation,
codegen_callback,
this);
}
GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
@ -683,8 +699,14 @@ GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
return DRW_shader_from_world(
blender_world, nodetree, shader_uuid, is_volume, defer_compilation, codegen_callback, this);
return DRW_shader_from_world(blender_world,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
defer_compilation,
codegen_callback,
this);
}
/* Variation to compile a material only with a nodetree. Caller needs to maintain the list of
@ -704,6 +726,7 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
nodetree,
&materials,
name,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
false,

View File

@ -32,10 +32,15 @@ enum eShaderType {
FILM_COMP,
FILM_CRYPTOMATTE_POST,
DEFERRED_COMBINE,
DEFERRED_LIGHT,
DEFERRED_CAPTURE_EVAL,
DEFERRED_COMBINE,
DEFERRED_LIGHT_SINGLE,
DEFERRED_LIGHT_DOUBLE,
DEFERRED_LIGHT_TRIPLE,
DEFERRED_PLANAR_EVAL,
DEFERRED_TILE_CLASSIFY,
DEFERRED_TILE_COMPACT,
DEFERRED_TILE_STENCIL,
DEBUG_GBUFFER,
DEBUG_SURFELS,

View File

@ -749,10 +749,10 @@ struct LightData {
#define _clipmap_origin_y object_mat[3][3]
/** Aliases for axes. */
#ifndef USE_GPU_SHADER_CREATE_INFO
# define _right object_mat[0].xyz()
# define _up object_mat[1].xyz()
# define _back object_mat[2].xyz()
# define _position object_mat[3].xyz()
# define _right object_mat[0]
# define _up object_mat[1]
# define _back object_mat[2]
# define _position object_mat[3]
#else
# define _right object_mat[0].xyz
# define _up object_mat[1].xyz
@ -1426,7 +1426,7 @@ struct PipelineInfoData {
float alpha_hash_scale;
float _pad0;
float _pad1;
float _pad3;
float _pad2;
};
BLI_STATIC_ASSERT_ALIGN(PipelineInfoData, 16)
@ -1528,6 +1528,7 @@ float4 utility_tx_sample_lut(sampler2DArray util_tx, float cos_theta, float roug
using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
using CameraDataBuf = draw::UniformBuffer<CameraData>;
using ClosureTileBuf = draw::StorageArrayBuffer<uint, 1024, true>;
using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
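
In the PipelineInfoData hunk above, renaming _pad3 to _pad2 only fixes the numbering; the padding itself is there so the struct stays a 16-byte multiple for GPU uniform/storage buffer layout, which BLI_STATIC_ASSERT_ALIGN verifies. A reduced sketch of the same invariant (names are illustrative, not the real struct):

// Sketch only: explicit padding keeps a GPU-visible struct at a 16-byte multiple.
struct PipelineInfoDataSketch {
  float alpha_hash_scale;
  float _pad0;
  float _pad1;
  float _pad2;
};
static_assert(sizeof(PipelineInfoDataSketch) % 16 == 0,
              "GPU-visible structs must be padded to 16-byte multiples");

int main()
{
  return 0;
}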

View File

@ -22,18 +22,19 @@ void main()
vec3 refract_light = vec3(0.0);
if (gbuf.has_diffuse) {
diffuse_light = imageLoad(direct_diffuse_img, texel).rgb +
diffuse_light = imageLoad(direct_radiance_1_img, texel).rgb +
imageLoad(indirect_diffuse_img, texel).rgb;
}
if (gbuf.has_reflection) {
reflect_light = imageLoad(direct_reflect_img, texel).rgb +
reflect_light = imageLoad(direct_radiance_2_img, texel).rgb +
imageLoad(indirect_reflect_img, texel).rgb;
}
if (gbuf.has_refraction) {
refract_light = /* imageLoad(direct_refract_img, texel).rgb + */ /* TODO: Not implemented. */
imageLoad(indirect_refract_img, texel).rgb;
refract_light =
/* imageLoad(direct_radiance_3_img, texel).rgb + */ /* TODO: Not implemented. */
imageLoad(indirect_refract_img, texel).rgb;
}
/* Light passes. */

View File

@ -20,32 +20,45 @@ void main()
float depth = texelFetch(hiz_tx, texel, 0).r;
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
if (!gbuf.has_reflection && !gbuf.has_diffuse /* TODO(fclem) && !gbuf.has_refraction */) {
if (gbuf.closure_count == 0) {
return;
}
vec3 P = drw_point_screen_to_world(vec3(uvcoordsvar.xy, depth));
/* Assume reflection closure normal is always somewhat representative of the geometric normal.
* Ng is only used for shadow biases and subsurface check in this case. */
vec3 Ng = gbuf.has_reflection ? gbuf.reflection.N : gbuf.diffuse.N;
vec3 Ng = gbuf.surface_N;
vec3 V = drw_world_incident_vector(P);
float vPz = dot(drw_view_forward(), P) - dot(drw_view_forward(), drw_view_position());
ClosureLightStack stack;
ClosureLight cl_diff;
cl_diff.N = gbuf.diffuse.N;
cl_diff.ltc_mat = LTC_LAMBERT_MAT;
cl_diff.type = LIGHT_DIFFUSE;
stack.cl[0] = cl_diff;
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
* raytracing pipeline first. */
if (gbuf.has_diffuse) {
ClosureLight cl_diff;
cl_diff.N = gbuf.diffuse.N;
cl_diff.ltc_mat = LTC_LAMBERT_MAT;
cl_diff.type = LIGHT_DIFFUSE;
stack.cl[0] = cl_diff;
}
else {
ClosureLight cl_refl;
cl_refl.N = gbuf.reflection.N;
cl_refl.ltc_mat = LTC_GGX_MAT(dot(gbuf.reflection.N, V), gbuf.reflection.roughness);
cl_refl.type = LIGHT_SPECULAR;
stack.cl[0] = cl_refl;
}
#if LIGHT_CLOSURE_EVAL_COUNT > 1
ClosureLight cl_refl;
cl_refl.N = gbuf.reflection.N;
cl_refl.ltc_mat = LTC_GGX_MAT(dot(gbuf.reflection.N, V), gbuf.reflection.roughness);
cl_refl.type = LIGHT_SPECULAR;
stack.cl[1] = cl_refl;
#endif
#ifdef SSS_TRANSMITTANCE
#if LIGHT_CLOSURE_EVAL_COUNT > 2
ClosureLight cl_sss;
cl_sss.N = -gbuf.diffuse.N;
cl_sss.ltc_mat = LTC_LAMBERT_MAT;
@ -53,54 +66,65 @@ void main()
stack.cl[2] = cl_sss;
#endif
#ifdef SSS_TRANSMITTANCE
float shadow_thickness = thickness_from_shadow(P, Ng, vPz);
float thickness = (shadow_thickness != THICKNESS_NO_VALUE) ?
max(shadow_thickness, gbuf.thickness) :
gbuf.thickness;
#else
float thickness = 0.0;
#ifdef SSS_TRANSMITTANCE
if (gbuf.has_sss) {
float shadow_thickness = thickness_from_shadow(P, Ng, vPz);
thickness = (shadow_thickness != THICKNESS_NO_VALUE) ? max(shadow_thickness, gbuf.thickness) :
gbuf.thickness;
}
#endif
light_eval(stack, P, Ng, V, vPz, thickness);
vec3 radiance_shadowed = stack.cl[0].light_shadowed;
vec3 radiance_unshadowed = stack.cl[0].light_unshadowed;
#if LIGHT_CLOSURE_EVAL_COUNT > 1
radiance_shadowed += stack.cl[1].light_shadowed;
radiance_unshadowed += stack.cl[1].light_unshadowed;
#endif
#if LIGHT_CLOSURE_EVAL_COUNT > 2
radiance_shadowed += stack.cl[2].light_shadowed;
radiance_unshadowed += stack.cl[2].light_unshadowed;
#endif
#ifdef SSS_TRANSMITTANCE
if (gbuf.diffuse.sss_id != 0u) {
if (gbuf.has_sss) {
vec3 sss_profile = subsurface_transmission(gbuf.diffuse.sss_radius, thickness);
stack.cl[2].light_shadowed *= sss_profile;
stack.cl[2].light_unshadowed *= sss_profile;
/* Add to diffuse light for processing inside the Screen Space SSS pass. */
stack.cl[0].light_shadowed += stack.cl[2].light_shadowed;
stack.cl[0].light_unshadowed += stack.cl[2].light_unshadowed;
}
else {
stack.cl[2].light_shadowed = vec3(0.0);
stack.cl[2].light_unshadowed = vec3(0.0);
}
#endif
vec3 radiance_diffuse = stack.cl[0].light_shadowed;
vec3 radiance_specular = stack.cl[1].light_shadowed;
#ifdef SSS_TRANSMITTANCE
radiance_diffuse += stack.cl[2].light_shadowed;
#endif
vec3 radiance_shadowed = stack.cl[0].light_shadowed;
vec3 radiance_unshadowed = stack.cl[0].light_unshadowed;
radiance_shadowed += stack.cl[1].light_shadowed;
radiance_unshadowed += stack.cl[1].light_unshadowed;
#ifdef SSS_TRANSMITTANCE
radiance_shadowed += stack.cl[2].light_shadowed;
radiance_unshadowed += stack.cl[2].light_unshadowed;
#endif
/* TODO(fclem): Change shadow pass to be colored. */
vec3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed);
output_renderpass_value(uniform_buf.render_pass.shadow_id, average(shadows));
if (gbuf.has_diffuse) {
imageStore(direct_diffuse_img, texel, vec4(radiance_diffuse, 1.0));
if (gbuf.closure_count > 0) {
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
* raytracing pipeline first. */
if (gbuf.has_diffuse) {
imageStore(direct_radiance_1_img, texel, vec4(stack.cl[0].light_shadowed, 1.0));
}
else {
imageStore(direct_radiance_2_img, texel, vec4(stack.cl[0].light_shadowed, 1.0));
}
}
if (gbuf.has_reflection) {
imageStore(direct_reflect_img, texel, vec4(radiance_specular, 1.0));
#if LIGHT_CLOSURE_EVAL_COUNT > 1
if (gbuf.closure_count > 1) {
imageStore(direct_radiance_2_img, texel, vec4(stack.cl[1].light_shadowed, 1.0));
}
/* TODO(fclem): Support LTC for refraction. */
// imageStore(direct_refract_img, texel, vec4(cl_refr.light_shadowed, 1.0));
#endif
#if LIGHT_CLOSURE_EVAL_COUNT > 2
# if 0 /* Will work when we have fully flexible evaluation. */
if (gbuf.closure_count > 2) {
imageStore(direct_radiance_3_img, texel, vec4(stack.cl[2].light_shadowed, 1.0));
}
# endif
#endif
}

View File

@ -0,0 +1,33 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* This pass loads Gbuffer data and outputs a mask of tiles to process.
* This mask is then processed by the compaction phase.
*/
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
void main()
{
ivec2 texel = ivec2(gl_FragCoord.xy);
ivec2 tile_co = texel >> closure_tile_size_shift;
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_DIFFUSE))) {
imageStore(tile_mask_img, ivec3(tile_co, 0), uvec4(1u));
}
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_REFLECTION))) {
imageStore(tile_mask_img, ivec3(tile_co, 1), uvec4(1u));
}
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_REFRACTION))) {
imageStore(tile_mask_img, ivec3(tile_co, 2), uvec4(1u));
}
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_SSS))) {
imageStore(tile_mask_img, ivec3(tile_co, 3), uvec4(1u));
}
}

View File

@ -0,0 +1,51 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Convert the tile classification texture into streams of tiles of each type.
* Dispatched with 1 vertex (thread) per tile.
*/
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
void main()
{
/* Doesn't matter. Doesn't get rasterized. */
gl_Position = vec4(0.0);
int tile_per_row = textureSize(tile_mask_tx, 0).x;
ivec2 tile_coord = ivec2(gl_VertexID % tile_per_row, gl_VertexID / tile_per_row);
if (gl_VertexID == 0) {
closure_double_draw_buf.instance_len = 1u;
closure_single_draw_buf.instance_len = 1u;
closure_triple_draw_buf.instance_len = 1u;
}
if (!in_texture_range(tile_coord, tile_mask_tx)) {
return;
}
uint closure_count = texelFetch(tile_mask_tx, ivec3(tile_coord, 0), 0).r +
texelFetch(tile_mask_tx, ivec3(tile_coord, 1), 0).r +
// texelFetch(tile_mask_tx, ivec3(tile_coord, 2), 0).r + /* TODO: refract */
texelFetch(tile_mask_tx, ivec3(tile_coord, 3), 0).r;
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
* raytracing pipeline first. */
bool has_reflection = texelFetch(tile_mask_tx, ivec3(tile_coord, 1), 0).r != 0u;
bool has_sss = texelFetch(tile_mask_tx, ivec3(tile_coord, 3), 0).r != 0u;
if (closure_count == 3 || has_sss) {
uint tile_index = atomicAdd(closure_triple_draw_buf.vertex_len, 6u) / 6u;
closure_triple_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
}
else if (closure_count == 2 || has_reflection) {
uint tile_index = atomicAdd(closure_double_draw_buf.vertex_len, 6u) / 6u;
closure_double_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
}
else if (closure_count == 1) {
uint tile_index = atomicAdd(closure_single_draw_buf.vertex_len, 6u) / 6u;
closure_single_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
}
}
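
Each surviving tile above reserves 6 vertices on the matching indirect draw command and stores its coordinate, packed into a single uint, at slot vertex_len / 6. A CPU-side sketch of that bookkeeping; the pack layout mirrors what packUvec2x16 is assumed to do (low 16 bits = x, high 16 bits = y):

// Sketch only: packed tile coordinates and the vertex_len / 6 slot index.
#include <cstdint>
#include <cstdio>

static uint32_t pack_uvec2_x16(uint32_t x, uint32_t y)  /* Assumed packUvec2x16 layout. */
{
  return (x & 0xFFFFu) | ((y & 0xFFFFu) << 16u);
}

int main()
{
  uint32_t vertex_len = 0;  /* Stands in for the DrawCommand's atomic vertex counter. */
  uint32_t tile_buf[4] = {};
  const uint32_t tiles[][2] = {{3u, 7u}, {12u, 1u}};
  for (const auto &tile : tiles) {
    const uint32_t old_len = vertex_len;  /* atomicAdd returns the previous value. */
    vertex_len += 6u;                     /* One quad = two triangles = 6 vertices. */
    tile_buf[old_len / 6u] = pack_uvec2_x16(tile[0], tile[1]);
  }
  const uint32_t packed = tile_buf[1];
  std::printf("tile 1 = (%u, %u), vertex_len = %u\n", packed & 0xFFFFu, packed >> 16u, vertex_len);
  return 0;
}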

View File

@ -0,0 +1,12 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Load tile classification data and mark stencil areas.
*/
void main()
{
/* Stencil only pass. Passthrough. */
}

View File

@ -0,0 +1,29 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Load tile classification data and mark stencil areas.
*/
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
void main()
{
int tile_id = gl_VertexID / 6;
int vertex_id = gl_VertexID % 6;
ivec2 tile_coord = ivec2(unpackUvec2x16(closure_tile_buf[tile_id]));
/* Generate a quad from 2 triangles with the same winding.
* This way it can be merged on some hardware. */
int v = (vertex_id > 2) ? (3 - (vertex_id - 3)) : vertex_id;
ivec2 tile_corner = ivec2(v & 1, v >> 1);
int tile_size = (1 << closure_tile_size_shift);
vec2 ss_coord = vec2((tile_coord + tile_corner) * tile_size) /
vec2(textureSize(direct_radiance_tx, 0));
vec2 ndc_coord = ss_coord * 2.0 - 1.0;
/* gl_Position expects homogeneous-space coordinates, but this is the same thing as NDC in 2D mode. */
gl_Position = vec4(ndc_coord, 1.0, 1.0);
}
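
The vertex indexing above expands 6 vertex IDs into a quad built from two triangles that share the same winding. A plain C++ sketch enumerating the mapping with the same arithmetic:

// Sketch only: the 6-vertex -> quad-corner mapping used by the stencil pass.
#include <cstdio>

int main()
{
  for (int vertex_id = 0; vertex_id < 6; vertex_id++) {
    /* Fold vertex IDs 3..5 back onto 3, 2, 1 so both triangles keep the same winding. */
    const int v = (vertex_id > 2) ? (3 - (vertex_id - 3)) : vertex_id;
    const int corner_x = v & 1;
    const int corner_y = v >> 1;
    std::printf("vertex %d -> v=%d corner=(%d, %d)\n", vertex_id, v, corner_x, corner_y);
  }
  return 0;
}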

View File

@ -163,6 +163,13 @@ bool gbuffer_has_closure(uint header, eClosureBits closure)
return has_diffuse;
}
bool has_sss = (gbuffer_header_unpack(header, layer) == GBUF_SSS);
layer += int(has_sss);
if (closure == eClosureBits(CLOSURE_SSS)) {
return has_sss;
}
return false;
}
@ -265,8 +272,10 @@ struct GBufferData {
bool has_diffuse;
bool has_reflection;
bool has_refraction;
bool has_sss;
bool has_any_surface;
uint header;
uint closure_count;
};
GBufferData gbuffer_read(usampler2D header_tx,
@ -287,6 +296,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
}
gbuf.thickness = 0.0;
gbuf.closure_count = 0u;
/* First closure is always written. */
gbuf.surface_N = gbuffer_normal_unpack(texelFetch(closure_tx, ivec3(texel, 0), 0).xy);
@ -318,6 +328,8 @@ GBufferData gbuffer_read(usampler2D header_tx,
gbuf.diffuse.sss_radius = vec3(0.0, 0.0, 0.0);
gbuf.diffuse.sss_id = 0u;
gbuf.closure_count = 2u;
return gbuf;
}
@ -333,6 +345,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
gbuf.refraction.N = gbuffer_normal_unpack(closure_packed.xy);
gbuf.refraction.roughness = closure_packed.z;
gbuf.refraction.ior = gbuffer_ior_unpack(closure_packed.w);
gbuf.closure_count += 1u;
layer += 1;
}
else {
@ -352,6 +365,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
gbuf.reflection.color = gbuffer_color_unpack(color_packed);
gbuf.reflection.N = gbuffer_normal_unpack(closure_packed.xy);
gbuf.reflection.roughness = closure_packed.z;
gbuf.closure_count += 1u;
layer += 1;
}
else {
@ -370,6 +384,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
gbuf.diffuse.color = gbuffer_color_unpack(color_packed);
gbuf.diffuse.N = gbuffer_normal_unpack(closure_packed.xy);
gbuf.thickness = gbuffer_thickness_unpack(closure_packed.w);
gbuf.closure_count += 1u;
layer += 1;
}
else {
@ -379,9 +394,9 @@ GBufferData gbuffer_read(usampler2D header_tx,
gbuf.thickness = 0.0;
}
bool has_sss = (gbuffer_header_unpack(gbuf.header, layer) == GBUF_SSS);
gbuf.has_sss = (gbuffer_header_unpack(gbuf.header, layer) == GBUF_SSS);
if (has_sss) {
if (gbuf.has_sss) {
vec4 closure_packed = texelFetch(closure_tx, ivec3(texel, layer), 0);
gbuf.diffuse.sss_radius = gbuffer_sss_radii_unpack(closure_packed.xyz);

View File

@ -17,17 +17,51 @@ GPU_SHADER_CREATE_INFO(eevee_gbuffer_data)
.sampler(9, ImageType::FLOAT_2D_ARRAY, "gbuf_closure_tx")
.sampler(10, ImageType::FLOAT_2D_ARRAY, "gbuf_color_tx");
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_classify)
.fragment_source("eevee_deferred_tile_classify_frag.glsl")
/* Early fragment test is needed to avoid processing background fragments. */
.early_fragment_test(true)
.additional_info("eevee_shared", "draw_fullscreen")
.subpass_in(1, Type::UINT, "in_gbuffer_header", DEFERRED_GBUFFER_ROG_ID)
.typedef_source("draw_shader_shared.h")
.image(0, GPU_R8UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "tile_mask_img")
.push_constant(Type::INT, "closure_tile_size_shift")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_compact)
.additional_info("eevee_shared")
.typedef_source("draw_shader_shared.h")
.vertex_source("eevee_deferred_tile_compact_vert.glsl")
/* Reuse dummy stencil frag. */
.fragment_source("eevee_deferred_tile_stencil_frag.glsl")
.storage_buf(0, Qualifier::READ_WRITE, "DrawCommand", "closure_single_draw_buf")
.storage_buf(1, Qualifier::READ_WRITE, "DrawCommand", "closure_double_draw_buf")
.storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "closure_triple_draw_buf")
.storage_buf(3, Qualifier::WRITE, "uint", "closure_single_tile_buf[]")
.storage_buf(4, Qualifier::WRITE, "uint", "closure_double_tile_buf[]")
.storage_buf(5, Qualifier::WRITE, "uint", "closure_triple_tile_buf[]")
.sampler(0, ImageType::UINT_2D_ARRAY, "tile_mask_tx")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_stencil)
.vertex_source("eevee_deferred_tile_stencil_vert.glsl")
.fragment_source("eevee_deferred_tile_stencil_frag.glsl")
.additional_info("eevee_shared")
/* Only for texture size. */
.sampler(0, ImageType::FLOAT_2D, "direct_radiance_tx")
.storage_buf(4, Qualifier::READ, "uint", "closure_tile_buf[]")
.push_constant(Type::INT, "closure_tile_size_shift")
.typedef_source("draw_shader_shared.h")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_light)
.fragment_source("eevee_deferred_light_frag.glsl")
/* Early fragment test is needed to avoid processing fragments without correct GBuffer data. */
/* Early fragment test is needed to avoid processing background fragments. */
.early_fragment_test(true)
/* Chaining to next pass. */
/* TODO(@fclem): These could use the sub-pass feature. */
.image_out(2, GPU_RGBA16F, "direct_diffuse_img")
.image_out(3, GPU_RGBA16F, "direct_reflect_img")
.image_out(4, GPU_RGBA16F, "direct_refract_img")
.define("SSS_TRANSMITTANCE")
.define("LIGHT_CLOSURE_EVAL_COUNT", "3")
.image_out(2, DEFERRED_RADIANCE_FORMAT, "direct_radiance_1_img")
.image_out(3, DEFERRED_RADIANCE_FORMAT, "direct_radiance_2_img")
.image_out(4, DEFERRED_RADIANCE_FORMAT, "direct_radiance_3_img")
.additional_info("eevee_shared",
"eevee_gbuffer_data",
"eevee_utility_texture",
@ -36,17 +70,32 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_light)
"eevee_shadow_data",
"eevee_hiz_data",
"eevee_render_pass_out",
"draw_view",
"draw_fullscreen")
"draw_fullscreen",
"draw_view");
GPU_SHADER_CREATE_INFO(eevee_deferred_light_single)
.additional_info("eevee_deferred_light")
.define("LIGHT_CLOSURE_EVAL_COUNT", "1")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_light_double)
.additional_info("eevee_deferred_light")
.define("LIGHT_CLOSURE_EVAL_COUNT", "2")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_light_triple)
.additional_info("eevee_deferred_light")
.define("SSS_TRANSMITTANCE")
.define("LIGHT_CLOSURE_EVAL_COUNT", "3")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(eevee_deferred_combine)
/* Early fragment test is needed to avoid processing fragments without correct GBuffer data. */
/* Early fragment test is needed to avoid processing background fragments. */
.early_fragment_test(true)
/* Inputs. */
.image_in(2, GPU_RGBA16F, "direct_diffuse_img")
.image_in(3, GPU_RGBA16F, "direct_reflect_img")
.image_in(4, GPU_RGBA16F, "direct_refract_img")
.image_in(2, DEFERRED_RADIANCE_FORMAT, "direct_radiance_1_img")
.image_in(3, DEFERRED_RADIANCE_FORMAT, "direct_radiance_2_img")
.image_in(4, DEFERRED_RADIANCE_FORMAT, "direct_radiance_3_img")
.image_in(5, RAYTRACE_RADIANCE_FORMAT, "indirect_diffuse_img")
.image_in(6, RAYTRACE_RADIANCE_FORMAT, "indirect_reflect_img")
.image_in(7, RAYTRACE_RADIANCE_FORMAT, "indirect_refract_img")
@ -98,6 +147,7 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_planar_eval)
.fragment_source("eevee_deferred_planar_frag.glsl")
.do_static_compilation(true);
#undef image_array_out
#undef image_out
#undef image_in

View File

@ -153,7 +153,7 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
.early_fragment_test(true)
/* Direct output. (Emissive, Holdout) */
.fragment_out(0, Type::VEC4, "out_radiance")
.fragment_out(1, Type::UINT, "out_gbuf_header")
.fragment_out(1, Type::UINT, "out_gbuf_header", DualBlend::NONE, DEFERRED_GBUFFER_ROG_ID)
.fragment_out(2, Type::VEC4, "out_gbuf_color")
.fragment_out(3, Type::VEC4, "out_gbuf_closure")
/* Everything is stored inside a two layered target, one for each format. This is to fit the

View File

@ -11,7 +11,7 @@ GPU_SHADER_CREATE_INFO(eevee_subsurface_setup)
.typedef_source("draw_shader_shared.h")
.additional_info("draw_view", "eevee_shared", "eevee_gbuffer_data")
.sampler(2, ImageType::DEPTH_2D, "depth_tx")
.image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "direct_light_img")
.image(0, DEFERRED_RADIANCE_FORMAT, Qualifier::READ, ImageType::FLOAT_2D, "direct_light_img")
.image(1, RAYTRACE_RADIANCE_FORMAT, Qualifier::READ, ImageType::FLOAT_2D, "indirect_light_img")
.image(2, SUBSURFACE_OBJECT_ID_FORMAT, Qualifier::WRITE, ImageType::UINT_2D, "object_id_img")
.image(3, SUBSURFACE_RADIANCE_FORMAT, Qualifier::WRITE, ImageType::FLOAT_2D, "radiance_img")

View File

@ -296,6 +296,7 @@ struct GPUShader *DRW_shader_create_fullscreen_with_shaderlib_ex(const char *fra
struct GPUMaterial *DRW_shader_from_world(struct World *wo,
struct bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
@ -303,6 +304,7 @@ struct GPUMaterial *DRW_shader_from_world(struct World *wo,
void *thunk);
struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
struct bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,

View File

@ -493,6 +493,7 @@ GPUShader *DRW_shader_create_fullscreen_with_shaderlib_ex(const char *frag,
GPUMaterial *DRW_shader_from_world(World *wo,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
@ -505,6 +506,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
ntree,
&wo->gpumaterial,
wo->id.name,
engine,
shader_id,
is_volume_shader,
false,
@ -525,6 +527,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
GPUMaterial *DRW_shader_from_material(Material *ma,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
@ -537,6 +540,7 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
ntree,
&ma->gpumaterial,
ma->id.name,
engine,
shader_id,
is_volume_shader,
false,

View File

@ -117,11 +117,15 @@ Array<float2> polyline_fit_curve(Span<float2> points,
return {};
}
if (r_cubic_array == nullptr) {
return {};
}
Span<float2> r_cubic_array_span(reinterpret_cast<float2 *>(r_cubic_array),
r_cubic_array_len * 3);
Array<float2> curve_positions(r_cubic_array_span);
/* Free the c-style array. */
MEM_freeN(r_cubic_array);
free(r_cubic_array);
return curve_positions;
}
@ -153,11 +157,16 @@ IndexMask polyline_detect_corners(Span<float2> points,
/* Error occurred, return. */
return IndexMask();
}
if (r_corners == nullptr) {
return IndexMask();
}
BLI_assert(samples_max < std::numeric_limits<int>::max());
Span<int> indices(reinterpret_cast<int *>(r_corners), r_corner_len);
const IndexMask corner_mask = IndexMask::from_indices<int>(indices, memory);
/* Free the c-style array. */
MEM_freeN(r_corners);
free(r_corners);
return corner_mask;
}

View File

@ -720,7 +720,7 @@ static void invert_visibility_bmesh(Object &object, const Span<PBVHNode *> nodes
bool fully_hidden = true;
for (BMVert *vert : BKE_pbvh_bmesh_node_unique_verts(node)) {
BM_elem_flag_toggle(vert, BM_ELEM_HIDDEN);
fully_hidden &= BM_elem_flag_test(vert, BM_ELEM_HIDDEN);
fully_hidden &= BM_elem_flag_test_bool(vert, BM_ELEM_HIDDEN);
}
BKE_pbvh_node_fully_hidden_set(node, fully_hidden);
BKE_pbvh_node_mark_rebuild_draw(node);
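
The switch to BM_elem_flag_test_bool above matters because the accumulator is a bool: a raw flag test returns the masked bits, and `bool &= bits` silently drops to false whenever the tested flag is not bit 0. A standalone sketch of the failure mode (the flag value here is hypothetical, not the real BM_ELEM_HIDDEN):

// Sketch only: why &= on a bool needs a bool-valued flag test.
#include <cstdio>

int main()
{
  const char HIDDEN_FLAG = 1 << 1;       /* Hypothetical: any flag that is not bit 0. */
  const char elem_flags = HIDDEN_FLAG;   /* The element is hidden. */

  bool fully_hidden_raw = true;
  fully_hidden_raw &= (elem_flags & HIDDEN_FLAG);          /* 1 & 2 == 0 -> false, despite hidden. */

  bool fully_hidden_bool = true;
  fully_hidden_bool &= ((elem_flags & HIDDEN_FLAG) != 0);  /* Explicit bool -> stays true. */

  std::printf("raw test: %d, bool test: %d\n", fully_hidden_raw, fully_hidden_bool);
  return 0;
}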

View File

@ -233,19 +233,19 @@ struct GPUUniformBuf *GPU_material_sss_profile_get(GPUMaterial *material,
/**
* High level functions to create and use GPU materials.
*/
GPUMaterial *GPU_material_from_nodetree_find(struct ListBase *gpumaterials,
const void *engine_type,
int options);
/**
* \note Caller must use #GPU_material_from_nodetree_find to re-use existing materials,
* This is enforced since constructing other arguments to this function may be expensive
* so only do this when they are needed.
*/
typedef enum eGPUMaterialEngine {
GPU_MAT_EEVEE_LEGACY = 0,
GPU_MAT_EEVEE,
GPU_MAT_COMPOSITOR,
} eGPUMaterialEngine;
GPUMaterial *GPU_material_from_nodetree(struct Scene *scene,
struct Material *ma,
struct bNodeTree *ntree,
struct ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
@ -421,7 +421,8 @@ typedef void (*ConstructGPUMaterialFn)(void *thunk, GPUMaterial *material);
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
* The given thunk will be passed as the first parameter of each callback. */
GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb,
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk);

View File

@ -97,6 +97,8 @@ struct GPUPass {
uint refcount;
/** The last time the refcount was greater than 0. */
int gc_timestamp;
/** The engine type this pass is compiled for. */
eGPUMaterialEngine engine;
/** Identity hash generated from all GLSL code. */
uint32_t hash;
/** Did we already tried to compile the attached GPUShader. */
@ -122,12 +124,12 @@ static SpinLock pass_cache_spin;
/* Search by hash only. Return first pass with the same hash.
* There is hash collision if (pass->next && pass->next->hash == hash) */
static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash)
{
BLI_spin_lock(&pass_cache_spin);
/* Could be optimized with a Lookup table. */
for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
if (pass->hash == hash) {
if (pass->hash == hash && pass->engine == engine) {
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
@ -157,10 +159,12 @@ static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
GPUShaderCreateInfo *info,
uint32_t hash)
{
eGPUMaterialEngine engine = pass->engine;
BLI_spin_lock(&pass_cache_spin);
for (; pass && (pass->hash == hash); pass = pass->next) {
if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info))
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info) &&
pass->engine == engine)
{
BLI_spin_unlock(&pass_cache_spin);
return pass;
@ -732,6 +736,7 @@ void GPUCodegen::generate_graphs()
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph)
@ -763,7 +768,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
* NOTE: We only perform cache look-up for non-optimized shader
* graphs, as baked constant data among other optimizations will generate too many
* shader source permutations, with minimal re-usability. */
pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get());
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
@ -813,6 +818,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->shader = nullptr;
pass->refcount = 1;
pass->create_info = codegen.create_info;
pass->engine = engine;
pass->hash = codegen.hash_get();
pass->compiled = false;
pass->cached = false;
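
The pass cache changes above add the engine to both the hash lookup and the collision check, so passes with identical generated-source hashes are not shared across engines. A reduced sketch of that keying over a simplified linked-list cache (struct and names are placeholders, not the GPUPass internals):

// Sketch only: cache lookup keyed by (engine, hash) over a linked list of passes.
#include <cstdint>
#include <cstdio>

enum class Engine { EeveeLegacy, Eevee, Compositor };

struct Pass {
  Pass *next = nullptr;
  Engine engine;
  uint32_t hash;
};

static Pass *cache_lookup(Pass *head, Engine engine, uint32_t hash)
{
  for (Pass *pass = head; pass; pass = pass->next) {
    /* Both the source hash and the engine must match for reuse. */
    if (pass->hash == hash && pass->engine == engine) {
      return pass;
    }
  }
  return nullptr;
}

int main()
{
  Pass a{nullptr, Engine::Eevee, 0xABCDu};
  Pass b{&a, Engine::Compositor, 0xABCDu};  /* Same hash, different engine. */
  std::printf("eevee hit: %p, legacy hit: %p\n",
              (void *)cache_lookup(&b, Engine::Eevee, 0xABCDu),
              (void *)cache_lookup(&b, Engine::EeveeLegacy, 0xABCDu));
  return 0;
}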

View File

@ -25,6 +25,7 @@ typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass(GPUMaterial *material,
struct GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph);

View File

@ -99,8 +99,9 @@ struct GPUMaterial {
eGPUMaterialStatus status;
/** Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag;
/* Identify shader variations (shadow, probe, world background...).
* Should be unique even across render engines. */
/** The engine type this material is compiled for. */
eGPUMaterialEngine engine;
/* Identify shader variations (shadow, probe, world background...) */
uint64_t uuid;
/* Number of generated function. */
int generated_function_len;
@ -821,6 +822,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
@ -830,7 +832,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid) {
if (mat->uuid == shader_uuid && mat->engine == engine) {
return mat;
}
}
@ -838,6 +840,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
GPUMaterial *mat = static_cast<GPUMaterial *>(MEM_callocN(sizeof(GPUMaterial), "GPUMaterial"));
mat->ma = ma;
mat->scene = scene;
mat->engine = engine;
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
@ -860,7 +863,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
{
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false);
if (mat->pass == nullptr) {
/* We had a cache hit and the shader has already failed to compile. */
@ -891,7 +894,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
mat->optimize_pass_info.callback = callback;
mat->optimize_pass_info.thunk = thunk;
#else
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true);
if (mat->optimized_pass == nullptr) {
/* Failed to create optimized pass. */
gpu_node_graph_free_nodes(&mat->graph);
@ -1024,8 +1027,12 @@ void GPU_material_optimize(GPUMaterial *mat)
* optimal, as these do not benefit from caching, due to baked constants. However, this could
* possibly be cause for concern for certain cases. */
if (!mat->optimized_pass) {
mat->optimized_pass = GPU_generate_pass(
mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
mat->optimized_pass = GPU_generate_pass(mat,
&mat->graph,
mat->engine,
mat->optimize_pass_info.callback,
mat->optimize_pass_info.thunk,
true);
BLI_assert(mat->optimized_pass);
}
#else
@ -1097,7 +1104,8 @@ void GPU_materials_free(Main *bmain)
BKE_material_defaults_free_gpu();
}
GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb,
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
@ -1110,6 +1118,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
material->optimized_pass = nullptr;
material->default_mat = nullptr;
material->engine = engine;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
@ -1119,7 +1128,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(
material, &material->graph, generate_code_function_cb, thunk, false);
material, &material->graph, material->engine, generate_code_function_cb, thunk, false);
material->optimized_pass = nullptr;
/* The pass already exists in the pass cache but its shader already failed to compile. */