BMesh: Optimize copying attributes from many elements at once #115824
|
@ -674,7 +674,6 @@ if(NOT OpenImageIO_FOUND)
|
|||
set(OPENIMAGEIO_LIBRARIES ${OIIO_OPTIMIZED} ${OIIO_DEBUG})
|
||||
set(OPENIMAGEIO_IDIFF "${OPENIMAGEIO}/bin/idiff.exe")
|
||||
endif()
|
||||
add_definitions(-DOIIO_NO_SSE=1)
|
||||
|
||||
if(WITH_LLVM)
|
||||
set(LLVM_ROOT_DIR ${LIBDIR}/llvm CACHE PATH "Path to the LLVM installation")
|
||||
|
|
|
@ -37,7 +37,7 @@ void SplitOperation::execute_pixel_sampled(float output[4],
|
|||
{
|
||||
int perc = x_split_ ? split_percentage_ * this->get_width() / 100.0f :
|
||||
split_percentage_ * this->get_height() / 100.0f;
|
||||
bool image1 = x_split_ ? x > perc : y > perc;
|
||||
bool image1 = x_split_ ? x >= perc : y >= perc;
|
||||
if (image1) {
|
||||
image1Input_->read_sampled(output, x, y, PixelSampler::Nearest);
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ void SplitOperation::update_memory_buffer_partial(MemoryBuffer *output,
|
|||
split_percentage_ * this->get_height() / 100.0f;
|
||||
const size_t elem_bytes = COM_data_type_bytes_len(get_output_socket()->get_data_type());
|
||||
for (BuffersIterator<float> it = output->iterate_with(inputs, area); !it.is_end(); ++it) {
|
||||
const bool is_image1 = x_split_ ? it.x > percent : it.y > percent;
|
||||
const bool is_image1 = x_split_ ? it.x >= percent : it.y >= percent;
|
||||
memcpy(it.out, it.in(is_image1 ? 0 : 1), elem_bytes);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,8 @@ using namespace nodes::derived_node_tree_types;
|
|||
ShaderOperation::ShaderOperation(Context &context, ShaderCompileUnit &compile_unit)
|
||||
: Operation(context), compile_unit_(compile_unit)
|
||||
{
|
||||
material_ = GPU_material_from_callbacks(&construct_material, &generate_code, this);
|
||||
material_ = GPU_material_from_callbacks(
|
||||
GPU_MAT_COMPOSITOR, &construct_material, &generate_code, this);
|
||||
GPU_material_status_set(material_, GPU_MAT_QUEUED);
|
||||
GPU_material_compile(material_);
|
||||
}
|
||||
|
|
|
@ -9,9 +9,9 @@ void main()
|
|||
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
|
||||
ivec2 output_size = imageSize(output_img);
|
||||
#if defined(SPLIT_HORIZONTAL)
|
||||
bool condition = (output_size.x * split_ratio) < texel.x;
|
||||
bool condition = (output_size.x * split_ratio) <= texel.x;
|
||||
#elif defined(SPLIT_VERTICAL)
|
||||
bool condition = (output_size.y * split_ratio) < texel.y;
|
||||
bool condition = (output_size.y * split_ratio) <= texel.y;
|
||||
#endif
|
||||
vec4 color = condition ? texture_load(first_image_tx, texel) :
|
||||
texture_load(second_image_tx, texel);
|
||||
|
|
|
@ -477,6 +477,10 @@ set(GLSL_SRC
|
|||
engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_combine_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_tile_classify_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_tile_compact_vert.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_tile_stencil_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_deferred_tile_stencil_vert.glsl
|
||||
engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
|
||||
engines/eevee_next/shaders/eevee_depth_of_field_bokeh_lut_comp.glsl
|
||||
engines/eevee_next/shaders/eevee_depth_of_field_downsample_comp.glsl
|
||||
|
|
|
@ -1384,11 +1384,13 @@ static GPUMaterial *eevee_material_get_ex(
|
|||
|
||||
if (ma) {
|
||||
bNodeTree *ntree = !is_default ? ma->nodetree : EEVEE_shader_default_surface_nodetree(ma);
|
||||
mat = DRW_shader_from_material(ma, ntree, options, is_volume, deferred, cbfn, nullptr);
|
||||
mat = DRW_shader_from_material(
|
||||
ma, ntree, GPU_MAT_EEVEE_LEGACY, options, is_volume, deferred, cbfn, nullptr);
|
||||
}
|
||||
else {
|
||||
bNodeTree *ntree = !is_default ? wo->nodetree : EEVEE_shader_default_world_nodetree(wo);
|
||||
mat = DRW_shader_from_world(wo, ntree, options, is_volume, deferred, cbfn, nullptr);
|
||||
mat = DRW_shader_from_world(
|
||||
wo, ntree, GPU_MAT_EEVEE_LEGACY, options, is_volume, deferred, cbfn, nullptr);
|
||||
}
|
||||
return mat;
|
||||
}
|
||||
|
|
|
@ -98,6 +98,10 @@
|
|||
#define SHADOW_MAX_RAY 4
|
||||
#define SHADOW_ROG_ID 0
|
||||
|
||||
/* Deferred Lighting. */
|
||||
#define DEFERRED_RADIANCE_FORMAT GPU_R11F_G11F_B10F
|
||||
#define DEFERRED_GBUFFER_ROG_ID 0
|
||||
|
||||
/* Ray-tracing. */
|
||||
#define RAYTRACE_GROUP_SIZE 8
|
||||
/* Keep this as a define to avoid shader variations. */
|
||||
|
|
|
@ -238,7 +238,7 @@ float Light::point_radiance_get(const ::Light *la)
|
|||
void Light::debug_draw()
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
drw_debug_sphere(_position, influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f));
|
||||
drw_debug_sphere(float3(_position), influence_radius_max, float4(0.8f, 0.3f, 0.0f, 1.0f));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -445,27 +445,27 @@ void DeferredLayer::begin_sync()
|
|||
}
|
||||
{
|
||||
gbuffer_ps_.init();
|
||||
gbuffer_ps_.subpass_transition(GPU_ATTACHEMENT_WRITE,
|
||||
{GPU_ATTACHEMENT_WRITE,
|
||||
GPU_ATTACHEMENT_WRITE,
|
||||
GPU_ATTACHEMENT_WRITE,
|
||||
GPU_ATTACHEMENT_WRITE});
|
||||
/* G-buffer. */
|
||||
gbuffer_ps_.bind_image(GBUF_CLOSURE_SLOT, &inst_.gbuffer.closure_img_tx);
|
||||
gbuffer_ps_.bind_image(GBUF_COLOR_SLOT, &inst_.gbuffer.color_img_tx);
|
||||
/* RenderPasses & AOVs. */
|
||||
gbuffer_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
|
||||
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
|
||||
/* Cryptomatte. */
|
||||
gbuffer_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx);
|
||||
/* Storage Buffer. */
|
||||
/* Textures. */
|
||||
gbuffer_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
|
||||
|
||||
{
|
||||
/* Common resources. */
|
||||
|
||||
/* G-buffer. */
|
||||
gbuffer_ps_.bind_image(GBUF_CLOSURE_SLOT, &inst_.gbuffer.closure_img_tx);
|
||||
gbuffer_ps_.bind_image(GBUF_COLOR_SLOT, &inst_.gbuffer.color_img_tx);
|
||||
/* RenderPasses & AOVs. */
|
||||
gbuffer_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
|
||||
gbuffer_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
|
||||
/* Cryptomatte. */
|
||||
gbuffer_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx);
|
||||
/* Storage Buffer. */
|
||||
/* Textures. */
|
||||
gbuffer_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
|
||||
|
||||
inst_.bind_uniform_data(&gbuffer_ps_);
|
||||
inst_.sampling.bind_resources(gbuffer_ps_);
|
||||
inst_.hiz_buffer.bind_resources(gbuffer_ps_);
|
||||
inst_.cryptomatte.bind_resources(gbuffer_ps_);
|
||||
}
|
||||
inst_.bind_uniform_data(&gbuffer_ps_);
|
||||
inst_.sampling.bind_resources(gbuffer_ps_);
|
||||
inst_.hiz_buffer.bind_resources(gbuffer_ps_);
|
||||
inst_.cryptomatte.bind_resources(gbuffer_ps_);
|
||||
|
||||
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_EQUAL;
|
||||
|
||||
|
@ -483,27 +483,85 @@ void DeferredLayer::end_sync()
|
|||
{
|
||||
eClosureBits evaluated_closures = CLOSURE_DIFFUSE | CLOSURE_REFLECTION | CLOSURE_REFRACTION;
|
||||
if (closure_bits_ & evaluated_closures) {
|
||||
/* First add the tile classification step at the end of the GBuffer pass. */
|
||||
{
|
||||
/* Fill tile mask texture with the collected closure present in a tile. */
|
||||
PassMain::Sub &sub = gbuffer_ps_.sub("TileClassify");
|
||||
sub.subpass_transition(GPU_ATTACHEMENT_WRITE, /* Needed for depth test. */
|
||||
{GPU_ATTACHEMENT_IGNORE,
|
||||
GPU_ATTACHEMENT_READ, /* Header. */
|
||||
GPU_ATTACHEMENT_IGNORE,
|
||||
GPU_ATTACHEMENT_IGNORE});
|
||||
/* Use depth test to reject background pixels. */
|
||||
/* WORKAROUND: Avoid rasterizer discard, but the shaders actually use no fragment output. */
|
||||
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_DEPTH_GREATER);
|
||||
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_CLASSIFY));
|
||||
sub.bind_image("tile_mask_img", &tile_mask_tx_);
|
||||
sub.push_constant("closure_tile_size_shift", &closure_tile_size_shift_);
|
||||
sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
|
||||
sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
|
||||
}
|
||||
{
|
||||
PassMain::Sub &sub = gbuffer_ps_.sub("TileCompaction");
|
||||
/* Use rasterizer discard. This processes the tile data to create tile command lists. */
|
||||
sub.state_set(DRW_STATE_NO_DRAW);
|
||||
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_COMPACT));
|
||||
sub.bind_texture("tile_mask_tx", &tile_mask_tx_);
|
||||
sub.bind_ssbo("closure_single_tile_buf", &closure_bufs_[0].tile_buf_);
|
||||
sub.bind_ssbo("closure_single_draw_buf", &closure_bufs_[0].draw_buf_);
|
||||
sub.bind_ssbo("closure_double_tile_buf", &closure_bufs_[1].tile_buf_);
|
||||
sub.bind_ssbo("closure_double_draw_buf", &closure_bufs_[1].draw_buf_);
|
||||
sub.bind_ssbo("closure_triple_tile_buf", &closure_bufs_[2].tile_buf_);
|
||||
sub.bind_ssbo("closure_triple_draw_buf", &closure_bufs_[2].draw_buf_);
|
||||
sub.barrier(GPU_BARRIER_TEXTURE_FETCH);
|
||||
sub.draw_procedural(GPU_PRIM_POINTS, 1, max_lighting_tile_count_);
|
||||
}
|
||||
|
||||
{
|
||||
PassSimple &pass = eval_light_ps_;
|
||||
pass.init();
|
||||
/* Use depth test to reject background pixels. */
|
||||
/* WORKAROUND: Avoid rasterizer discard, but the shaders actually use no fragment output. */
|
||||
pass.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_DEPTH_GREATER);
|
||||
pass.shader_set(inst_.shaders.static_shader_get(DEFERRED_LIGHT));
|
||||
pass.bind_image("direct_diffuse_img", &direct_diffuse_tx_);
|
||||
pass.bind_image("direct_reflect_img", &direct_reflect_tx_);
|
||||
pass.bind_image("direct_refract_img", &direct_refract_tx_);
|
||||
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
|
||||
pass.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
|
||||
pass.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
|
||||
inst_.bind_uniform_data(&pass);
|
||||
inst_.gbuffer.bind_resources(pass);
|
||||
inst_.lights.bind_resources(pass);
|
||||
inst_.shadows.bind_resources(pass);
|
||||
inst_.sampling.bind_resources(pass);
|
||||
inst_.hiz_buffer.bind_resources(pass);
|
||||
pass.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
|
||||
pass.draw_procedural(GPU_PRIM_TRIS, 1, 3);
|
||||
|
||||
{
|
||||
PassSimple::Sub &sub = pass.sub("StencilSet");
|
||||
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_ALWAYS |
|
||||
DRW_STATE_DEPTH_GREATER);
|
||||
sub.shader_set(inst_.shaders.static_shader_get(DEFERRED_TILE_STENCIL));
|
||||
sub.push_constant("closure_tile_size_shift", &closure_tile_size_shift_);
|
||||
sub.bind_texture("direct_radiance_tx", &direct_radiance_txs_[0]);
|
||||
/* Set stencil value for each tile complexity level. */
|
||||
for (int i = 0; i < ARRAY_SIZE(closure_bufs_); i++) {
|
||||
sub.bind_ssbo("closure_tile_buf", &closure_bufs_[i].tile_buf_);
|
||||
sub.state_stencil(0xFFu, 1u << i, 0xFFu);
|
||||
sub.draw_procedural_indirect(GPU_PRIM_TRIS, closure_bufs_[i].draw_buf_);
|
||||
}
|
||||
}
|
||||
{
|
||||
PassSimple::Sub &sub = pass.sub("Eval");
|
||||
/* Use depth test to reject background pixels which have not been stencil cleared. */
|
||||
/* WORKAROUND: Avoid rasterizer discard by enabling stencil write, but the shaders actually
|
||||
* use no fragment output. */
|
||||
sub.state_set(DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_EQUAL | DRW_STATE_DEPTH_GREATER);
|
||||
sub.barrier(GPU_BARRIER_SHADER_STORAGE);
|
||||
sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
|
||||
sub.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
|
||||
sub.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
|
||||
/* Submit the more costly ones first to avoid long tail in occupancy.
|
||||
* See page 78 of "Siggraph 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */
|
||||
for (int i = ARRAY_SIZE(closure_bufs_) - 1; i >= 0; i--) {
|
||||
sub.shader_set(inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)));
|
||||
sub.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
|
||||
sub.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
|
||||
sub.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);
|
||||
inst_.bind_uniform_data(&sub);
|
||||
inst_.gbuffer.bind_resources(sub);
|
||||
inst_.lights.bind_resources(sub);
|
||||
inst_.shadows.bind_resources(sub);
|
||||
inst_.sampling.bind_resources(sub);
|
||||
inst_.hiz_buffer.bind_resources(sub);
|
||||
sub.state_stencil(0xFFu, 1u << i, 0xFFu);
|
||||
sub.draw_procedural(GPU_PRIM_TRIS, 1, 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
PassSimple &pass = combine_ps_;
|
||||
|
@ -511,9 +569,9 @@ void DeferredLayer::end_sync()
|
|||
/* Use depth test to reject background pixels. */
|
||||
pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_GREATER | DRW_STATE_BLEND_ADD_FULL);
|
||||
pass.shader_set(inst_.shaders.static_shader_get(DEFERRED_COMBINE));
|
||||
pass.bind_image("direct_diffuse_img", &direct_diffuse_tx_);
|
||||
pass.bind_image("direct_reflect_img", &direct_reflect_tx_);
|
||||
pass.bind_image("direct_refract_img", &direct_refract_tx_);
|
||||
pass.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
|
||||
pass.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
|
||||
pass.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);
|
||||
pass.bind_image("indirect_diffuse_img", &indirect_diffuse_tx_);
|
||||
pass.bind_image("indirect_reflect_img", &indirect_reflect_tx_);
|
||||
pass.bind_image("indirect_refract_img", &indirect_refract_tx_);
|
||||
|
@ -566,6 +624,7 @@ void DeferredLayer::render(View &main_view,
|
|||
* environment. So in this case, disable tracing and fallback to probe. */
|
||||
bool do_screen_space_refraction = !is_first_pass && (closure_bits_ & CLOSURE_REFRACTION);
|
||||
bool do_screen_space_reflection = (closure_bits_ & CLOSURE_REFLECTION);
|
||||
eGPUTextureUsage usage_rw = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
||||
|
||||
if (do_screen_space_reflection) {
|
||||
/* TODO(fclem): Verify if GPU_TEXTURE_USAGE_ATTACHMENT is needed for the copy and the clear. */
|
||||
|
@ -609,14 +668,33 @@ void DeferredLayer::render(View &main_view,
|
|||
}
|
||||
}
|
||||
|
||||
if (/* FIXME(fclem): Metal doesn't clear the whole framebuffer correctly. */
|
||||
GPU_backend_get_type() == GPU_BACKEND_METAL ||
|
||||
/* FIXME(fclem): Vulkan doesn't implement load / store config yet. */
|
||||
if (/* FIXME(fclem): Vulkan doesn't implement load / store config yet. */
|
||||
GPU_backend_get_type() == GPU_BACKEND_VULKAN)
|
||||
{
|
||||
inst_.gbuffer.header_tx.clear(int4(0));
|
||||
}
|
||||
|
||||
int2 tile_mask_size;
|
||||
int tile_count;
|
||||
closure_tile_size_shift_ = 4;
|
||||
/* Increase tile size until they fit the budget. */
|
||||
for (int i = 0; i < 4; i++, closure_tile_size_shift_++) {
|
||||
tile_mask_size = math::divide_ceil(extent, int2(1u << closure_tile_size_shift_));
|
||||
tile_count = tile_mask_size.x * tile_mask_size.y;
|
||||
if (tile_count <= max_lighting_tile_count_) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int target_count = power_of_2_max_u(tile_count);
|
||||
for (int i = 0; i < ARRAY_SIZE(closure_bufs_); i++) {
|
||||
closure_bufs_[i].tile_buf_.resize(target_count);
|
||||
closure_bufs_[i].draw_buf_.clear_to_zero();
|
||||
}
|
||||
|
||||
tile_mask_tx_.ensure_2d_array(GPU_R8UI, tile_mask_size, 4, usage_rw);
|
||||
tile_mask_tx_.clear(uint4(0));
|
||||
|
||||
GPU_framebuffer_bind_ex(gbuffer_fb,
|
||||
{
|
||||
{GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE}, /* Depth */
|
||||
|
@ -646,11 +724,10 @@ void DeferredLayer::render(View &main_view,
|
|||
|
||||
inst_.shadows.set_view(render_view);
|
||||
|
||||
{
|
||||
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
|
||||
direct_diffuse_tx_.acquire(extent, GPU_RGBA16F, usage);
|
||||
direct_reflect_tx_.acquire(extent, GPU_RGBA16F, usage);
|
||||
direct_refract_tx_.acquire(extent, GPU_RGBA16F, usage);
|
||||
int closure_count = count_bits_i(closure_bits_ & (CLOSURE_REFLECTION | CLOSURE_DIFFUSE));
|
||||
for (int i = 0; i < ARRAY_SIZE(direct_radiance_txs_); i++) {
|
||||
direct_radiance_txs_[i].acquire(
|
||||
(closure_count > 1) ? extent : int2(1), GPU_R11F_G11F_B10F, usage_rw);
|
||||
}
|
||||
|
||||
GPU_framebuffer_bind(combined_fb);
|
||||
|
@ -676,7 +753,8 @@ void DeferredLayer::render(View &main_view,
|
|||
indirect_reflect_tx_ = reflect_result.get();
|
||||
indirect_refract_tx_ = refract_result.get();
|
||||
|
||||
inst_.subsurface.render(direct_diffuse_tx_, indirect_diffuse_tx_, closure_bits_, render_view);
|
||||
inst_.subsurface.render(
|
||||
direct_radiance_txs_[0], indirect_diffuse_tx_, closure_bits_, render_view);
|
||||
|
||||
GPU_framebuffer_bind(combined_fb);
|
||||
inst_.manager->submit(combine_ps_);
|
||||
|
@ -685,9 +763,9 @@ void DeferredLayer::render(View &main_view,
|
|||
refract_result.release();
|
||||
reflect_result.release();
|
||||
|
||||
direct_diffuse_tx_.release();
|
||||
direct_reflect_tx_.release();
|
||||
direct_refract_tx_.release();
|
||||
for (int i = 0; i < ARRAY_SIZE(direct_radiance_txs_); i++) {
|
||||
direct_radiance_txs_[i].release();
|
||||
}
|
||||
|
||||
if (do_screen_space_reflection) {
|
||||
GPU_texture_copy(radiance_feedback_tx_, rb.combined_tx);
|
||||
|
|
|
@ -204,6 +204,8 @@ class DeferredLayer : DeferredLayerBase {
|
|||
private:
|
||||
Instance &inst_;
|
||||
|
||||
static constexpr int max_lighting_tile_count_ = 128 * 128;
|
||||
|
||||
/* Evaluate all light objects contribution. */
|
||||
PassSimple eval_light_ps_ = {"EvalLights"};
|
||||
/* Combine direct and indirect light contributions and apply BSDF color. */
|
||||
|
@ -216,15 +218,28 @@ class DeferredLayer : DeferredLayerBase {
|
|||
* BSDF color and do additive blending for each of the lighting step.
|
||||
*
|
||||
* NOTE: Not to be confused with the render passes.
|
||||
* NOTE: Using an array of textures instead of a texture array to allow the use of TextureFromPool.
|
||||
*/
|
||||
TextureFromPool direct_diffuse_tx_ = {"direct_diffuse_tx"};
|
||||
TextureFromPool direct_reflect_tx_ = {"direct_reflect_tx"};
|
||||
TextureFromPool direct_refract_tx_ = {"direct_refract_tx"};
|
||||
TextureFromPool direct_radiance_txs_[3] = {
|
||||
{"direct_radiance_1"}, {"direct_radiance_2"}, {"direct_radiance_3"}};
|
||||
/* Reference to ray-tracing result. */
|
||||
GPUTexture *indirect_diffuse_tx_ = nullptr;
|
||||
GPUTexture *indirect_reflect_tx_ = nullptr;
|
||||
GPUTexture *indirect_refract_tx_ = nullptr;
|
||||
|
||||
/* Parameters for the light evaluation pass. */
|
||||
int closure_tile_size_shift_ = 0;
|
||||
/* Tile buffers for different lighting complexity levels. */
|
||||
struct {
|
||||
DrawIndirectBuf draw_buf_ = {"DrawIndirectBuf"};
|
||||
ClosureTileBuf tile_buf_ = {"ClosureTileBuf"};
|
||||
} closure_bufs_[3];
|
||||
/**
|
||||
* Tile texture containing several booleans per tile, each indicating the presence of a feature.
|
||||
* It is used to select specialized shader for each tile.
|
||||
*/
|
||||
Texture tile_mask_tx_ = {"tile_mask_tx_"};
|
||||
|
||||
/* TODO(fclem): This should be a TextureFromPool. */
|
||||
Texture radiance_behind_tx_ = {"radiance_behind_tx"};
|
||||
/* TODO(fclem): This shouldn't be part of the pipeline but of the view. */
|
||||
|
|
|
@ -94,12 +94,22 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
|
|||
return "eevee_film_cryptomatte_post";
|
||||
case DEFERRED_COMBINE:
|
||||
return "eevee_deferred_combine";
|
||||
case DEFERRED_LIGHT:
|
||||
return "eevee_deferred_light";
|
||||
case DEFERRED_LIGHT_SINGLE:
|
||||
return "eevee_deferred_light_single";
|
||||
case DEFERRED_LIGHT_DOUBLE:
|
||||
return "eevee_deferred_light_double";
|
||||
case DEFERRED_LIGHT_TRIPLE:
|
||||
return "eevee_deferred_light_triple";
|
||||
case DEFERRED_CAPTURE_EVAL:
|
||||
return "eevee_deferred_capture_eval";
|
||||
case DEFERRED_PLANAR_EVAL:
|
||||
return "eevee_deferred_planar_eval";
|
||||
case DEFERRED_TILE_CLASSIFY:
|
||||
return "eevee_deferred_tile_classify";
|
||||
case DEFERRED_TILE_COMPACT:
|
||||
return "eevee_deferred_tile_compact";
|
||||
case DEFERRED_TILE_STENCIL:
|
||||
return "eevee_deferred_tile_stencil";
|
||||
case HIZ_DEBUG:
|
||||
return "eevee_hiz_debug";
|
||||
case HIZ_UPDATE:
|
||||
|
@ -668,8 +678,14 @@ GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
|
|||
uint64_t shader_uuid = shader_uuid_from_material_type(
|
||||
pipeline_type, geometry_type, displacement_type, blender_mat->blend_flag);
|
||||
|
||||
return DRW_shader_from_material(
|
||||
blender_mat, nodetree, shader_uuid, is_volume, deferred_compilation, codegen_callback, this);
|
||||
return DRW_shader_from_material(blender_mat,
|
||||
nodetree,
|
||||
GPU_MAT_EEVEE,
|
||||
shader_uuid,
|
||||
is_volume,
|
||||
deferred_compilation,
|
||||
codegen_callback,
|
||||
this);
|
||||
}
|
||||
|
||||
GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
|
||||
|
@ -683,8 +699,14 @@ GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
|
|||
|
||||
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
|
||||
|
||||
return DRW_shader_from_world(
|
||||
blender_world, nodetree, shader_uuid, is_volume, defer_compilation, codegen_callback, this);
|
||||
return DRW_shader_from_world(blender_world,
|
||||
nodetree,
|
||||
GPU_MAT_EEVEE,
|
||||
shader_uuid,
|
||||
is_volume,
|
||||
defer_compilation,
|
||||
codegen_callback,
|
||||
this);
|
||||
}
|
||||
|
||||
/* Variation to compile a material only with a nodetree. Caller needs to maintain the list of
|
||||
|
@ -704,6 +726,7 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
|
|||
nodetree,
|
||||
&materials,
|
||||
name,
|
||||
GPU_MAT_EEVEE,
|
||||
shader_uuid,
|
||||
is_volume,
|
||||
false,
|
||||
|
|
|
@ -32,10 +32,15 @@ enum eShaderType {
|
|||
FILM_COMP,
|
||||
FILM_CRYPTOMATTE_POST,
|
||||
|
||||
DEFERRED_COMBINE,
|
||||
DEFERRED_LIGHT,
|
||||
DEFERRED_CAPTURE_EVAL,
|
||||
DEFERRED_COMBINE,
|
||||
DEFERRED_LIGHT_SINGLE,
|
||||
DEFERRED_LIGHT_DOUBLE,
|
||||
DEFERRED_LIGHT_TRIPLE,
|
||||
DEFERRED_PLANAR_EVAL,
|
||||
DEFERRED_TILE_CLASSIFY,
|
||||
DEFERRED_TILE_COMPACT,
|
||||
DEFERRED_TILE_STENCIL,
|
||||
|
||||
DEBUG_GBUFFER,
|
||||
DEBUG_SURFELS,
|
||||
|
|
|
@ -749,10 +749,10 @@ struct LightData {
|
|||
#define _clipmap_origin_y object_mat[3][3]
|
||||
/** Aliases for axes. */
|
||||
#ifndef USE_GPU_SHADER_CREATE_INFO
|
||||
# define _right object_mat[0].xyz()
|
||||
# define _up object_mat[1].xyz()
|
||||
# define _back object_mat[2].xyz()
|
||||
# define _position object_mat[3].xyz()
|
||||
# define _right object_mat[0]
|
||||
# define _up object_mat[1]
|
||||
# define _back object_mat[2]
|
||||
# define _position object_mat[3]
|
||||
#else
|
||||
# define _right object_mat[0].xyz
|
||||
# define _up object_mat[1].xyz
|
||||
|
@ -1426,7 +1426,7 @@ struct PipelineInfoData {
|
|||
float alpha_hash_scale;
|
||||
float _pad0;
|
||||
float _pad1;
|
||||
float _pad3;
|
||||
float _pad2;
|
||||
};
|
||||
BLI_STATIC_ASSERT_ALIGN(PipelineInfoData, 16)
|
||||
|
||||
|
@ -1528,6 +1528,7 @@ float4 utility_tx_sample_lut(sampler2DArray util_tx, float cos_theta, float roug
|
|||
|
||||
using AOVsInfoDataBuf = draw::StorageBuffer<AOVsInfoData>;
|
||||
using CameraDataBuf = draw::UniformBuffer<CameraData>;
|
||||
using ClosureTileBuf = draw::StorageArrayBuffer<uint, 1024, true>;
|
||||
using DepthOfFieldDataBuf = draw::UniformBuffer<DepthOfFieldData>;
|
||||
using DepthOfFieldScatterListBuf = draw::StorageArrayBuffer<ScatterRect, 16, true>;
|
||||
using DrawIndirectBuf = draw::StorageBuffer<DrawCommand, true>;
|
||||
|
|
|
@ -22,18 +22,19 @@ void main()
|
|||
vec3 refract_light = vec3(0.0);
|
||||
|
||||
if (gbuf.has_diffuse) {
|
||||
diffuse_light = imageLoad(direct_diffuse_img, texel).rgb +
|
||||
diffuse_light = imageLoad(direct_radiance_1_img, texel).rgb +
|
||||
imageLoad(indirect_diffuse_img, texel).rgb;
|
||||
}
|
||||
|
||||
if (gbuf.has_reflection) {
|
||||
reflect_light = imageLoad(direct_reflect_img, texel).rgb +
|
||||
reflect_light = imageLoad(direct_radiance_2_img, texel).rgb +
|
||||
imageLoad(indirect_reflect_img, texel).rgb;
|
||||
}
|
||||
|
||||
if (gbuf.has_refraction) {
|
||||
refract_light = /* imageLoad(direct_refract_img, texel).rgb + */ /* TODO: Not implemented. */
|
||||
imageLoad(indirect_refract_img, texel).rgb;
|
||||
refract_light =
|
||||
/* imageLoad(direct_radiance_3_img, texel).rgb + */ /* TODO: Not implemented. */
|
||||
imageLoad(indirect_refract_img, texel).rgb;
|
||||
}
|
||||
|
||||
/* Light passes. */
|
||||
|
|
|
@ -20,32 +20,45 @@ void main()
|
|||
float depth = texelFetch(hiz_tx, texel, 0).r;
|
||||
GBufferData gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_color_tx, texel);
|
||||
|
||||
if (!gbuf.has_reflection && !gbuf.has_diffuse /* TODO(fclem) && !gbuf.has_refraction */) {
|
||||
if (gbuf.closure_count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
vec3 P = drw_point_screen_to_world(vec3(uvcoordsvar.xy, depth));
|
||||
/* Assume reflection closure normal is always somewhat representative of the geometric normal.
|
||||
* Ng is only used for shadow biases and subsurface check in this case. */
|
||||
vec3 Ng = gbuf.has_reflection ? gbuf.reflection.N : gbuf.diffuse.N;
|
||||
vec3 Ng = gbuf.surface_N;
|
||||
vec3 V = drw_world_incident_vector(P);
|
||||
float vPz = dot(drw_view_forward(), P) - dot(drw_view_forward(), drw_view_position());
|
||||
|
||||
ClosureLightStack stack;
|
||||
|
||||
ClosureLight cl_diff;
|
||||
cl_diff.N = gbuf.diffuse.N;
|
||||
cl_diff.ltc_mat = LTC_LAMBERT_MAT;
|
||||
cl_diff.type = LIGHT_DIFFUSE;
|
||||
stack.cl[0] = cl_diff;
|
||||
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
|
||||
* raytracing pipeline first. */
|
||||
if (gbuf.has_diffuse) {
|
||||
ClosureLight cl_diff;
|
||||
cl_diff.N = gbuf.diffuse.N;
|
||||
cl_diff.ltc_mat = LTC_LAMBERT_MAT;
|
||||
cl_diff.type = LIGHT_DIFFUSE;
|
||||
stack.cl[0] = cl_diff;
|
||||
}
|
||||
else {
|
||||
ClosureLight cl_refl;
|
||||
cl_refl.N = gbuf.reflection.N;
|
||||
cl_refl.ltc_mat = LTC_GGX_MAT(dot(gbuf.reflection.N, V), gbuf.reflection.roughness);
|
||||
cl_refl.type = LIGHT_SPECULAR;
|
||||
stack.cl[0] = cl_refl;
|
||||
}
|
||||
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 1
|
||||
ClosureLight cl_refl;
|
||||
cl_refl.N = gbuf.reflection.N;
|
||||
cl_refl.ltc_mat = LTC_GGX_MAT(dot(gbuf.reflection.N, V), gbuf.reflection.roughness);
|
||||
cl_refl.type = LIGHT_SPECULAR;
|
||||
stack.cl[1] = cl_refl;
|
||||
#endif
|
||||
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 2
|
||||
ClosureLight cl_sss;
|
||||
cl_sss.N = -gbuf.diffuse.N;
|
||||
cl_sss.ltc_mat = LTC_LAMBERT_MAT;
|
||||
|
@ -53,54 +66,65 @@ void main()
|
|||
stack.cl[2] = cl_sss;
|
||||
#endif
|
||||
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
float shadow_thickness = thickness_from_shadow(P, Ng, vPz);
|
||||
float thickness = (shadow_thickness != THICKNESS_NO_VALUE) ?
|
||||
max(shadow_thickness, gbuf.thickness) :
|
||||
gbuf.thickness;
|
||||
#else
|
||||
float thickness = 0.0;
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
if (gbuf.has_sss) {
|
||||
float shadow_thickness = thickness_from_shadow(P, Ng, vPz);
|
||||
thickness = (shadow_thickness != THICKNESS_NO_VALUE) ? max(shadow_thickness, gbuf.thickness) :
|
||||
gbuf.thickness;
|
||||
}
|
||||
#endif
|
||||
|
||||
light_eval(stack, P, Ng, V, vPz, thickness);
|
||||
|
||||
vec3 radiance_shadowed = stack.cl[0].light_shadowed;
|
||||
vec3 radiance_unshadowed = stack.cl[0].light_unshadowed;
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 1
|
||||
radiance_shadowed += stack.cl[1].light_shadowed;
|
||||
radiance_unshadowed += stack.cl[1].light_unshadowed;
|
||||
#endif
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 2
|
||||
radiance_shadowed += stack.cl[2].light_shadowed;
|
||||
radiance_unshadowed += stack.cl[2].light_unshadowed;
|
||||
#endif
|
||||
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
if (gbuf.diffuse.sss_id != 0u) {
|
||||
if (gbuf.has_sss) {
|
||||
vec3 sss_profile = subsurface_transmission(gbuf.diffuse.sss_radius, thickness);
|
||||
stack.cl[2].light_shadowed *= sss_profile;
|
||||
stack.cl[2].light_unshadowed *= sss_profile;
|
||||
/* Add to diffuse light for processing inside the Screen Space SSS pass. */
|
||||
stack.cl[0].light_shadowed += stack.cl[2].light_shadowed;
|
||||
stack.cl[0].light_unshadowed += stack.cl[2].light_unshadowed;
|
||||
}
|
||||
else {
|
||||
stack.cl[2].light_shadowed = vec3(0.0);
|
||||
stack.cl[2].light_unshadowed = vec3(0.0);
|
||||
}
|
||||
#endif
|
||||
|
||||
vec3 radiance_diffuse = stack.cl[0].light_shadowed;
|
||||
vec3 radiance_specular = stack.cl[1].light_shadowed;
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
radiance_diffuse += stack.cl[2].light_shadowed;
|
||||
#endif
|
||||
|
||||
vec3 radiance_shadowed = stack.cl[0].light_shadowed;
|
||||
vec3 radiance_unshadowed = stack.cl[0].light_unshadowed;
|
||||
radiance_shadowed += stack.cl[1].light_shadowed;
|
||||
radiance_unshadowed += stack.cl[1].light_unshadowed;
|
||||
#ifdef SSS_TRANSMITTANCE
|
||||
radiance_shadowed += stack.cl[2].light_shadowed;
|
||||
radiance_unshadowed += stack.cl[2].light_unshadowed;
|
||||
#endif
|
||||
|
||||
/* TODO(fclem): Change shadow pass to be colored. */
|
||||
vec3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed);
|
||||
output_renderpass_value(uniform_buf.render_pass.shadow_id, average(shadows));
|
||||
|
||||
if (gbuf.has_diffuse) {
|
||||
imageStore(direct_diffuse_img, texel, vec4(radiance_diffuse, 1.0));
|
||||
if (gbuf.closure_count > 0) {
|
||||
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
|
||||
* raytracing pipeline first. */
|
||||
if (gbuf.has_diffuse) {
|
||||
imageStore(direct_radiance_1_img, texel, vec4(stack.cl[0].light_shadowed, 1.0));
|
||||
}
|
||||
else {
|
||||
imageStore(direct_radiance_2_img, texel, vec4(stack.cl[0].light_shadowed, 1.0));
|
||||
}
|
||||
}
|
||||
if (gbuf.has_reflection) {
|
||||
imageStore(direct_reflect_img, texel, vec4(radiance_specular, 1.0));
|
||||
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 1
|
||||
if (gbuf.closure_count > 1) {
|
||||
imageStore(direct_radiance_2_img, texel, vec4(stack.cl[1].light_shadowed, 1.0));
|
||||
}
|
||||
/* TODO(fclem): Support LTC for refraction. */
|
||||
// imageStore(direct_refract_img, texel, vec4(cl_refr.light_shadowed, 1.0));
|
||||
#endif
|
||||
|
||||
#if LIGHT_CLOSURE_EVAL_COUNT > 2
|
||||
# if 0 /* Will work when we have fully flexible evaluation. */
|
||||
if (gbuf.closure_count > 2) {
|
||||
imageStore(direct_radiance_3_img, texel, vec4(stack.cl[2].light_shadowed, 1.0));
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* This pass loads G-buffer data and outputs a mask of tiles to process.
|
||||
* This mask is then processed by the compaction phase.
|
||||
*/
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_math_vector_lib.glsl)
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
|
||||
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
|
||||
|
||||
/* For the current fragment, flag in `tile_mask_img` every closure type found in
 * `in_gbuffer_header`. One mask layer is written per closure type:
 * 0 = diffuse, 1 = reflection, 2 = refraction, 3 = subsurface scattering. */
void main()
|
||||
{
|
||||
/* Integer pixel coordinate of this fragment. */
ivec2 texel = ivec2(gl_FragCoord.xy);
|
||||
|
||||
/* Coordinate of the tile containing this pixel: the right shift divides the pixel
 * coordinate by (1 << closure_tile_size_shift). */
ivec2 tile_co = texel >> closure_tile_size_shift;
|
||||
|
||||
/* Layer 0: diffuse closure present. */
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_DIFFUSE))) {
|
||||
imageStore(tile_mask_img, ivec3(tile_co, 0), uvec4(1u));
|
||||
}
|
||||
/* Layer 1: reflection closure present. */
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_REFLECTION))) {
|
||||
imageStore(tile_mask_img, ivec3(tile_co, 1), uvec4(1u));
|
||||
}
|
||||
/* Layer 2: refraction closure present. */
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_REFRACTION))) {
|
||||
imageStore(tile_mask_img, ivec3(tile_co, 2), uvec4(1u));
|
||||
}
|
||||
/* Layer 3: subsurface scattering closure present. */
if (gbuffer_has_closure(in_gbuffer_header, eClosureBits(CLOSURE_SSS))) {
|
||||
imageStore(tile_mask_img, ivec3(tile_co, 3), uvec4(1u));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Convert the tile classification texture into streams of tiles, one stream per tile type.
|
||||
* Dispatched with 1 vertex (thread) per tile.
|
||||
*/
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
|
||||
|
||||
void main()
|
||||
{
|
||||
/* Doesn't matter. Doesn't get rasterized. */
|
||||
gl_Position = vec4(0.0);
|
||||
|
||||
int tile_per_row = textureSize(tile_mask_tx, 0).x;
|
||||
ivec2 tile_coord = ivec2(gl_VertexID % tile_per_row, gl_VertexID / tile_per_row);
|
||||
|
||||
if (gl_VertexID == 0) {
|
||||
closure_double_draw_buf.instance_len = 1u;
|
||||
closure_single_draw_buf.instance_len = 1u;
|
||||
closure_triple_draw_buf.instance_len = 1u;
|
||||
}
|
||||
|
||||
if (!in_texture_range(tile_coord, tile_mask_tx)) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint closure_count = texelFetch(tile_mask_tx, ivec3(tile_coord, 0), 0).r +
|
||||
texelFetch(tile_mask_tx, ivec3(tile_coord, 1), 0).r +
|
||||
// texelFetch(tile_mask_tx, ivec3(tile_coord, 2), 0).r + /* TODO: refract */
|
||||
texelFetch(tile_mask_tx, ivec3(tile_coord, 3), 0).r;
|
||||
/* TODO(fclem): This is waiting for fully flexible evaluation pipeline. We need to refactor the
|
||||
* raytracing pipeline first. */
|
||||
bool has_reflection = texelFetch(tile_mask_tx, ivec3(tile_coord, 1), 0).r != 0u;
|
||||
bool has_sss = texelFetch(tile_mask_tx, ivec3(tile_coord, 3), 0).r != 0u;
|
||||
|
||||
if (closure_count == 3 || has_sss) {
|
||||
uint tile_index = atomicAdd(closure_triple_draw_buf.vertex_len, 6u) / 6u;
|
||||
closure_triple_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
|
||||
}
|
||||
else if (closure_count == 2 || has_reflection) {
|
||||
uint tile_index = atomicAdd(closure_double_draw_buf.vertex_len, 6u) / 6u;
|
||||
closure_double_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
|
||||
}
|
||||
else if (closure_count == 1) {
|
||||
uint tile_index = atomicAdd(closure_single_draw_buf.vertex_len, 6u) / 6u;
|
||||
closure_single_tile_buf[tile_index] = packUvec2x16(uvec2(tile_coord));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Load tile classification data and mark stencil areas.
|
||||
*/
|
||||
|
||||
void main()
|
||||
{
|
||||
/* Stencil only pass. Passthrough. */
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Load tile classification data and mark stencil areas.
|
||||
*/
|
||||
|
||||
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
|
||||
|
||||
void main()
|
||||
{
|
||||
int tile_id = gl_VertexID / 6;
|
||||
int vertex_id = gl_VertexID % 6;
|
||||
ivec2 tile_coord = ivec2(unpackUvec2x16(closure_tile_buf[tile_id]));
|
||||
|
||||
/* Generate Quad with 2 triangles with same winding.
|
||||
* This way it can be merged on some hardware. */
|
||||
int v = (vertex_id > 2) ? (3 - (vertex_id - 3)) : vertex_id;
|
||||
ivec2 tile_corner = ivec2(v & 1, v >> 1);
|
||||
|
||||
int tile_size = (1 << closure_tile_size_shift);
|
||||
vec2 ss_coord = vec2((tile_coord + tile_corner) * tile_size) /
|
||||
vec2(textureSize(direct_radiance_tx, 0));
|
||||
vec2 ndc_coord = ss_coord * 2.0 - 1.0;
|
||||
|
||||
/* gl_Position expects homogeneous coordinates, but these are the same as NDC in 2D mode. */
|
||||
gl_Position = vec4(ndc_coord, 1.0, 1.0);
|
||||
}
|
|
@ -163,6 +163,13 @@ bool gbuffer_has_closure(uint header, eClosureBits closure)
|
|||
return has_diffuse;
|
||||
}
|
||||
|
||||
bool has_sss = (gbuffer_header_unpack(header, layer) == GBUF_SSS);
|
||||
layer += int(has_sss);
|
||||
|
||||
if (closure == eClosureBits(CLOSURE_SSS)) {
|
||||
return has_sss;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -265,8 +272,10 @@ struct GBufferData {
|
|||
bool has_diffuse;
|
||||
bool has_reflection;
|
||||
bool has_refraction;
|
||||
bool has_sss;
|
||||
bool has_any_surface;
|
||||
uint header;
|
||||
uint closure_count;
|
||||
};
|
||||
|
||||
GBufferData gbuffer_read(usampler2D header_tx,
|
||||
|
@ -287,6 +296,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
}
|
||||
|
||||
gbuf.thickness = 0.0;
|
||||
gbuf.closure_count = 0u;
|
||||
|
||||
/* First closure is always written. */
|
||||
gbuf.surface_N = gbuffer_normal_unpack(texelFetch(closure_tx, ivec3(texel, 0), 0).xy);
|
||||
|
@ -318,6 +328,8 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
gbuf.diffuse.sss_radius = vec3(0.0, 0.0, 0.0);
|
||||
gbuf.diffuse.sss_id = 0u;
|
||||
|
||||
gbuf.closure_count = 2u;
|
||||
|
||||
return gbuf;
|
||||
}
|
||||
|
||||
|
@ -333,6 +345,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
gbuf.refraction.N = gbuffer_normal_unpack(closure_packed.xy);
|
||||
gbuf.refraction.roughness = closure_packed.z;
|
||||
gbuf.refraction.ior = gbuffer_ior_unpack(closure_packed.w);
|
||||
gbuf.closure_count += 1u;
|
||||
layer += 1;
|
||||
}
|
||||
else {
|
||||
|
@ -352,6 +365,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
gbuf.reflection.color = gbuffer_color_unpack(color_packed);
|
||||
gbuf.reflection.N = gbuffer_normal_unpack(closure_packed.xy);
|
||||
gbuf.reflection.roughness = closure_packed.z;
|
||||
gbuf.closure_count += 1u;
|
||||
layer += 1;
|
||||
}
|
||||
else {
|
||||
|
@ -370,6 +384,7 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
gbuf.diffuse.color = gbuffer_color_unpack(color_packed);
|
||||
gbuf.diffuse.N = gbuffer_normal_unpack(closure_packed.xy);
|
||||
gbuf.thickness = gbuffer_thickness_unpack(closure_packed.w);
|
||||
gbuf.closure_count += 1u;
|
||||
layer += 1;
|
||||
}
|
||||
else {
|
||||
|
@ -379,9 +394,9 @@ GBufferData gbuffer_read(usampler2D header_tx,
|
|||
gbuf.thickness = 0.0;
|
||||
}
|
||||
|
||||
bool has_sss = (gbuffer_header_unpack(gbuf.header, layer) == GBUF_SSS);
|
||||
gbuf.has_sss = (gbuffer_header_unpack(gbuf.header, layer) == GBUF_SSS);
|
||||
|
||||
if (has_sss) {
|
||||
if (gbuf.has_sss) {
|
||||
vec4 closure_packed = texelFetch(closure_tx, ivec3(texel, layer), 0);
|
||||
|
||||
gbuf.diffuse.sss_radius = gbuffer_sss_radii_unpack(closure_packed.xyz);
|
||||
|
|
|
@ -17,17 +17,51 @@ GPU_SHADER_CREATE_INFO(eevee_gbuffer_data)
|
|||
.sampler(9, ImageType::FLOAT_2D_ARRAY, "gbuf_closure_tx")
|
||||
.sampler(10, ImageType::FLOAT_2D_ARRAY, "gbuf_color_tx");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_classify)
|
||||
.fragment_source("eevee_deferred_tile_classify_frag.glsl")
|
||||
/* Early fragment test is needed to avoid processing background fragments. */
|
||||
.early_fragment_test(true)
|
||||
.additional_info("eevee_shared", "draw_fullscreen")
|
||||
.subpass_in(1, Type::UINT, "in_gbuffer_header", DEFERRED_GBUFFER_ROG_ID)
|
||||
.typedef_source("draw_shader_shared.h")
|
||||
.image(0, GPU_R8UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "tile_mask_img")
|
||||
.push_constant(Type::INT, "closure_tile_size_shift")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_compact)
|
||||
.additional_info("eevee_shared")
|
||||
.typedef_source("draw_shader_shared.h")
|
||||
.vertex_source("eevee_deferred_tile_compact_vert.glsl")
|
||||
/* Reuse dummy stencil frag. */
|
||||
.fragment_source("eevee_deferred_tile_stencil_frag.glsl")
|
||||
.storage_buf(0, Qualifier::READ_WRITE, "DrawCommand", "closure_single_draw_buf")
|
||||
.storage_buf(1, Qualifier::READ_WRITE, "DrawCommand", "closure_double_draw_buf")
|
||||
.storage_buf(2, Qualifier::READ_WRITE, "DrawCommand", "closure_triple_draw_buf")
|
||||
.storage_buf(3, Qualifier::WRITE, "uint", "closure_single_tile_buf[]")
|
||||
.storage_buf(4, Qualifier::WRITE, "uint", "closure_double_tile_buf[]")
|
||||
.storage_buf(5, Qualifier::WRITE, "uint", "closure_triple_tile_buf[]")
|
||||
.sampler(0, ImageType::UINT_2D_ARRAY, "tile_mask_tx")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_tile_stencil)
|
||||
.vertex_source("eevee_deferred_tile_stencil_vert.glsl")
|
||||
.fragment_source("eevee_deferred_tile_stencil_frag.glsl")
|
||||
.additional_info("eevee_shared")
|
||||
/* Only for texture size. */
|
||||
.sampler(0, ImageType::FLOAT_2D, "direct_radiance_tx")
|
||||
.storage_buf(4, Qualifier::READ, "uint", "closure_tile_buf[]")
|
||||
.push_constant(Type::INT, "closure_tile_size_shift")
|
||||
.typedef_source("draw_shader_shared.h")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_light)
|
||||
.fragment_source("eevee_deferred_light_frag.glsl")
|
||||
/* Early fragment test is needed to avoid processing fragments without correct GBuffer data. */
|
||||
/* Early fragment test is needed to avoid processing background fragments. */
|
||||
.early_fragment_test(true)
|
||||
/* Chaining to next pass. */
|
||||
/* TODO(@fclem): These could use the sub-pass feature. */
|
||||
.image_out(2, GPU_RGBA16F, "direct_diffuse_img")
|
||||
.image_out(3, GPU_RGBA16F, "direct_reflect_img")
|
||||
.image_out(4, GPU_RGBA16F, "direct_refract_img")
|
||||
.define("SSS_TRANSMITTANCE")
|
||||
.define("LIGHT_CLOSURE_EVAL_COUNT", "3")
|
||||
.image_out(2, DEFERRED_RADIANCE_FORMAT, "direct_radiance_1_img")
|
||||
.image_out(3, DEFERRED_RADIANCE_FORMAT, "direct_radiance_2_img")
|
||||
.image_out(4, DEFERRED_RADIANCE_FORMAT, "direct_radiance_3_img")
|
||||
.additional_info("eevee_shared",
|
||||
"eevee_gbuffer_data",
|
||||
"eevee_utility_texture",
|
||||
|
@ -36,17 +70,32 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_light)
|
|||
"eevee_shadow_data",
|
||||
"eevee_hiz_data",
|
||||
"eevee_render_pass_out",
|
||||
"draw_view",
|
||||
"draw_fullscreen")
|
||||
"draw_fullscreen",
|
||||
"draw_view");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_light_single)
|
||||
.additional_info("eevee_deferred_light")
|
||||
.define("LIGHT_CLOSURE_EVAL_COUNT", "1")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_light_double)
|
||||
.additional_info("eevee_deferred_light")
|
||||
.define("LIGHT_CLOSURE_EVAL_COUNT", "2")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_light_triple)
|
||||
.additional_info("eevee_deferred_light")
|
||||
.define("SSS_TRANSMITTANCE")
|
||||
.define("LIGHT_CLOSURE_EVAL_COUNT", "3")
|
||||
.do_static_compilation(true);
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_deferred_combine)
|
||||
/* Early fragment test is needed to avoid processing fragments without correct GBuffer data. */
|
||||
/* Early fragment test is needed to avoid processing background fragments. */
|
||||
.early_fragment_test(true)
|
||||
/* Inputs. */
|
||||
.image_in(2, GPU_RGBA16F, "direct_diffuse_img")
|
||||
.image_in(3, GPU_RGBA16F, "direct_reflect_img")
|
||||
.image_in(4, GPU_RGBA16F, "direct_refract_img")
|
||||
.image_in(2, DEFERRED_RADIANCE_FORMAT, "direct_radiance_1_img")
|
||||
.image_in(3, DEFERRED_RADIANCE_FORMAT, "direct_radiance_2_img")
|
||||
.image_in(4, DEFERRED_RADIANCE_FORMAT, "direct_radiance_3_img")
|
||||
.image_in(5, RAYTRACE_RADIANCE_FORMAT, "indirect_diffuse_img")
|
||||
.image_in(6, RAYTRACE_RADIANCE_FORMAT, "indirect_reflect_img")
|
||||
.image_in(7, RAYTRACE_RADIANCE_FORMAT, "indirect_refract_img")
|
||||
|
@ -98,6 +147,7 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_planar_eval)
|
|||
.fragment_source("eevee_deferred_planar_frag.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
#undef image_array_out
|
||||
#undef image_out
|
||||
#undef image_in
|
||||
|
||||
|
|
|
@ -153,7 +153,7 @@ GPU_SHADER_CREATE_INFO(eevee_surf_deferred)
|
|||
.early_fragment_test(true)
|
||||
/* Direct output. (Emissive, Holdout) */
|
||||
.fragment_out(0, Type::VEC4, "out_radiance")
|
||||
.fragment_out(1, Type::UINT, "out_gbuf_header")
|
||||
.fragment_out(1, Type::UINT, "out_gbuf_header", DualBlend::NONE, DEFERRED_GBUFFER_ROG_ID)
|
||||
.fragment_out(2, Type::VEC4, "out_gbuf_color")
|
||||
.fragment_out(3, Type::VEC4, "out_gbuf_closure")
|
||||
/* Everything is stored inside a two layered target, one for each format. This is to fit the
|
||||
|
|
|
@ -11,7 +11,7 @@ GPU_SHADER_CREATE_INFO(eevee_subsurface_setup)
|
|||
.typedef_source("draw_shader_shared.h")
|
||||
.additional_info("draw_view", "eevee_shared", "eevee_gbuffer_data")
|
||||
.sampler(2, ImageType::DEPTH_2D, "depth_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "direct_light_img")
|
||||
.image(0, DEFERRED_RADIANCE_FORMAT, Qualifier::READ, ImageType::FLOAT_2D, "direct_light_img")
|
||||
.image(1, RAYTRACE_RADIANCE_FORMAT, Qualifier::READ, ImageType::FLOAT_2D, "indirect_light_img")
|
||||
.image(2, SUBSURFACE_OBJECT_ID_FORMAT, Qualifier::WRITE, ImageType::UINT_2D, "object_id_img")
|
||||
.image(3, SUBSURFACE_RADIANCE_FORMAT, Qualifier::WRITE, ImageType::FLOAT_2D, "radiance_img")
|
||||
|
|
|
@ -296,6 +296,7 @@ struct GPUShader *DRW_shader_create_fullscreen_with_shaderlib_ex(const char *fra
|
|||
|
||||
struct GPUMaterial *DRW_shader_from_world(struct World *wo,
|
||||
struct bNodeTree *ntree,
|
||||
eGPUMaterialEngine engine,
|
||||
const uint64_t shader_id,
|
||||
const bool is_volume_shader,
|
||||
bool deferred,
|
||||
|
@ -303,6 +304,7 @@ struct GPUMaterial *DRW_shader_from_world(struct World *wo,
|
|||
void *thunk);
|
||||
struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
|
||||
struct bNodeTree *ntree,
|
||||
eGPUMaterialEngine engine,
|
||||
const uint64_t shader_id,
|
||||
const bool is_volume_shader,
|
||||
bool deferred,
|
||||
|
|
|
@ -493,6 +493,7 @@ GPUShader *DRW_shader_create_fullscreen_with_shaderlib_ex(const char *frag,
|
|||
|
||||
GPUMaterial *DRW_shader_from_world(World *wo,
|
||||
bNodeTree *ntree,
|
||||
eGPUMaterialEngine engine,
|
||||
const uint64_t shader_id,
|
||||
const bool is_volume_shader,
|
||||
bool deferred,
|
||||
|
@ -505,6 +506,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
|
|||
ntree,
|
||||
&wo->gpumaterial,
|
||||
wo->id.name,
|
||||
engine,
|
||||
shader_id,
|
||||
is_volume_shader,
|
||||
false,
|
||||
|
@ -525,6 +527,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
|
|||
|
||||
GPUMaterial *DRW_shader_from_material(Material *ma,
|
||||
bNodeTree *ntree,
|
||||
eGPUMaterialEngine engine,
|
||||
const uint64_t shader_id,
|
||||
const bool is_volume_shader,
|
||||
bool deferred,
|
||||
|
@ -537,6 +540,7 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
|
|||
ntree,
|
||||
&ma->gpumaterial,
|
||||
ma->id.name,
|
||||
engine,
|
||||
shader_id,
|
||||
is_volume_shader,
|
||||
false,
|
||||
|
|
|
@ -117,11 +117,15 @@ Array<float2> polyline_fit_curve(Span<float2> points,
|
|||
return {};
|
||||
}
|
||||
|
||||
if (r_cubic_array == nullptr) {
|
||||
return {};
|
||||
}
|
||||
|
||||
Span<float2> r_cubic_array_span(reinterpret_cast<float2 *>(r_cubic_array),
|
||||
r_cubic_array_len * 3);
|
||||
Array<float2> curve_positions(r_cubic_array_span);
|
||||
/* Free the c-style array. */
|
||||
MEM_freeN(r_cubic_array);
|
||||
free(r_cubic_array);
|
||||
return curve_positions;
|
||||
}
|
||||
|
||||
|
@ -153,11 +157,16 @@ IndexMask polyline_detect_corners(Span<float2> points,
|
|||
/* Error occurred, return. */
|
||||
return IndexMask();
|
||||
}
|
||||
|
||||
if (r_corners == nullptr) {
|
||||
return IndexMask();
|
||||
}
|
||||
|
||||
BLI_assert(samples_max < std::numeric_limits<int>::max());
|
||||
Span<int> indices(reinterpret_cast<int *>(r_corners), r_corner_len);
|
||||
const IndexMask corner_mask = IndexMask::from_indices<int>(indices, memory);
|
||||
/* Free the c-style array. */
|
||||
MEM_freeN(r_corners);
|
||||
free(r_corners);
|
||||
return corner_mask;
|
||||
}
|
||||
|
||||
|
|
|
@ -720,7 +720,7 @@ static void invert_visibility_bmesh(Object &object, const Span<PBVHNode *> nodes
|
|||
bool fully_hidden = true;
|
||||
for (BMVert *vert : BKE_pbvh_bmesh_node_unique_verts(node)) {
|
||||
BM_elem_flag_toggle(vert, BM_ELEM_HIDDEN);
|
||||
fully_hidden &= BM_elem_flag_test(vert, BM_ELEM_HIDDEN);
|
||||
fully_hidden &= BM_elem_flag_test_bool(vert, BM_ELEM_HIDDEN);
|
||||
}
|
||||
BKE_pbvh_node_fully_hidden_set(node, fully_hidden);
|
||||
BKE_pbvh_node_mark_rebuild_draw(node);
|
||||
|
|
|
@ -233,19 +233,19 @@ struct GPUUniformBuf *GPU_material_sss_profile_get(GPUMaterial *material,
|
|||
/**
|
||||
* High level functions to create and use GPU materials.
|
||||
*/
|
||||
GPUMaterial *GPU_material_from_nodetree_find(struct ListBase *gpumaterials,
|
||||
const void *engine_type,
|
||||
int options);
|
||||
/**
|
||||
* \note Caller must use #GPU_material_from_nodetree_find to re-use existing materials,
|
||||
* This is enforced since constructing other arguments to this function may be expensive
|
||||
* so only do this when they are needed.
|
||||
*/
|
||||
|
||||
typedef enum eGPUMaterialEngine {
|
||||
GPU_MAT_EEVEE_LEGACY = 0,
|
||||
GPU_MAT_EEVEE,
|
||||
GPU_MAT_COMPOSITOR,
|
||||
} eGPUMaterialEngine;
|
||||
|
||||
GPUMaterial *GPU_material_from_nodetree(struct Scene *scene,
|
||||
struct Material *ma,
|
||||
struct bNodeTree *ntree,
|
||||
struct ListBase *gpumaterials,
|
||||
const char *name,
|
||||
eGPUMaterialEngine engine,
|
||||
uint64_t shader_uuid,
|
||||
bool is_volume_shader,
|
||||
bool is_lookdev,
|
||||
|
@ -421,7 +421,8 @@ typedef void (*ConstructGPUMaterialFn)(void *thunk, GPUMaterial *material);
|
|||
|
||||
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
|
||||
* The given thunk will be passed as the first parameter of each callback. */
|
||||
GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb,
|
||||
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
|
||||
ConstructGPUMaterialFn construct_function_cb,
|
||||
GPUCodegenCallbackFn generate_code_function_cb,
|
||||
void *thunk);
|
||||
|
||||
|
|
|
@ -97,6 +97,8 @@ struct GPUPass {
|
|||
uint refcount;
|
||||
/** The last time the refcount was greater than 0. */
|
||||
int gc_timestamp;
|
||||
/** The engine type this pass is compiled for. */
|
||||
eGPUMaterialEngine engine;
|
||||
/** Identity hash generated from all GLSL code. */
|
||||
uint32_t hash;
|
||||
/** Did we already try to compile the attached GPUShader. */
|
||||
|
@ -122,12 +124,12 @@ static SpinLock pass_cache_spin;
|
|||
|
||||
/* Search by hash only. Return first pass with the same hash.
|
||||
* There is hash collision if (pass->next && pass->next->hash == hash) */
|
||||
static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
|
||||
static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash)
|
||||
{
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
/* Could be optimized with a Lookup table. */
|
||||
for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
|
||||
if (pass->hash == hash) {
|
||||
if (pass->hash == hash && pass->engine == engine) {
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass;
|
||||
}
|
||||
|
@ -157,10 +159,12 @@ static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
|
|||
GPUShaderCreateInfo *info,
|
||||
uint32_t hash)
|
||||
{
|
||||
eGPUMaterialEngine engine = pass->engine;
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
for (; pass && (pass->hash == hash); pass = pass->next) {
|
||||
if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
|
||||
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info))
|
||||
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info) &&
|
||||
pass->engine == engine)
|
||||
{
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass;
|
||||
|
@ -732,6 +736,7 @@ void GPUCodegen::generate_graphs()
|
|||
|
||||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
GPUNodeGraph *graph,
|
||||
eGPUMaterialEngine engine,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk,
|
||||
bool optimize_graph)
|
||||
|
@ -763,7 +768,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
|||
* NOTE: We only perform cache look-up for non-optimized shader
|
||||
* graphs, as baked constant data among other optimizations will generate too many
|
||||
* shader source permutations, with minimal re-usability. */
|
||||
pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
|
||||
pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get());
|
||||
|
||||
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
|
||||
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
|
||||
|
@ -813,6 +818,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
|||
pass->shader = nullptr;
|
||||
pass->refcount = 1;
|
||||
pass->create_info = codegen.create_info;
|
||||
pass->engine = engine;
|
||||
pass->hash = codegen.hash_get();
|
||||
pass->compiled = false;
|
||||
pass->cached = false;
|
||||
|
|
|
@ -25,6 +25,7 @@ typedef struct GPUPass GPUPass;
|
|||
|
||||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
struct GPUNodeGraph *graph,
|
||||
eGPUMaterialEngine engine,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk,
|
||||
bool optimize_graph);
|
||||
|
|
|
@ -99,8 +99,9 @@ struct GPUMaterial {
|
|||
eGPUMaterialStatus status;
|
||||
/** Some flags about the nodetree & the needed resources. */
|
||||
eGPUMaterialFlag flag;
|
||||
/* Identify shader variations (shadow, probe, world background...).
|
||||
* Should be unique even across render engines. */
|
||||
/** The engine type this material is compiled for. */
|
||||
eGPUMaterialEngine engine;
|
||||
/* Identify shader variations (shadow, probe, world background...) */
|
||||
uint64_t uuid;
|
||||
/* Number of generated functions. */
|
||||
int generated_function_len;
|
||||
|
@ -821,6 +822,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
bNodeTree *ntree,
|
||||
ListBase *gpumaterials,
|
||||
const char *name,
|
||||
eGPUMaterialEngine engine,
|
||||
uint64_t shader_uuid,
|
||||
bool is_volume_shader,
|
||||
bool is_lookdev,
|
||||
|
@ -830,7 +832,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
/* Search if this material is not already compiled. */
|
||||
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
|
||||
GPUMaterial *mat = (GPUMaterial *)link->data;
|
||||
if (mat->uuid == shader_uuid) {
|
||||
if (mat->uuid == shader_uuid && mat->engine == engine) {
|
||||
return mat;
|
||||
}
|
||||
}
|
||||
|
@ -838,6 +840,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
GPUMaterial *mat = static_cast<GPUMaterial *>(MEM_callocN(sizeof(GPUMaterial), "GPUMaterial"));
|
||||
mat->ma = ma;
|
||||
mat->scene = scene;
|
||||
mat->engine = engine;
|
||||
mat->uuid = shader_uuid;
|
||||
mat->flag = GPU_MATFLAG_UPDATED;
|
||||
mat->status = GPU_MAT_CREATED;
|
||||
|
@ -860,7 +863,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
|
||||
{
|
||||
/* Create source code and search pass cache for an already compiled version. */
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false);
|
||||
|
||||
if (mat->pass == nullptr) {
|
||||
/* We had a cache hit and the shader has already failed to compile. */
|
||||
|
@ -891,7 +894,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
mat->optimize_pass_info.callback = callback;
|
||||
mat->optimize_pass_info.thunk = thunk;
|
||||
#else
|
||||
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
|
||||
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true);
|
||||
if (mat->optimized_pass == nullptr) {
|
||||
/* Failed to create optimized pass. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
|
@ -1024,8 +1027,12 @@ void GPU_material_optimize(GPUMaterial *mat)
|
|||
* optimal, as these do not benefit from caching, due to baked constants. However, this could
|
||||
* possibly be cause for concern for certain cases. */
|
||||
if (!mat->optimized_pass) {
|
||||
mat->optimized_pass = GPU_generate_pass(
|
||||
mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
|
||||
mat->optimized_pass = GPU_generate_pass(mat,
|
||||
&mat->graph,
|
||||
mat->engine,
|
||||
mat->optimize_pass_info.callback,
|
||||
mat->optimize_pass_info.thunk,
|
||||
true);
|
||||
BLI_assert(mat->optimized_pass);
|
||||
}
|
||||
#else
|
||||
|
@ -1097,7 +1104,8 @@ void GPU_materials_free(Main *bmain)
|
|||
BKE_material_defaults_free_gpu();
|
||||
}
|
||||
|
||||
GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb,
|
||||
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
|
||||
ConstructGPUMaterialFn construct_function_cb,
|
||||
GPUCodegenCallbackFn generate_code_function_cb,
|
||||
void *thunk)
|
||||
{
|
||||
|
@ -1110,6 +1118,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
|||
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
|
||||
material->optimized_pass = nullptr;
|
||||
material->default_mat = nullptr;
|
||||
material->engine = engine;
|
||||
|
||||
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
|
||||
construct_function_cb(thunk, material);
|
||||
|
@ -1119,7 +1128,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
|||
|
||||
/* Lookup an existing pass in the cache or generate a new one. */
|
||||
material->pass = GPU_generate_pass(
|
||||
material, &material->graph, generate_code_function_cb, thunk, false);
|
||||
material, &material->graph, material->engine, generate_code_function_cb, thunk, false);
|
||||
material->optimized_pass = nullptr;
|
||||
|
||||
/* The pass already exists in the pass cache but its shader already failed to compile. */
|
||||
|
|
Loading…
Reference in New Issue