Metal/EEVEE: Resolve rendering artifacts in EEVEE with Intel GPUs on macOS. #104700

Closed
Jason Fielder wants to merge 1 commit from Jason-Fielder/blender:EEVEE_Intel_Fixes_3 into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
7 changed files with 205 additions and 32 deletions

View File

@ -242,6 +242,8 @@ typedef struct EEVEE_PassList {
struct DRWPass *volumetric_accum_ps;
struct DRWPass *ssr_raytrace;
struct DRWPass *ssr_resolve;
struct DRWPass *ssr_resolve_probe;
struct DRWPass *ssr_resolve_refl;
struct DRWPass *sss_blur_ps;
struct DRWPass *sss_resolve_ps;
struct DRWPass *sss_translucency_ps;
@ -700,6 +702,9 @@ typedef struct EEVEE_EffectsInfo {
struct GPUTexture *ssr_specrough_input;
struct GPUTexture *ssr_hit_output;
struct GPUTexture *ssr_hit_depth;
/* Intel GPUs on macOS (Metal backend) need the SSR resolve split into two passes
 * (probes, then screen-space reflections) to work around a shader issue. */
bool use_split_ssr_pass;
/* Temporal Anti Aliasing */
int taa_reproject_sample;
int taa_current_sample;
@ -1208,6 +1213,8 @@ struct GPUShader *EEVEE_shaders_effect_ambient_occlusion_sh_get(void);
struct GPUShader *EEVEE_shaders_effect_ambient_occlusion_debug_sh_get(void);
struct GPUShader *EEVEE_shaders_effect_reflection_trace_sh_get(void);
struct GPUShader *EEVEE_shaders_effect_reflection_resolve_sh_get(void);
struct GPUShader *EEVEE_shaders_effect_reflection_resolve_probe_sh_get(void);
struct GPUShader *EEVEE_shaders_effect_reflection_resolve_refl_sh_get(void);
struct GPUShader *EEVEE_shaders_renderpasses_post_process_sh_get(void);
struct GPUShader *EEVEE_shaders_cryptomatte_sh_get(bool is_hair);
struct GPUShader *EEVEE_shaders_shadow_sh_get(void);

View File

@ -14,6 +14,7 @@
#include "DEG_depsgraph_query.h"
#include "GPU_platform.h"
#include "GPU_texture.h"
#include "eevee_private.h"
@ -86,6 +87,12 @@ int EEVEE_screen_raytrace_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
GPU_ATTACHMENT_TEXTURE(effects->ssr_hit_depth),
});
/* NOTE(Metal): Intel GPUs rendering with Metal require the reflections pass to be split
* into two separate phases. This reduces the individual complexity of each shader
* invocation. */
effects->use_split_ssr_pass = GPU_type_matches_ex(
GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL);
return EFFECT_SSR | EFFECT_NORMAL_BUFFER | EFFECT_RADIANCE_BUFFER | EFFECT_DOUBLE_BUFFER |
((use_refraction) ? EFFECT_REFRACT : 0);
}
@ -145,30 +152,76 @@ void EEVEE_screen_raytrace_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *v
eGPUSamplerState no_filter = GPU_SAMPLER_DEFAULT;
DRW_PASS_CREATE(psl->ssr_resolve, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD);
grp = DRW_shgroup_create(resolve_shader, psl->ssr_resolve);
DRW_shgroup_uniform_texture_ref(grp, "normalBuffer", &effects->ssr_normal_input);
DRW_shgroup_uniform_texture_ref(grp, "specroughBuffer", &effects->ssr_specrough_input);
DRW_shgroup_uniform_texture_ref(grp, "probeCubes", &lcache->cube_tx.tex);
DRW_shgroup_uniform_texture_ref(grp, "probePlanars", &vedata->txl->planar_pool);
DRW_shgroup_uniform_texture_ref(grp, "planarDepth", &vedata->txl->planar_depth);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitBuffer", &effects->ssr_hit_output, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitDepth", &effects->ssr_hit_depth, no_filter);
DRW_shgroup_uniform_texture_ref(grp, "colorBuffer", &txl->filtered_radiance);
DRW_shgroup_uniform_texture_ref(grp, "maxzBuffer", &txl->maxzbuffer);
DRW_shgroup_uniform_texture_ref(grp, "shadowCubeTexture", &sldata->shadow_cube_pool);
DRW_shgroup_uniform_texture_ref(grp, "shadowCascadeTexture", &sldata->shadow_cascade_pool);
DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex());
DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo);
DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo);
DRW_shgroup_uniform_block(grp, "grid_block", sldata->grid_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo);
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
DRW_shgroup_uniform_int(grp, "samplePoolOffset", &effects->taa_current_sample, 1);
DRW_shgroup_uniform_texture_ref(grp, "horizonBuffer", &effects->gtao_horizons);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
if (effects->use_split_ssr_pass) {
/* Prepare passes for split reflections resolve variant. */
for (int i = 0; i < 2; i++) {
if (i == 0) {
/* Prepare Reflection Probes resolve pass. */
struct GPUShader *resolve_shader_probe =
EEVEE_shaders_effect_reflection_resolve_probe_sh_get();
DRW_PASS_CREATE(psl->ssr_resolve_probe, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD);
grp = DRW_shgroup_create(resolve_shader_probe, psl->ssr_resolve_probe);
}
else if (i == 1) {
/* Prepare SSR resolve pass. */
struct GPUShader *resolve_shader_refl =
EEVEE_shaders_effect_reflection_resolve_refl_sh_get();
DRW_PASS_CREATE(psl->ssr_resolve_refl, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD);
grp = DRW_shgroup_create(resolve_shader_refl, psl->ssr_resolve_refl);
}
DRW_shgroup_uniform_texture_ref(grp, "normalBuffer", &effects->ssr_normal_input);
DRW_shgroup_uniform_texture_ref(grp, "specroughBuffer", &effects->ssr_specrough_input);
DRW_shgroup_uniform_texture_ref(grp, "probeCubes", &lcache->cube_tx.tex);
DRW_shgroup_uniform_texture_ref(grp, "probePlanars", &vedata->txl->planar_pool);
DRW_shgroup_uniform_texture_ref(grp, "planarDepth", &vedata->txl->planar_depth);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitBuffer", &effects->ssr_hit_output, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitDepth", &effects->ssr_hit_depth, no_filter);
DRW_shgroup_uniform_texture_ref(grp, "colorBuffer", &txl->filtered_radiance);
DRW_shgroup_uniform_texture_ref(grp, "maxzBuffer", &txl->maxzbuffer);
DRW_shgroup_uniform_texture_ref(grp, "shadowCubeTexture", &sldata->shadow_cube_pool);
DRW_shgroup_uniform_texture_ref(grp, "shadowCascadeTexture", &sldata->shadow_cascade_pool);
DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex());
DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo);
DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo);
DRW_shgroup_uniform_block(grp, "grid_block", sldata->grid_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo);
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
DRW_shgroup_uniform_int(grp, "samplePoolOffset", &effects->taa_current_sample, 1);
DRW_shgroup_uniform_texture_ref(grp, "horizonBuffer", &effects->gtao_horizons);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
}
}
else {
/* Prepare standard reflections resolve pass. */
DRW_PASS_CREATE(psl->ssr_resolve, DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_ADD);
grp = DRW_shgroup_create(resolve_shader, psl->ssr_resolve);
DRW_shgroup_uniform_texture_ref(grp, "normalBuffer", &effects->ssr_normal_input);
DRW_shgroup_uniform_texture_ref(grp, "specroughBuffer", &effects->ssr_specrough_input);
DRW_shgroup_uniform_texture_ref(grp, "probeCubes", &lcache->cube_tx.tex);
DRW_shgroup_uniform_texture_ref(grp, "probePlanars", &vedata->txl->planar_pool);
DRW_shgroup_uniform_texture_ref(grp, "planarDepth", &vedata->txl->planar_depth);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitBuffer", &effects->ssr_hit_output, no_filter);
DRW_shgroup_uniform_texture_ref_ex(grp, "hitDepth", &effects->ssr_hit_depth, no_filter);
DRW_shgroup_uniform_texture_ref(grp, "colorBuffer", &txl->filtered_radiance);
DRW_shgroup_uniform_texture_ref(grp, "maxzBuffer", &txl->maxzbuffer);
DRW_shgroup_uniform_texture_ref(grp, "shadowCubeTexture", &sldata->shadow_cube_pool);
DRW_shgroup_uniform_texture_ref(grp, "shadowCascadeTexture", &sldata->shadow_cascade_pool);
DRW_shgroup_uniform_texture(grp, "utilTex", EEVEE_materials_get_util_tex());
DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo);
DRW_shgroup_uniform_block(grp, "shadow_block", sldata->shadow_ubo);
DRW_shgroup_uniform_block(grp, "grid_block", sldata->grid_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "planar_block", sldata->planar_ubo);
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
DRW_shgroup_uniform_int(grp, "samplePoolOffset", &effects->taa_current_sample, 1);
DRW_shgroup_uniform_texture_ref(grp, "horizonBuffer", &effects->gtao_horizons);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
}
}
}
@ -205,7 +258,15 @@ void EEVEE_reflection_compute(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *v
EEVEE_effects_downsample_radiance_buffer(vedata, txl->color_double_buffer);
GPU_framebuffer_bind(fbl->main_color_fb);
DRW_draw_pass(psl->ssr_resolve);
if (effects->use_split_ssr_pass) {
/* Resolve reflections for probes and SSR in two independent passes. */
DRW_draw_pass(psl->ssr_resolve_probe);
DRW_draw_pass(psl->ssr_resolve_refl);
}
else {
DRW_draw_pass(psl->ssr_resolve);
}
/* Restore */
GPU_framebuffer_bind(fbl->main_fb);
@ -244,6 +305,12 @@ void EEVEE_reflection_output_accumulate(EEVEE_ViewLayerData *UNUSED(sldata), EEV
GPU_framebuffer_clear_color(fbl->ssr_accum_fb, clear);
}
DRW_draw_pass(psl->ssr_resolve);
if (effects->use_split_ssr_pass) {
DRW_draw_pass(psl->ssr_resolve_probe);
DRW_draw_pass(psl->ssr_resolve_refl);
}
else {
DRW_draw_pass(psl->ssr_resolve);
}
}
}

View File

@ -113,6 +113,8 @@ static struct {
/* Screen Space Reflection */
struct GPUShader *reflection_trace;
struct GPUShader *reflection_resolve;
struct GPUShader *reflection_resolve_probe;
struct GPUShader *reflection_resolve_raytrace;
/* Shadows */
struct GPUShader *shadow_sh;
@ -658,6 +660,24 @@ struct GPUShader *EEVEE_shaders_effect_reflection_resolve_sh_get(void)
return e_data.reflection_resolve;
}
/**
 * Return the probe variant of the split reflection-resolve shader.
 *
 * The shader is created on first use from the create-info named
 * `eevee_legacy_effect_reflection_resolve_probe` and cached in
 * `e_data.reflection_resolve_probe`; later calls return the cached pointer.
 */
struct GPUShader *EEVEE_shaders_effect_reflection_resolve_probe_sh_get(void)
{
  struct GPUShader **cached_sh = &e_data.reflection_resolve_probe;

  /* Already compiled: hand back the cached shader. */
  if (*cached_sh != NULL) {
    return *cached_sh;
  }

  *cached_sh = DRW_shader_create_from_info_name("eevee_legacy_effect_reflection_resolve_probe");
  return *cached_sh;
}
/**
 * Return the screen-space-reflection variant of the split reflection-resolve shader.
 *
 * The shader is created on first use from the create-info named
 * `eevee_legacy_effect_reflection_resolve_ssr` and cached in
 * `e_data.reflection_resolve_raytrace`; later calls return the cached pointer.
 *
 * NOTE: the getter (`_refl`), the cache slot (`_raytrace`) and the create-info (`_ssr`)
 * use three different suffixes for the same shader.
 */
struct GPUShader *EEVEE_shaders_effect_reflection_resolve_refl_sh_get(void)
{
  struct GPUShader **cached_sh = &e_data.reflection_resolve_raytrace;

  /* Already compiled: hand back the cached shader. */
  if (*cached_sh != NULL) {
    return *cached_sh;
  }

  *cached_sh = DRW_shader_create_from_info_name("eevee_legacy_effect_reflection_resolve_ssr");
  return *cached_sh;
}
/** \} */
/* -------------------------------------------------------------------- */
@ -1281,8 +1301,8 @@ static char *eevee_get_defines(int options)
/* Global EEVEE defines included for CreateInfo shaders via `engine_eevee_shared_defines.h` in
* eevee_legacy_common_lib CreateInfo. */
/* Defines which affect bindings are instead injected via CreateInfo permutations. To specify new
* permutations, references to GPUShaderCreateInfo variants should be fetched in:
/* Defines which affect bindings are instead injected via CreateInfo permutations. To specify
* new permutations, references to GPUShaderCreateInfo variants should be fetched in:
* `eevee_get_vert/geom/frag_info(..)`
*
* CreateInfo's for EEVEE materials are declared in:
@ -1495,6 +1515,8 @@ void EEVEE_shaders_free(void)
}
DRW_SHADER_FREE_SAFE(e_data.reflection_trace);
DRW_SHADER_FREE_SAFE(e_data.reflection_resolve);
DRW_SHADER_FREE_SAFE(e_data.reflection_resolve_probe);
DRW_SHADER_FREE_SAFE(e_data.reflection_resolve_raytrace);
DRW_SHADER_LIB_FREE_SAFE(e_data.lib);
if (e_data.default_world) {

View File

@ -82,6 +82,21 @@ vec2 get_ao_dir(float jitter)
return vec2(cos(jitter), sin(jitter));
}
/* Power function used when scaling occlusion visibility by the user AO factor.
 *
 * Certain Intel drivers on macOS lose precision with the built-in pow(A, B), which results in
 * rendering artifacts. On that platform the logarithmic identity pow(a, b) == exp(b * log(a))
 * is evaluated instead, as it provides higher precision results. Every other platform uses the
 * built-in pow(). */
float occlusion_pow(float a, float b)
{
#if defined(GPU_INTEL) && defined(OS_MAC)
  return exp(b * log(a));
#else
  return pow(a, b);
#endif
}
/* Return horizon angle cosine. */
float search_horizon(vec3 vI,
vec3 vP,
@ -327,7 +342,7 @@ float diffuse_occlusion(OcclusionData data, vec3 V, vec3 N, vec3 Ng)
float visibility;
occlusion_eval(data, V, N, Ng, 0.0, visibility, unused_error, unused);
/* Scale by user factor */
visibility = pow(saturate(visibility), aoFactor);
visibility = occlusion_pow(saturate(visibility), aoFactor);
return visibility;
}
@ -340,7 +355,7 @@ float diffuse_occlusion(
visibility = gtao_multibounce(visibility, albedo);
/* Scale by user factor */
visibility = pow(saturate(visibility), aoFactor);
visibility = occlusion_pow(saturate(visibility), aoFactor);
return visibility;
}
@ -404,7 +419,7 @@ float specular_occlusion(
visibility = mix(specular_occlusion, 1.0, tmp);
/* Scale by user factor */
visibility = pow(saturate(visibility), aoFactor);
visibility = occlusion_pow(saturate(visibility), aoFactor);
return visibility;
}

View File

@ -88,10 +88,13 @@ void closure_Glossy_light_eval(ClosureInputGlossy cl_in,
ClosureLightData light,
inout ClosureOutputGlossy cl_out)
{
/* Ensure specular light contribution only gets applied once when running split pass */
#ifndef RESOLVE_SSR
float radiance = light_specular(light.data, cl_eval.ltc_mat, cl_in.N, cl_common.V, light.L);
radiance *= cl_eval.ltc_brdf_scale;
cl_out.radiance += light.data.l_color *
(light.data.l_spec * light.vis * light.contact_shadow * radiance);
#endif
}
void closure_Glossy_planar_eval(ClosureInputGlossy cl_in,
@ -117,9 +120,12 @@ void closure_Glossy_cubemap_eval(ClosureInputGlossy cl_in,
ClosureCubemapData cube,
inout ClosureOutputGlossy cl_out)
{
/* Ensure cubemap probes contribution only gets applied once when running split pass */
#ifndef RESOLVE_SSR
vec3 probe_radiance = probe_evaluate_cube(
cube.id, cl_common.P, cl_eval.probe_sampling_dir, cl_in.roughness);
cl_out.radiance += cube.attenuation * probe_radiance;
#endif
}
void closure_Glossy_indirect_end(ClosureInputGlossy cl_in,
@ -127,6 +133,8 @@ void closure_Glossy_indirect_end(ClosureInputGlossy cl_in,
ClosureEvalCommon cl_common,
inout ClosureOutputGlossy cl_out)
{
/* Ensure specular contribution only gets applied once when running split pass */
#ifndef RESOLVE_SSR
/* If not enough light has been accumulated from probes, use the world specular cubemap
* to fill the remaining energy needed. */
if (specToggle && cl_common.specular_accum > 0.0) {
@ -136,8 +144,17 @@ void closure_Glossy_indirect_end(ClosureInputGlossy cl_in,
/* Apply occlusion on distant lighting. */
cl_out.radiance *= cl_eval.spec_occlusion;
#endif
/* Apply Raytrace reflections after occlusion since they are direct, local reflections. */
#if defined(RESOLVE_PROBE)
/* No-op: output the base radiance only; raytraced reflections are not added in this pass. */
#elif defined(RESOLVE_SSR)
/* Output only raytrace radiance */
cl_out.radiance = cl_eval.raytrace_radiance;
#else
/* Standard resolve */
cl_out.radiance += cl_eval.raytrace_radiance;
#endif
}
void closure_Glossy_eval_end(ClosureInputGlossy cl_in,

View File

@ -39,7 +39,13 @@ vec4 ssr_get_scene_color_and_mask(vec3 hit_vP, int planar_index, float mip)
color = textureLod(probePlanars, vec3(uv, planar_index), mip).rgb;
}
else {
/* Do not sample scene buffer if running probe pass in split reflection mode. */
#ifndef RESOLVE_PROBE
color = textureLod(colorBuffer, uv * hizUvScale.xy, mip).rgb;
#else
color = vec3(0.0);
#endif
}
/* Clamped brightness. */
@ -223,10 +229,26 @@ CLOSURE_EVAL_FUNCTION_DECLARE_1(ssr_resolve, Glossy)
void main()
{
float depth = textureLod(maxzBuffer, uvcoordsvar.xy * hizUvScale.xy, 0.0).r;
#if defined(GPU_INTEL) && defined(GPU_METAL)
float factor = 1.0f;
#endif
if (depth == 1.0) {
#if defined(GPU_INTEL) && defined(GPU_METAL)
/* Divergent code execution (and sampling) causes corruption due to undefined
* derivative/sampling behaviour, on Intel GPUs. Using a mask factor to ensure shaders do not
* diverge and only the final result is masked. */
factor = 0.0f;
#else
/* Note: In the Metal API, prior to Metal 2.3, Discard is not an explicit return and can
* produce undefined behaviour. This is especially prominent with derivatives if control-flow
* divergence is present.
*
* Adding a return call eliminates undefined behaviour and a later out-of-bounds read causing
* a crash on AMD platforms.
* This behaviour can also affect OpenGL on certain devices. */
discard;
return;
#endif
}
ivec2 texel = ivec2(gl_FragCoord.xy);
@ -235,8 +257,12 @@ void main()
float roughness = speccol_roughness.a;
if (max_v3(brdf) <= 0.0) {
#if defined(GPU_INTEL) && defined(GPU_METAL)
factor = 0.0f;
#else
discard;
return;
#endif
}
FragDepth = depth;
@ -258,5 +284,11 @@ void main()
* passed as specular color. */
CLOSURE_EVAL_FUNCTION_1(ssr_resolve, Glossy);
/* Default single pass resolve */
fragColor = vec4(out_Glossy_0.radiance * brdf, 1.0);
#if defined(GPU_INTEL) && defined(GPU_METAL)
/* Due to non-uniform control flow with discard, Intel on macOS requires blending factor
* to discard unwanted fragments. */
fragColor *= factor;
#endif
}

View File

@ -227,6 +227,19 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_effect_reflection_resolve)
.auto_resource_location(true)
.do_static_compilation(true);
/* Split reflection resolve support for Intel-based MacBooks. */
/* Probe variant of the split resolve: reuses `eevee_legacy_effect_reflection_resolve` and adds
 * the `RESOLVE_PROBE` define. With this define the resolve shader does not sample the scene
 * `colorBuffer` and does not add the raytraced radiance to the glossy output.
 * Requested by name from `EEVEE_shaders_effect_reflection_resolve_probe_sh_get()`. */
GPU_SHADER_CREATE_INFO(eevee_legacy_effect_reflection_resolve_probe)
.define("RESOLVE_PROBE")
.additional_info("eevee_legacy_effect_reflection_resolve")
.auto_resource_location(true)
.do_static_compilation(true);
/* SSR variant of the split resolve: reuses `eevee_legacy_effect_reflection_resolve` and adds
 * the `RESOLVE_SSR` define. With this define the glossy closure skips the light, cubemap-probe
 * and world-probe/occlusion contributions and outputs only the raytraced radiance, so those
 * contributions are not applied twice across the two split passes.
 * Requested by name from `EEVEE_shaders_effect_reflection_resolve_refl_sh_get()`. */
GPU_SHADER_CREATE_INFO(eevee_legacy_effect_reflection_resolve_ssr)
.define("RESOLVE_SSR")
.additional_info("eevee_legacy_effect_reflection_resolve")
.auto_resource_location(true)
.do_static_compilation(true);
/* EEVEE_shaders_subsurface_first_pass_sh_get */
GPU_SHADER_CREATE_INFO(eevee_legacy_shader_effect_subsurface_common)
.additional_info("draw_fullscreen")