From 98500521d46956931f1fae4a3459b1d133d2695c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cle=CC=81ment=20Foucault?= Date: Thu, 31 Oct 2024 09:17:34 +0100 Subject: [PATCH] EEVEE: Use half float in some part of the pipeline The intent is to reduce register pressure on devices that support these types. --- .../shaders/eevee_bxdf_diffuse_lib.glsl | 4 +- .../eevee_next/shaders/eevee_bxdf_lib.glsl | 12 ++--- .../shaders/eevee_bxdf_microfacet_lib.glsl | 7 +-- .../eevee_next/shaders/eevee_closure_lib.glsl | 4 +- .../shaders/eevee_deferred_capture_frag.glsl | 11 ++-- .../shaders/eevee_deferred_light_frag.glsl | 26 ++++----- .../shaders/eevee_deferred_planar_frag.glsl | 18 +++---- .../eevee_display_lightprobe_planar_frag.glsl | 2 +- .../eevee_display_lightprobe_sphere_frag.glsl | 3 +- .../eevee_next/shaders/eevee_gbuffer_lib.glsl | 54 +++++++++---------- .../shaders/eevee_light_eval_lib.glsl | 12 ++--- .../shaders/eevee_lightprobe_eval_lib.glsl | 33 ++++++------ .../shaders/eevee_lightprobe_sphere_lib.glsl | 9 ++-- .../eevee_lightprobe_volume_ray_comp.glsl | 2 +- .../eevee_ray_trace_fallback_comp.glsl | 2 +- .../shaders/eevee_ray_trace_planar_comp.glsl | 2 +- .../shaders/eevee_ray_trace_screen_comp.glsl | 2 +- .../shaders/eevee_shadow_tracing_lib.glsl | 24 ++++----- .../shaders/eevee_subsurface_lib.glsl | 8 +-- .../shaders/eevee_surf_world_frag.glsl | 2 +- .../shaders/eevee_surfel_light_comp.glsl | 5 +- .../shaders/eevee_surfel_ray_comp.glsl | 2 +- .../common/gpu_shader_math_vector_lib.glsl | 36 +++++++++++++ 23 files changed, 160 insertions(+), 120 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_diffuse_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_diffuse_lib.glsl index a86b4219308..74db43bdad7 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_diffuse_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_diffuse_lib.glsl @@ -55,7 +55,7 @@ LightProbeRay 
bxdf_diffuse_lightprobe(vec3 N) ClosureLight bxdf_diffuse_light(ClosureUndetermined cl) { ClosureLight light; - light.ltc_mat = vec4(1.0, 0.0, 0.0, 1.0); /* No transform, just plain cosine distribution. */ + light.ltc_mat = half4(1.0, 0.0, 0.0, 1.0); /* No transform, just plain cosine distribution. */ light.N = cl.N; light.type = LIGHT_DIFFUSE; return light; @@ -144,7 +144,7 @@ ClosureLight bxdf_translucent_light(ClosureUndetermined cl, vec3 V, float thickn * only focusing the light a tiny bit. Using the flipped normal is good enough approximation. */ ClosureLight light; - light.ltc_mat = vec4(1.0, 0.0, 0.0, 1.0); /* No transform, just plain cosine distribution. */ + light.ltc_mat = half4(1.0, 0.0, 0.0, 1.0); /* No transform, just plain cosine distribution. */ light.N = -cl.N; light.type = (thickness > 0.0) ? LIGHT_TRANSLUCENT_WITH_THICKNESS : LIGHT_DIFFUSE; return light; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl index 04ea0689986..e41e3308e2f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl @@ -13,7 +13,7 @@ #include "gpu_shader_utildefines_lib.glsl" struct BsdfSample { - packed_float3 direction; + float3 direction; float pdf; }; @@ -26,14 +26,14 @@ struct BsdfEval { struct ClosureLight { /* LTC matrix. */ - packed_float4 ltc_mat; + half4 ltc_mat; /* Shading normal. */ packed_float3 N; /* Enum used as index to fetch which light intensity to use [0..3]. */ LightingType type; /* Output both shadowed and unshadowed for shadow denoising. */ - packed_float3 light_shadowed; - packed_float3 light_unshadowed; + half3 light_shadowed; + half3 light_unshadowed; }; /* Represent an approximation of a bunch of rays from a BSDF. */ @@ -49,9 +49,9 @@ struct LightProbeRay { /* General purpose 3D ray. 
*/ struct Ray { - packed_float3 direction; - float max_time; packed_float3 origin; + float max_time; + float3 direction; }; /* -------------------------------------------------------------------- */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_microfacet_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_microfacet_lib.glsl index b2f6094256f..29fe2ff934f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_microfacet_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_microfacet_lib.glsl @@ -396,7 +396,8 @@ ClosureLight bxdf_ggx_light_reflection(ClosureReflection cl, vec3 V) { float cos_theta = dot(cl.N, V); ClosureLight light; - light.ltc_mat = utility_tx_sample_lut(utility_tx, cos_theta, cl.roughness, UTIL_LTC_MAT_LAYER); + light.ltc_mat = half4( + utility_tx_sample_lut(utility_tx, cos_theta, cl.roughness, UTIL_LTC_MAT_LAYER)); light.N = cl.N; light.type = LIGHT_SPECULAR; return light; @@ -417,8 +418,8 @@ ClosureLight bxdf_ggx_light_transmission(ClosureRefraction cl, vec3 V, float thi float cos_theta = dot(-cl.N, R); ClosureLight light; - light.ltc_mat = utility_tx_sample_lut( - utility_tx, cos_theta, perceptual_roughness, UTIL_LTC_MAT_LAYER); + light.ltc_mat = half4( + utility_tx_sample_lut(utility_tx, cos_theta, perceptual_roughness, UTIL_LTC_MAT_LAYER)); light.N = -cl.N; light.type = LIGHT_TRANSMISSION; return light; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_closure_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_closure_lib.glsl index 0068f50813b..9bf7fc85c71 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_closure_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_closure_lib.glsl @@ -118,8 +118,8 @@ ClosureLight closure_light_new_ex(ClosureUndetermined cl, break; } } - cl_light.light_shadowed = vec3(0.0); - cl_light.light_unshadowed = vec3(0.0); + cl_light.light_shadowed = half3(0.0); + 
cl_light.light_unshadowed = half3(0.0); return cl_light; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl index 0080bdcc011..bf939839532 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_capture_frag.glsl @@ -64,20 +64,21 @@ void main() uchar receiver_light_set = gbuffer_light_link_receiver_unpack(gbuf.header); light_eval_reflection(stack, P, Ng, V, vPz, receiver_light_set); - vec3 radiance_front = stack.cl[0].light_shadowed; + half3 radiance_front = stack.cl[0].light_shadowed; stack.cl[0] = closure_light_new(cl_transmit, V, gbuf.thickness); light_eval_transmission(stack, P, Ng, V, vPz, gbuf.thickness, receiver_light_set); - vec3 radiance_back = stack.cl[0].light_shadowed; + half3 radiance_back = stack.cl[0].light_shadowed; /* Indirect light. */ /* Can only load irradiance to avoid dependency loop with the reflection probe. */ SphericalHarmonicL1 sh = lightprobe_volume_sample(P, V, Ng); - radiance_front += spherical_harmonics_evaluate_lambert(Ng, sh); + radiance_front += half3(spherical_harmonics_evaluate_lambert(Ng, sh)); /* TODO(fclem): Correct transmission eval. 
*/ - radiance_back += spherical_harmonics_evaluate_lambert(-Ng, sh); + radiance_back += half3(spherical_harmonics_evaluate_lambert(-Ng, sh)); - out_radiance = vec4(radiance_front * albedo_front + radiance_back * albedo_back, 0.0); + out_radiance = vec4( + vec3(radiance_front * half3(albedo_front) + radiance_back * half3(albedo_back)), 0.0); } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_light_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_light_frag.glsl index 15c62925bbb..31ee4f1a4f8 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_light_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_light_frag.glsl @@ -76,7 +76,7 @@ void main() if (use_transmission) { ClosureUndetermined cl_transmit = gbuffer_closure_get(gbuf, 0); #if 1 /* TODO Limit to SSS. */ - vec3 sss_reflect_shadowed, sss_reflect_unshadowed; + half3 sss_reflect_shadowed, sss_reflect_unshadowed; if (cl_transmit.type == CLOSURE_BSSRDF_BURLEY_ID) { sss_reflect_shadowed = stack.cl[0].light_shadowed; sss_reflect_unshadowed = stack.cl[0].light_unshadowed; @@ -91,8 +91,8 @@ void main() #if 1 /* TODO Limit to SSS. */ if (cl_transmit.type == CLOSURE_BSSRDF_BURLEY_ID) { /* Apply transmission profile onto transmitted light and sum with reflected light. 
*/ - vec3 sss_profile = subsurface_transmission(to_closure_subsurface(cl_transmit).sss_radius, - abs(gbuf.thickness)); + half3 sss_profile = subsurface_transmission(to_closure_subsurface(cl_transmit).sss_radius, + abs(gbuf.thickness)); stack.cl[0].light_shadowed *= sss_profile; stack.cl[0].light_unshadowed *= sss_profile; stack.cl[0].light_shadowed += sss_reflect_shadowed; @@ -102,13 +102,13 @@ void main() } if (render_pass_shadow_id != -1) { - vec3 radiance_shadowed = vec3(0); - vec3 radiance_unshadowed = vec3(0); + half3 radiance_shadowed = half3(0); + half3 radiance_unshadowed = half3(0); for (int i = 0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) { radiance_shadowed += closure_light_get(stack, i).light_shadowed; radiance_unshadowed += closure_light_get(stack, i).light_unshadowed; } - vec3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed); + half3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed); output_renderpass_value(render_pass_shadow_id, average(shadows)); } @@ -120,24 +120,24 @@ void main() for (int i = 0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) { ClosureUndetermined cl = gbuffer_closure_get(gbuf, i); - vec3 indirect_light = lightprobe_eval(samp, cl, P, V, gbuf.thickness); + half3 indirect_light = lightprobe_eval(samp, cl, P, V, gbuf.thickness); int layer_index = gbuffer_closure_get_bin_index(gbuf, i); - vec3 direct_light = closure_light_get(stack, i).light_shadowed; + half3 direct_light = closure_light_get(stack, i).light_shadowed; if (use_split_indirect) { - write_radiance_indirect(layer_index, texel, indirect_light); - write_radiance_direct(layer_index, texel, direct_light); + write_radiance_indirect(layer_index, texel, vec3(indirect_light)); + write_radiance_direct(layer_index, texel, vec3(direct_light)); } else { - write_radiance_direct(layer_index, texel, direct_light + indirect_light); + write_radiance_direct(layer_index, texel, vec3(direct_light + indirect_light)); } } } else { for (int i = 
0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) { int layer_index = gbuffer_closure_get_bin_index(gbuf, i); - vec3 direct_light = closure_light_get(stack, i).light_shadowed; - write_radiance_direct(layer_index, texel, direct_light); + half3 direct_light = closure_light_get(stack, i).light_shadowed; + write_radiance_direct(layer_index, texel, vec3(direct_light)); } } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl index c46bbb0debb..b8669b958ea 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_deferred_planar_frag.glsl @@ -19,8 +19,8 @@ void main() GBufferReader gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_normal_tx, texel); - vec3 albedo_front = vec3(0.0); - vec3 albedo_back = vec3(0.0); + half3 albedo_front = half3(0.0); + half3 albedo_back = half3(0.0); for (int i = 0; i < GBUFFER_LAYER_MAX && i < gbuf.closure_count; i++) { ClosureUndetermined cl = gbuffer_closure_get(gbuf, i); @@ -28,11 +28,11 @@ void main() case CLOSURE_BSSRDF_BURLEY_ID: case CLOSURE_BSDF_DIFFUSE_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFLECTION_ID: - albedo_front += cl.color; + albedo_front += half3(cl.color); break; case CLOSURE_BSDF_TRANSLUCENT_ID: case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - albedo_back += (gbuf.thickness != 0.0) ? square(cl.color) : cl.color; + albedo_back += half3((gbuf.thickness != 0.0) ? square(cl.color) : cl.color); break; case CLOSURE_NONE_ID: /* TODO(fclem): Assert. 
*/ @@ -59,18 +59,18 @@ void main() uchar receiver_light_set = gbuffer_light_link_receiver_unpack(gbuf.header); light_eval_reflection(stack, P, Ng, V, vPz, receiver_light_set); - vec3 radiance_front = stack.cl[0].light_shadowed; + half3 radiance_front = stack.cl[0].light_shadowed; stack.cl[0] = closure_light_new(cl_transmit, V, gbuf.thickness); light_eval_transmission(stack, P, Ng, V, vPz, gbuf.thickness, receiver_light_set); - vec3 radiance_back = stack.cl[0].light_shadowed; + half3 radiance_back = stack.cl[0].light_shadowed; /* Indirect light. */ SphericalHarmonicL1 sh = lightprobe_volume_sample(P, V, Ng); - radiance_front += spherical_harmonics_evaluate_lambert(Ng, sh); - radiance_back += spherical_harmonics_evaluate_lambert(-Ng, sh); + radiance_front += half3(spherical_harmonics_evaluate_lambert(Ng, sh)); + radiance_back += half3(spherical_harmonics_evaluate_lambert(-Ng, sh)); - out_radiance = vec4(radiance_front * albedo_front + radiance_back * albedo_back, 0.0); + out_radiance = vec4(vec3(radiance_front * albedo_front + radiance_back * albedo_back), 0.0); } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_frag.glsl index 4712afdceb4..1a56d32b0e1 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_frag.glsl @@ -16,7 +16,7 @@ void main() vec3 R = -reflect(V, probe_normal); SphereProbeUvArea world_atlas_coord = reinterpret_as_atlas_coord(world_coord_packed); - out_color = lightprobe_spheres_sample(R, 0.0, world_atlas_coord); + out_color = vec4(lightprobe_spheres_sample(R, 0.0, world_atlas_coord)); } else { out_color = texture(planar_radiance_tx, vec3(uv, probe_index)); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl 
b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl index 9a5cf488ac2..8cfa8fc8f66 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl @@ -20,6 +20,7 @@ void main() vec3 V = drw_world_incident_vector(P); vec3 L = reflect(-V, N); - out_color = lightprobe_spheres_sample(L, 0, lightprobe_sphere_buf[probe_index].atlas_coord); + out_color = vec4( + lightprobe_spheres_sample(L, 0, lightprobe_sphere_buf[probe_index].atlas_coord)); out_color.a = 0.0; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_gbuffer_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_gbuffer_lib.glsl index 8223efe1914..0a4cae015cc 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_gbuffer_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_gbuffer_lib.glsl @@ -81,11 +81,11 @@ struct GBufferReader { /* Additional object information if any closure needs it. */ float thickness; /* Number of valid closure encoded in the gbuffer. */ - int closure_count; + uchar closure_count; /* Only used for book-keeping when reading. */ - int data_len; + uchar data_len; /* Only used for debugging and testing. 
*/ - int normal_len; + uchar normal_len; }; ClosureType gbuffer_mode_to_closure_type(uint mode) @@ -126,7 +126,7 @@ uint fetchGBuffer(usampler2D tx, ivec2 texel) { return texelFetch(tx, texel, 0).r; } -vec4 fetchGBuffer(sampler2DArray tx, ivec2 texel, int layer) +vec4 fetchGBuffer(sampler2DArray tx, ivec2 texel, uchar layer) { return texelFetch(tx, ivec3(texel, layer), 0); } @@ -142,11 +142,11 @@ uint fetchGBuffer(samplerGBufferHeader tx, ivec2 texel) { return uint(0); } -vec4 fetchGBuffer(samplerGBufferClosure tx, ivec2 texel, int layer) +vec4 fetchGBuffer(samplerGBufferClosure tx, ivec2 texel, uchar layer) { return vec4(0.0); } -vec4 fetchGBuffer(samplerGBufferNormal tx, ivec2 texel, int layer) +vec4 fetchGBuffer(samplerGBufferNormal tx, ivec2 texel, uchar layer) { return vec4(0.0); } @@ -159,11 +159,11 @@ uint fetchGBuffer(samplerGBufferHeader tx, ivec2 texel) { return g_data_packed.header; } -vec4 fetchGBuffer(samplerGBufferClosure tx, ivec2 texel, int layer) +vec4 fetchGBuffer(samplerGBufferClosure tx, ivec2 texel, uchar layer) { return g_data_packed.data[layer]; } -vec4 fetchGBuffer(samplerGBufferNormal tx, ivec2 texel, int layer) +vec4 fetchGBuffer(samplerGBufferNormal tx, ivec2 texel, uchar layer) { return g_data_packed.N[layer].xyyy; } @@ -557,8 +557,8 @@ void gbuffer_closure_diffuse_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_diffuse_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -584,8 +584,8 @@ void gbuffer_closure_translucent_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_translucent_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -613,8 +613,8 @@ void gbuffer_closure_subsurface_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void 
gbuffer_closure_subsurface_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -644,8 +644,8 @@ void gbuffer_closure_reflection_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_reflection_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -675,8 +675,8 @@ void gbuffer_closure_refraction_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_refraction_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -714,8 +714,8 @@ void gbuffer_closure_reflection_colorless_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_reflection_colorless_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -744,8 +744,8 @@ void gbuffer_closure_refraction_colorless_skip(inout GBufferReader gbuf) gbuffer_skip_normal(gbuf); } void gbuffer_closure_refraction_colorless_load(inout GBufferReader gbuf, - int layer, - int bin_index, + uchar layer, + uchar bin_index, samplerGBufferClosure closure_tx, samplerGBufferNormal normal_tx) { @@ -931,7 +931,7 @@ ClosureType gbuffer_closure_type_get_by_bin(uint header, int bin_index) int gbuffer_closure_get_bin_index(GBufferReader gbuf, int layer_index) { int layer = 0; - for (int bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { + for (uchar bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { GBufferMode mode = gbuffer_header_unpack(gbuf.header, bin); /* Gbuffer header can have holes. Skip GBUF_NONE. 
*/ if (mode != GBUF_NONE) { @@ -948,7 +948,7 @@ int gbuffer_closure_get_bin_index(GBufferReader gbuf, int layer_index) ClosureUndetermined gbuffer_closure_get_by_bin(GBufferReader gbuf, int bin_index) { int layer_index = 0; - for (int bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { + for (uchar bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { GBufferMode mode = gbuffer_header_unpack(gbuf.header, bin); if (bin == bin_index) { return gbuffer_closure_get(gbuf, layer_index); @@ -977,7 +977,7 @@ GBufferReader gbuffer_read(samplerGBufferHeader header_tx, gbuf.data_len = 0; gbuf.normal_len = 0; gbuf.surface_N = vec3(0.0); - for (int bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { + for (uchar bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { gbuffer_register_closure(gbuf, closure_new(CLOSURE_NONE_ID), bin); } @@ -991,7 +991,7 @@ GBufferReader gbuffer_read(samplerGBufferHeader header_tx, gbuf.surface_N = gbuffer_normal_unpack(fetchGBuffer(normal_tx, texel, 0).xy); bool has_additional_data = false; - for (int bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { + for (uchar bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { GBufferMode mode = gbuffer_header_unpack(gbuf.header, bin); switch (mode) { default: @@ -1060,7 +1060,7 @@ ClosureUndetermined gbuffer_read_bin(uint header, } GBufferMode mode; - for (int bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { + for (uchar bin = 0; bin < GBUFFER_LAYER_MAX; bin++) { mode = gbuffer_header_unpack(gbuf.header, bin); if (bin >= bin_index) { diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl index abaf9757ef4..b9efc71aa3b 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_eval_lib.glsl @@ -97,17 +97,17 @@ void light_eval_single_closure(LightData light, LightVector lv, inout ClosureLight cl, vec3 V, - float attenuation, - float shadow, + half attenuation, + half shadow, 
const bool is_transmission) { attenuation *= light_power_get(light, cl.type); if (attenuation < 1e-30) { return; } - float ltc_result = light_ltc(utility_tx, light, cl.N, V, lv, cl.ltc_mat); - vec3 out_radiance = light.color * ltc_result; - float visibility = shadow * attenuation; + float ltc_result = light_ltc(utility_tx, light, cl.N, V, lv, vec4(cl.ltc_mat)); + half3 out_radiance = half3(light.color * ltc_result); + half visibility = shadow * attenuation; cl.light_shadowed += visibility * out_radiance; cl.light_unshadowed += attenuation * out_radiance; } @@ -148,7 +148,7 @@ void light_eval_single(uint l_idx, return; } - float shadow = 1.0; + half shadow = 1.0; if (light.tilemap_index != LIGHT_NO_SHADOW) { shadow = shadow_eval(light, is_directional, diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl index 4db3747da01..f888808ca73 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl @@ -67,10 +67,10 @@ vec3 lightprobe_sphere_parallax(SphereProbeData probe, vec3 P, vec3 L) * Return spherical sample normalized by irradiance at sample position. * This avoid most of light leaking and reduce the need for many local probes. 
*/ -vec3 lightprobe_spherical_sample_normalized_with_parallax(LightProbeSample samp, - vec3 P, - vec3 L, - float lod) +half3 lightprobe_spherical_sample_normalized_with_parallax(LightProbeSample samp, + vec3 P, + vec3 L, + float lod) { SphereProbeData probe = lightprobe_sphere_buf[samp.spherical_id]; ReflectionProbeLowFreqLight shading_sh = lightprobe_spheres_extract_low_freq( @@ -87,36 +87,35 @@ float pdf_to_lod(float inv_pdf) return blur_pdf * 2.0; } -vec3 lightprobe_eval_direction(LightProbeSample samp, vec3 P, vec3 L, float inv_pdf) +half3 lightprobe_eval_direction(LightProbeSample samp, vec3 P, vec3 L, float inv_pdf) { - vec3 radiance_sh = lightprobe_spherical_sample_normalized_with_parallax( - samp, P, L, pdf_to_lod(inv_pdf)); - return radiance_sh; + return lightprobe_spherical_sample_normalized_with_parallax(samp, P, L, pdf_to_lod(inv_pdf)); } # ifdef EEVEE_UTILITY_TX /* TODO: Port that inside a BSSDF file. */ -vec3 lightprobe_eval(LightProbeSample samp, ClosureSubsurface cl, vec3 P, vec3 V, float thickness) +half3 lightprobe_eval(LightProbeSample samp, ClosureSubsurface cl, vec3 P, vec3 V, float thickness) { - vec3 sss_profile = subsurface_transmission(cl.sss_radius, abs(thickness)); - vec3 radiance_sh = spherical_harmonics_evaluate_lambert(cl.N, samp.volume_irradiance); - radiance_sh += spherical_harmonics_evaluate_lambert(-cl.N, samp.volume_irradiance) * sss_profile; + half3 sss_profile = subsurface_transmission(cl.sss_radius, abs(thickness)); + half3 radiance_sh = half3(spherical_harmonics_evaluate_lambert(cl.N, samp.volume_irradiance)); + radiance_sh += half3(spherical_harmonics_evaluate_lambert(-cl.N, samp.volume_irradiance)) * + sss_profile; return radiance_sh; } -vec3 lightprobe_eval( +half3 lightprobe_eval( LightProbeSample samp, ClosureUndetermined cl, vec3 P, vec3 V, float thickness) { LightProbeRay ray = bxdf_lightprobe_ray(cl, P, V, thickness); float lod = sphere_probe_roughness_to_lod(ray.perceptual_roughness); - float fac = 
sphere_probe_roughness_to_mix_fac(ray.perceptual_roughness); + half fac = sphere_probe_roughness_to_mix_fac(ray.perceptual_roughness); - vec3 radiance_cube = lightprobe_spherical_sample_normalized_with_parallax( + half3 radiance_cube = lightprobe_spherical_sample_normalized_with_parallax( samp, P, ray.dominant_direction, lod); - vec3 radiance_sh = spherical_harmonics_evaluate_lambert(ray.dominant_direction, - samp.volume_irradiance); + half3 radiance_sh = half3( + spherical_harmonics_evaluate_lambert(ray.dominant_direction, samp.volume_irradiance)); return mix(radiance_cube, radiance_sh, fac); } # endif diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_sphere_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_sphere_lib.glsl index 0ef40d59590..fcc18d0f08f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_sphere_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_sphere_lib.glsl @@ -10,17 +10,18 @@ #include "gpu_shader_math_vector_lib.glsl" #ifdef SPHERE_PROBE -vec4 lightprobe_spheres_sample(vec3 L, float lod, SphereProbeUvArea uv_area) +half4 lightprobe_spheres_sample(vec3 L, float lod, SphereProbeUvArea uv_area) { float lod_min = floor(lod); float lod_max = ceil(lod); - float mix_fac = lod - lod_min; + half mix_fac = half(lod - lod_min); vec2 altas_uv_min, altas_uv_max; sphere_probe_direction_to_uv(L, lod_min, lod_max, uv_area, altas_uv_min, altas_uv_max); - vec4 color_min = textureLod(lightprobe_spheres_tx, vec3(altas_uv_min, uv_area.layer), lod_min); - vec4 color_max = textureLod(lightprobe_spheres_tx, vec3(altas_uv_max, uv_area.layer), lod_max); + half4 color_min, color_max; + color_min = half4(textureLod(lightprobe_spheres_tx, vec3(altas_uv_min, uv_area.layer), lod_min)); + color_max = half4(textureLod(lightprobe_spheres_tx, vec3(altas_uv_max, uv_area.layer), lod_max)); return mix(color_min, color_max, mix_fac); } #endif diff --git 
a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_ray_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_ray_comp.glsl index 92439da5822..558ed9c6023 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_ray_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_ray_comp.glsl @@ -68,7 +68,7 @@ void irradiance_capture_world(vec3 L, inout SphericalHarmonicL1 sh) if (capture_info_buf.capture_world_direct) { SphereProbeUvArea atlas_coord = capture_info_buf.world_atlas_coord; - radiance = lightprobe_spheres_sample(L, 0.0, atlas_coord).rgb; + radiance = vec3(lightprobe_spheres_sample(L, 0.0, atlas_coord).rgb); /* Clamped brightness. */ float luma = max(1e-8, reduce_max(radiance)); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_fallback_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_fallback_comp.glsl index 8e07b79728c..1fa664a906f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_fallback_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_fallback_comp.glsl @@ -70,7 +70,7 @@ void main() float clamp_indirect = uniform_buf.clamp.surface_indirect; samp.volume_irradiance = spherical_harmonics_clamp(samp.volume_irradiance, clamp_indirect); - vec3 radiance = lightprobe_eval_direction(samp, ray.origin, ray.direction, ray_pdf_inv); + vec3 radiance = vec3(lightprobe_eval_direction(samp, ray.origin, ray.direction, ray_pdf_inv)); /* Set point really far for correct reprojection of background. 
*/ float hit_time = 1000.0; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_planar_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_planar_comp.glsl index c8aceef3e4e..ec0bd9eb342 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_planar_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_planar_comp.glsl @@ -101,7 +101,7 @@ void main() vec3 Ng = ray.direction; /* Fallback to nearest light-probe. */ LightProbeSample samp = lightprobe_load(P, Ng, V); - radiance = lightprobe_eval_direction(samp, P, ray.direction, ray_pdf_inv); + radiance = vec3(lightprobe_eval_direction(samp, P, ray.direction, ray_pdf_inv)); /* Set point really far for correct reprojection of background. */ hit.time = 10000.0; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_screen_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_screen_comp.glsl index f2c6115b4ac..1a3269bb344 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_screen_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_ray_trace_screen_comp.glsl @@ -141,7 +141,7 @@ void main() float clamp_indirect = uniform_buf.clamp.surface_indirect; samp.volume_irradiance = spherical_harmonics_clamp(samp.volume_irradiance, clamp_indirect); - radiance = lightprobe_eval_direction(samp, ray.origin, ray.direction, ray_pdf_inv); + radiance = vec3(lightprobe_eval_direction(samp, ray.origin, ray.direction, ray_pdf_inv)); /* Set point really far for correct reprojection of background. 
*/ hit.time = 10000.0; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tracing_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tracing_lib.glsl index b7e6ede3386..668f8f17c38 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tracing_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tracing_lib.glsl @@ -409,15 +409,15 @@ float shadow_normal_offset(vec3 Ng, vec3 L) * Evaluate shadowing by casting rays toward the light direction. * Returns light visibility. */ -float shadow_eval(LightData light, - const bool is_directional, - const bool is_transmission, - bool is_translucent_with_thickness, - float thickness, /* Only used if is_transmission is true. */ - vec3 P, - vec3 Ng, - int ray_count, - int ray_step_count) +half shadow_eval(LightData light, + const bool is_directional, + const bool is_transmission, + bool is_translucent_with_thickness, + float thickness, /* Only used if is_transmission is true. */ + vec3 P, + vec3 Ng, + int ray_count, + int ray_step_count) { #if defined(EEVEE_SAMPLING_DATA) && defined(EEVEE_UTILITY_TX) # ifdef GPU_FRAGMENT_SHADER @@ -474,7 +474,7 @@ float shadow_eval(LightData light, /* Don't do a any horizon clipping in this case as the closure is lit from both sides. */ lNg = (is_transmission && is_translucent_with_thickness) ? vec3(0.0) : lNg; - float surface_hit = 0.0; + half surface_hit = 0.0; for (int ray_index = 0; ray_index < ray_count && ray_index < SHADOW_MAX_RAY; ray_index++) { vec2 random_ray_2d = fract(hammersley_2d(ray_index, ray_count) + random_shadow_3d.xy); @@ -489,10 +489,10 @@ float shadow_eval(LightData light, has_hit = shadow_map_trace(clip_ray, ray_step_count, random_shadow_3d.z); } - surface_hit += float(has_hit); + surface_hit += half(has_hit); } /* Average samples. 
*/ - return saturate(1.0 - surface_hit / float(ray_count)); + return saturate(1.0 - surface_hit / half(ray_count)); } /** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_lib.glsl index 3f33d49c265..6fd057d0a81 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_subsurface_lib.glsl @@ -15,20 +15,20 @@ #ifdef EEVEE_UTILITY_TX -float subsurface_transmittance_profile(float u) +half subsurface_transmittance_profile(float u) { - return utility_tx_sample(utility_tx, vec2(u, 0.0), UTIL_SSS_TRANSMITTANCE_PROFILE_LAYER).r; + return half(utility_tx_sample(utility_tx, vec2(u, 0.0), UTIL_SSS_TRANSMITTANCE_PROFILE_LAYER).r); } /** * Returns the amount of light that can travels through a uniform medium and exit at the backface. */ -vec3 subsurface_transmission(vec3 sss_radii, float thickness) +half3 subsurface_transmission(vec3 sss_radii, float thickness) { sss_radii *= SSS_TRANSMIT_LUT_RADIUS; vec3 channels_co = saturate(thickness / sss_radii) * SSS_TRANSMIT_LUT_SCALE + SSS_TRANSMIT_LUT_BIAS; - vec3 translucency; + half3 translucency; translucency.x = (sss_radii.x > 0.0) ? subsurface_transmittance_profile(channels_co.x) : 0.0; translucency.y = (sss_radii.y > 0.0) ? subsurface_transmittance_profile(channels_co.y) : 0.0; translucency.z = (sss_radii.z > 0.0) ? 
subsurface_transmittance_profile(channels_co.z) : 0.0; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl index 0a90de63944..f4135464b22 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surf_world_frag.glsl @@ -46,7 +46,7 @@ void main() float lod = max(1.0, base_lod); float mix_factor = min(1.0, base_lod); SphereProbeUvArea world_atlas_coord = reinterpret_as_atlas_coord(world_coord_packed); - vec4 probe_color = lightprobe_spheres_sample(-g_data.N, lod, world_atlas_coord); + vec4 probe_color = vec4(lightprobe_spheres_sample(-g_data.N, lod, world_atlas_coord)); out_background.rgb = mix(out_background.rgb, probe_color.rgb, mix_factor); SphericalHarmonicL1 volume_irradiance = lightprobe_volume_sample( diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_light_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_light_comp.glsl index d2bbb878607..12a7aaf497f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_light_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_light_comp.glsl @@ -35,7 +35,7 @@ void main() light_eval_reflection(stack, P, Ng, V, 0.0, surfel.receiver_light_set); if (capture_info_buf.capture_indirect) { - surfel_buf[index].radiance_direct.front.rgb += stack.cl[0].light_shadowed * + surfel_buf[index].radiance_direct.front.rgb += vec3(stack.cl[0].light_shadowed) * surfel.albedo_front; } @@ -46,6 +46,7 @@ void main() light_eval_reflection(stack, P, -Ng, -V, 0.0, surfel.receiver_light_set); if (capture_info_buf.capture_indirect) { - surfel_buf[index].radiance_direct.back.rgb += stack.cl[0].light_shadowed * surfel.albedo_back; + surfel_buf[index].radiance_direct.back.rgb += vec3(stack.cl[0].light_shadowed) * + surfel.albedo_back; } } diff --git 
a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_ray_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_ray_comp.glsl index afeecd389a7..89f0dccb50d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_ray_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_ray_comp.glsl @@ -89,7 +89,7 @@ void radiance_transfer_world(inout Surfel receiver, vec3 L) if (capture_info_buf.capture_world_indirect) { SphereProbeUvArea atlas_coord = capture_info_buf.world_atlas_coord; - radiance = lightprobe_spheres_sample(L, 0.0, atlas_coord).rgb; + radiance = vec3(lightprobe_spheres_sample(L, 0.0, atlas_coord).rgb); } if (capture_info_buf.capture_visibility_indirect) { diff --git a/source/blender/gpu/shaders/common/gpu_shader_math_vector_lib.glsl b/source/blender/gpu/shaders/common/gpu_shader_math_vector_lib.glsl index fd099fb61d3..868341579ec 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_math_vector_lib.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_math_vector_lib.glsl @@ -576,6 +576,18 @@ vec4 safe_rcp(vec4 a) { return select(vec4(0.0), (1.0 / a), notEqual(a, vec4(0.0))); } +half2 safe_rcp(half2 a) +{ + return select(half2(0.0), (1.0 / a), notEqual(a, half2(0.0))); +} +half3 safe_rcp(half3 a) +{ + return select(half3(0.0), (1.0 / a), notEqual(a, half3(0.0))); +} +half4 safe_rcp(half4 a) +{ + return select(half4(0.0), (1.0 / a), notEqual(a, half4(0.0))); +} vec2 interpolate(vec2 a, vec2 b, float t) { @@ -706,6 +718,18 @@ float reduce_add(vec4 a) { return a.x + a.y + a.z + a.w; } +half reduce_add(half2 a) +{ + return a.x + a.y; +} +half reduce_add(half3 a) +{ + return a.x + a.y + a.z; +} +half reduce_add(half4 a) +{ + return a.x + a.y + a.z + a.w; +} int reduce_add(ivec2 a) { return a.x + a.y; @@ -756,6 +780,18 @@ float average(vec4 a) { return reduce_add(a) * (1.0 / 4.0); } +float average(half2 a) +{ + return reduce_add(a) * (1.0 / 2.0); +} +float average(half3 a) +{ + return 
reduce_add(a) * (1.0 / 3.0); +} +float average(half4 a) +{ + return reduce_add(a) * (1.0 / 4.0); +} # define ASSERT_UNIT_EPSILON 0.0002 -- 2.30.2