diff --git a/source/blender/draw/engines/eevee/shaders/effect_reflection_resolve_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_reflection_resolve_frag.glsl index 41310fd2e67..c60a4389a3d 100644 --- a/source/blender/draw/engines/eevee/shaders/effect_reflection_resolve_frag.glsl +++ b/source/blender/draw/engines/eevee/shaders/effect_reflection_resolve_frag.glsl @@ -103,6 +103,20 @@ void resolve_reflection_sample(int planar_index, weight_accum += weight; } +/* NOTE(Metal): For Apple silicon GPUs executing this particular shader, by default, memory read + * pressure is high while ALU remains low. Packing the sample data into a smaller format balances + * this trade-off by reducing local shader register pressure and expensive memory look-ups into + * spilled local shader memory, resulting in an increase in performance of 20% for this shader. */ +#ifdef GPU_METAL +# define SAMPLE_STORAGE_TYPE uchar +# define pack_sample(x, y) uchar(((uchar(x + 2)) << uchar(3)) + (uchar(y + 2))) +# define unpack_sample(x) vec2((char(x) >> 3) - 2, (char(x) & 7) - 2) +#else +# define SAMPLE_STORAGE_TYPE vec2 +# define pack_sample(x, y) SAMPLE_STORAGE_TYPE(x, y) +# define unpack_sample(x) x +#endif + void raytrace_resolve(ClosureInputGlossy cl_in, inout ClosureEvalGlossy cl_eval, inout ClosureEvalCommon cl_common, @@ -110,55 +124,55 @@ void raytrace_resolve(ClosureInputGlossy cl_in, { /* Note: Reflection samples declared in function scope to avoid per-thread memory pressure on * tile-based GPUs e.g. Apple Silicon. */ - const vec2 resolve_sample_offsets[36] = vec2[36]( + const SAMPLE_STORAGE_TYPE resolve_sample_offsets[36] = SAMPLE_STORAGE_TYPE[36]( /* Set 1. */ /* First Ring (2x2). */ - vec2(0, 0), + pack_sample(0, 0), /* Second Ring (6x6). */ - vec2(-1, 3), - vec2(1, 3), - vec2(-1, 1), - vec2(3, 1), - vec2(-2, 0), - vec2(3, 0), - vec2(2, -1), - vec2(1, -2), + pack_sample(-1, 3), + pack_sample(1, 3), + pack_sample(-1, 1), + pack_sample(3, 1), + pack_sample(-2, 0), + pack_sample(3, 0), + pack_sample(2, -1), + pack_sample(1, -2), /* Set 2. */ /* First Ring (2x2). */ - vec2(1, 1), + pack_sample(1, 1), /* Second Ring (6x6). */ - vec2(-2, 3), - vec2(3, 3), - vec2(0, 2), - vec2(2, 2), - vec2(-2, -1), - vec2(1, -1), - vec2(0, -2), - vec2(3, -2), + pack_sample(-2, 3), + pack_sample(3, 3), + pack_sample(0, 2), + pack_sample(2, 2), + pack_sample(-2, -1), + pack_sample(1, -1), + pack_sample(0, -2), + pack_sample(3, -2), /* Set 3. */ /* First Ring (2x2). */ - vec2(0, 1), + pack_sample(0, 1), /* Second Ring (6x6). */ - vec2(0, 3), - vec2(3, 2), - vec2(-2, 1), - vec2(2, 1), - vec2(-1, 0), - vec2(-2, -2), - vec2(0, -1), - vec2(2, -2), + pack_sample(0, 3), + pack_sample(3, 2), + pack_sample(-2, 1), + pack_sample(2, 1), + pack_sample(-1, 0), + pack_sample(-2, -2), + pack_sample(0, -1), + pack_sample(2, -2), /* Set 4. */ /* First Ring (2x2). */ - vec2(1, 0), + pack_sample(1, 0), /* Second Ring (6x6). */ - vec2(2, 3), - vec2(-2, 2), - vec2(-1, 2), - vec2(1, 2), - vec2(2, 0), - vec2(-1, -1), - vec2(3, -1), - vec2(-1, -2)); + pack_sample(2, 3), + pack_sample(-2, 2), + pack_sample(-1, 2), + pack_sample(1, 2), + pack_sample(2, 0), + pack_sample(-1, -1), + pack_sample(3, -1), + pack_sample(-1, -2)); float roughness = cl_in.roughness; @@ -208,7 +222,8 @@ void raytrace_resolve(ClosureInputGlossy cl_in, int sample_id = sample_pool * resolve_samples_count + i; vec2 texture_size = vec2(textureSize(hitBuffer, 0)); vec2 sample_texel = texture_size * uvcoordsvar.xy * ssrUvScale; - vec2 sample_uv = (sample_texel + resolve_sample_offsets[sample_id]) / texture_size; + vec2 sample_uv = (sample_texel + unpack_sample(resolve_sample_offsets[sample_id])) / + texture_size; resolve_reflection_sample( planar_index, sample_uv, vP, vN, vV, roughness_squared, cone_tan, weight_acc, ssr_accum);