From b2b8727adc7ba55131bbf2a36d1a4c664c557ac9 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 26 Apr 2024 07:07:20 +0200 Subject: [PATCH 1/3] Fix: EEVEE-Next: Intel UHD support for probe remapping. Windows/Intel UHD 600 iGPUs crash when compiling the probe remapping shader. The cause is a balanced barrier inside a for loop; when the barrier is removed, the compilation works. This is a driver bug that will most likely not be fixed, as the driver is in maintenance mode and only receives critical fixes. This PR works around the issue by unrolling the for loop. Partially fixes #120919 --- .../draw/engines/eevee_next/eevee_defines.hh | 1 + .../eevee_reflection_probe_remap_comp.glsl | 52 ++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index 776147549d4..5d1538dd70b 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -34,6 +34,7 @@ #define CULLING_TILE_GROUP_SIZE 256 /* Reflection Probes. */ +/* When changed update unrolling in `eevee_reflection_probe_remap_comp.glsl`. 
*/ #define SPHERE_PROBE_REMAP_GROUP_SIZE 32 #define SPHERE_PROBE_GROUP_SIZE 16 #define SPHERE_PROBE_SELECT_GROUP_SIZE 64 diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl index 0296b214b8e..f0dde04225c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl @@ -119,9 +119,13 @@ void main() float sample_weight = octahedral_texel_solid_angle(local_texel, write_coord, sample_coord); const uint local_index = gl_LocalInvocationIndex; - const uint group_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; /* Parallel sum. Result is stored inside local_radiance[0]. */ + /* Code has been unrolled to work around UHD600 driver bug and need to be modified when + * `SPHERE_PROBE_REMAP_GROUP_SIZE` changes. */ + +#if 0 + const uint group_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; local_radiance[local_index] = radiance.xyzz * sample_weight; for (uint stride = group_size / 2; stride > 0; stride /= 2) { barrier(); @@ -129,8 +133,52 @@ void main() local_radiance[local_index] += local_radiance[local_index + stride]; } } - barrier(); +#else + local_radiance[local_index] = radiance.xyzz * sample_weight; + barrier(); + if (local_index < 512) { + local_radiance[local_index] += local_radiance[local_index + 512]; + } + barrier(); + if (local_index < 256) { + local_radiance[local_index] += local_radiance[local_index + 256]; + } + barrier(); + if (local_index < 128) { + local_radiance[local_index] += local_radiance[local_index + 128]; + } + barrier(); + if (local_index < 64) { + local_radiance[local_index] += local_radiance[local_index + 64]; + } + barrier(); + if (local_index < 32) { + local_radiance[local_index] += local_radiance[local_index + 32]; + } + barrier(); + if (local_index < 16) { + local_radiance[local_index] += 
local_radiance[local_index + 16]; + } + barrier(); + if (local_index < 8) { + local_radiance[local_index] += local_radiance[local_index + 8]; + } + barrier(); + if (local_index < 4) { + local_radiance[local_index] += local_radiance[local_index + 4]; + } + barrier(); + if (local_index < 2) { + local_radiance[local_index] += local_radiance[local_index + 2]; + } + barrier(); + if (local_index < 1) { + local_radiance[local_index] += local_radiance[local_index + 1]; + } + barrier(); +#endif + if (gl_LocalInvocationIndex == 0u) { /* Find the middle point of the whole thread-group. Use it as light vector. * Note that this is an approximation since the footprint of a thread-group is not -- 2.30.2 From 59d9727dc754ec51c2a201437fd256e38f75b8dd Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 6 May 2024 08:14:43 +0200 Subject: [PATCH 2/3] Use loop --- .../draw/engines/eevee_next/eevee_defines.hh | 2 +- .../eevee_reflection_probe_remap_comp.glsl | 55 ++----------------- 2 files changed, 5 insertions(+), 52 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_defines.hh b/source/blender/draw/engines/eevee_next/eevee_defines.hh index 5d1538dd70b..eae9ac65060 100644 --- a/source/blender/draw/engines/eevee_next/eevee_defines.hh +++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh @@ -34,7 +34,7 @@ #define CULLING_TILE_GROUP_SIZE 256 /* Reflection Probes. */ -/* When changed update unrolling in `eevee_reflection_probe_remap_comp.glsl`. */ +/* When changed update parallel sum loop in `eevee_reflection_probe_remap_comp.glsl`. 
*/ #define SPHERE_PROBE_REMAP_GROUP_SIZE 32 #define SPHERE_PROBE_GROUP_SIZE 16 #define SPHERE_PROBE_SELECT_GROUP_SIZE 64 diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl index 2072d29957c..c8f3551b77e 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl @@ -122,65 +122,18 @@ void main() float sample_weight = octahedral_texel_solid_angle(local_texel, write_coord, sample_coord); const uint local_index = gl_LocalInvocationIndex; + const uint group_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; /* Parallel sum. Result is stored inside local_radiance[0]. */ - /* Code has been unrolled to work around UHD600 driver bug and need to be modified when - * `SPHERE_PROBE_REMAP_GROUP_SIZE` changes. */ - -#if 0 - const uint group_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; - local_radiance[local_index] = radiance.xyzz * sample_weight; - for (uint stride = group_size / 2; stride > 0; stride /= 2) { + uint stride = group_size / 2; + for (int i = 0; i < 10; i++) { barrier(); if (local_index < stride) { local_radiance[local_index] += local_radiance[local_index + stride]; } + stride /= 2; } barrier(); -#else - local_radiance[local_index] = radiance.xyzz * sample_weight; - barrier(); - if (local_index < 512) { - local_radiance[local_index] += local_radiance[local_index + 512]; - } - barrier(); - if (local_index < 256) { - local_radiance[local_index] += local_radiance[local_index + 256]; - } - barrier(); - if (local_index < 128) { - local_radiance[local_index] += local_radiance[local_index + 128]; - } - barrier(); - if (local_index < 64) { - local_radiance[local_index] += local_radiance[local_index + 64]; - } - barrier(); - if (local_index < 32) { - local_radiance[local_index] += local_radiance[local_index + 
32]; - } - barrier(); - if (local_index < 16) { - local_radiance[local_index] += local_radiance[local_index + 16]; - } - barrier(); - if (local_index < 8) { - local_radiance[local_index] += local_radiance[local_index + 8]; - } - barrier(); - if (local_index < 4) { - local_radiance[local_index] += local_radiance[local_index + 4]; - } - barrier(); - if (local_index < 2) { - local_radiance[local_index] += local_radiance[local_index + 2]; - } - barrier(); - if (local_index < 1) { - local_radiance[local_index] += local_radiance[local_index + 1]; - } - barrier(); -#endif if (gl_LocalInvocationIndex == 0u) { /* Find the middle point of the whole thread-group. Use it as light vector. -- 2.30.2 From a1bba8c0e6475fffe9efe623a0264ba70916b683 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 6 May 2024 08:17:16 +0200 Subject: [PATCH 3/3] Revert whitespace changes --- .../eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl index c8f3551b77e..d37419f0dba 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_reflection_probe_remap_comp.glsl @@ -133,8 +133,8 @@ void main() } stride /= 2; } - barrier(); + barrier(); if (gl_LocalInvocationIndex == 0u) { /* Find the middle point of the whole thread-group. Use it as light vector. * Note that this is an approximation since the footprint of a thread-group is not -- 2.30.2