11 changed files with 433 additions and 84 deletions
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@ -87,7 +87,8 @@
 #define DOF_RESOLVE_GROUP_SIZE (DOF_TILES_SIZE * 2)

 /* IrradianceBake. */
-#define SURFEL_LIGHT_GROUP_SIZE 256
+#define SURFEL_GROUP_SIZE 256
+#define SURFEL_LIST_GROUP_SIZE 256

 /* Resource bindings. */

--- a/source/blender/draw/engines/eevee_next/eevee_irradiance_cache.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_irradiance_cache.cc
@ -26,6 +26,9 @@ void IrradianceCache::sync()
  if (!inst_.is_baking()) {
    debug_pass_sync();
  }
+  else {
+    bake.sync();
+  }
 }

 void IrradianceCache::debug_pass_sync()
@ -88,6 +91,55 @@ void IrradianceCache::debug_draw(View &view, GPUFrameBuffer *view_fb)
 /** \name Baking
 * \{ */

+void IrradianceBake::sync()
+{
+  {
+    PassSimple &pass = surfel_light_eval_ps_;
+    pass.init();
+    /* Apply lights contribution to scene surfel representation. */
+    pass.shader_set(inst_.shaders.static_shader_get(SURFEL_LIGHT));
+    pass.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+    pass.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
+    pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
+    inst_.lights.bind_resources(&pass);
+    inst_.shadows.bind_resources(&pass);
+    /* Sync with the surfel creation stage. */
+    pass.barrier(GPU_BARRIER_SHADER_STORAGE);
+    pass.dispatch(&dispatch_per_surfel_); /* Group count is set in `surfels_create()`. */
+  }
+  {
+    PassSimple &pass = surfel_light_propagate_ps_;
+    pass.init();
+    {
+      PassSimple::Sub &sub = pass.sub("ListBuild");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
+      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+      sub.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
+      sub.bind_ssbo("list_start_buf", &list_start_buf_);
+      sub.bind_ssbo("list_info_buf", &list_info_buf_);
+      sub.barrier(GPU_BARRIER_SHADER_STORAGE);
+      sub.dispatch(&dispatch_per_surfel_);
+    }
+    {
+      PassSimple::Sub &sub = pass.sub("ListSort");
+      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_SORT));
+      sub.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
+      sub.bind_ssbo("list_start_buf", &list_start_buf_);
+      sub.bind_ssbo("list_info_buf", &list_info_buf_);
+      sub.barrier(GPU_BARRIER_SHADER_STORAGE); /* Sync with the list build stage. */
+      sub.dispatch(&dispatch_per_list_); /* Group count is set in `propagate_light_sample()`. */
+    }
+    {
+      /* TODO: Light propagation sub-pass: `pass.sub("LightPropagate")`. */
+    }
+  }
+  {
+    PassSimple &pass = irradiance_capture_ps_;
+    pass.init();
+    /* TODO: Irradiance capture is not implemented yet. */
+  }
+}
+
 void IrradianceBake::surfels_create(const IrradianceGrid &grid)
 {
  /**
@ -97,19 +149,32 @@ void IrradianceBake::surfels_create(const IrradianceGrid &grid)
   */
  using namespace blender::math;

-  float4x4 transform(grid.transform);
-  float3 location, scale;
-  Quaternion rotation;
-  math::to_loc_rot_scale(transform, location, rotation, scale);
+  const float4x4 transform(grid.transform);
+  float3 scale;
+  math::to_loc_rot_scale(transform, grid_location_, grid_orientation_, scale);

-  /** We could use multi-view rendering here to avoid multiple submissions but it is unlikely to
+  /* Extract bounding box. Order is arbitrary as it is not important for our usage. */
+  const std::array<float3, 8> bbox_corners({float3{+1, +1, +1},
+                                            float3{-1, +1, +1},
+                                            float3{+1, -1, +1},
+                                            float3{-1, -1, +1},
+                                            float3{+1, +1, -1},
+                                            float3{-1, +1, -1},
+                                            float3{+1, -1, -1},
+                                            float3{-1, -1, -1}});
+  grid_bbox_vertices.clear();
+  for (const float3 &point : bbox_corners) {
+    grid_bbox_vertices.append(transform_point(transform, point));
+  }
+
+  /* We could use multi-view rendering here to avoid multiple submissions but it is unlikely to
   * make any difference. The bottleneck is still the light propagation loop. */
  auto sync_view = [&](View &view, CartesianBasis basis) {
    float3 extent = scale;
    float4x4 winmat = projection::orthographic(
        -extent.x, extent.x, -extent.y, extent.y, -extent.z, extent.z);
-    float4x4 viewinv = math::from_loc_rot<float4x4>(location,
-                                                    rotation * to_quaternion<float>(basis));
+    float4x4 viewinv = math::from_loc_rot<float4x4>(
+        grid_location_, grid_orientation_ * to_quaternion<float>(basis));
    view.sync(invert(viewinv), winmat);
  };

@ -117,12 +182,9 @@ void IrradianceBake::surfels_create(const IrradianceGrid &grid)
  sync_view(view_y_, basis_y_);
  sync_view(view_z_, basis_z_);

-  /* Surfel per unit distance. */
-  float surfel_density = 2.0f;
-  grid_pixel_extent_ = max(int3(1), int3(surfel_density * 2.0f * scale));
+  grid_pixel_extent_ = max(int3(1), int3(surfel_density_ * 2.0f * scale));

  DRW_stats_group_start("IrradianceBake.SurfelsCount");
-  GPU_debug_capture_begin();

  /* Raster the scene to query the number of surfel needed. */
  capture_info_buf_.do_surfel_count = true;
@ -137,7 +199,6 @@ void IrradianceBake::surfels_create(const IrradianceGrid &grid)
  empty_raster_fb_.ensure(grid_pixel_extent_.xy());
  inst_.pipelines.capture.render(view_z_);

-  GPU_debug_capture_end();
  DRW_stats_group_end();

  /* Allocate surfel pool. */
@ -151,6 +212,8 @@ void IrradianceBake::surfels_create(const IrradianceGrid &grid)
  /* TODO(fclem): Check for GL limit and abort if the surfel cache doesn't fit the GPU memory. */
  surfels_buf_.resize(capture_info_buf_.surfel_len);

+  dispatch_per_surfel_.x = divide_ceil_u(surfels_buf_.size(), SURFEL_GROUP_SIZE);
+
  DRW_stats_group_start("IrradianceBake.SurfelsCreate");

  /* Raster the scene to generate the surfels. */
@ -166,52 +229,10 @@ void IrradianceBake::surfels_create(const IrradianceGrid &grid)
  inst_.pipelines.capture.render(view_z_);

  DRW_stats_group_end();
-
-  /* Sync needs to happen after `surfels_buf_` is resized for correct dispatch size. */
-  sync();
-}
-
-void IrradianceBake::sync()
-{
-  {
-    PassSimple &pass = surfel_light_eval_ps_;
-    pass.init();
-    /* Apply lights contribution to scene surfel representation. */
-    pass.shader_set(inst_.shaders.static_shader_get(SURFEL_LIGHT));
-    pass.bind_ssbo(SURFEL_BUF_SLOT, &surfels_buf_);
-    pass.bind_ssbo(CAPTURE_BUF_SLOT, &capture_info_buf_);
-    pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
-    inst_.lights.bind_resources(&pass);
-    inst_.shadows.bind_resources(&pass);
-    /* Sync with the surfel creation stage. */
-    pass.barrier(GPU_BARRIER_SHADER_STORAGE);
-    pass.dispatch(int3(divide_ceil_u(surfels_buf_.size(), SURFEL_LIGHT_GROUP_SIZE), 1, 1));
-    pass.barrier(GPU_BARRIER_SHADER_STORAGE);
-  }
-  {
-    PassSimple &pass = surfel_light_propagate_ps_;
-    pass.init();
-    {
-      PassSimple::Sub &sub = pass.sub("ListBuild");
-      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_BUILD));
-      /* TODO */
-    }
-    {
-      PassSimple::Sub &sub = pass.sub("ListSort");
-      sub.shader_set(inst_.shaders.static_shader_get(SURFEL_LIST_SORT));
-      /* TODO */
-    }
-  }
-  {
-    PassSimple &pass = irradiance_capture_ps_;
-    pass.init();
-    /* TODO */
-  }
 }

 void IrradianceBake::surfels_lights_eval()
 {
-  GPU_debug_capture_begin();
  /* Use the last setup view. This should work since the view is orthographic. */
  /* TODO(fclem): Remove this. It is only present to avoid crash inside `shadows.set_view` */
  inst_.render_buffers.acquire(int2(1));
@ -221,16 +242,49 @@ void IrradianceBake::surfels_lights_eval()
  inst_.render_buffers.release();

  inst_.manager->submit(surfel_light_eval_ps_);
-
-  GPU_debug_capture_end();
 }

 void IrradianceBake::propagate_light_sample()
 {
-  /* Pick random ray direction over the sphere. */
-  /* Project to regular grid and create the surfels lists. */
-  /* Sort the surfels lists. */
-  /* Propagate light. */
+  using namespace blender::math;
+
+  float2 rand_uv = inst_.sampling.rng_2d_get(eSamplingDimension::SAMPLING_FILTER_U);
+  const float3 ray_direction = inst_.sampling.sample_hemisphere(rand_uv);
+  const float3 up = ray_direction;
+  /* Find the closest grid axis. NOTE(review): this may need the inverse rotation, confirm. */
+  const float3 grid_local_ray_direction = transform_point(grid_orientation_, ray_direction);
+  Axis closest_grid_axis = Axis::from_int(dominant_axis(grid_local_ray_direction));
+  /* Use one of the other 2 grid axes to get a reference right vector. */
+  Axis right_axis = AxisSigned(closest_grid_axis).next_after().axis();
+  const float3 grid_right = from_rotation<float3x3>(grid_orientation_)[right_axis.as_int()];
+  /* Create a view around the grid position with the ray direction as up axis.
+   * The other axes are aligned to the grid local axes to avoid allocating too many list starts. */
+  const float4x4 viewmat = invert(
+      from_orthonormal_axes<float4x4>(grid_location_, normalize(cross(up, grid_right)), up));
+
+  /* Compute projection bounds of the grid bounding box in view space. */
+  float2 min, max;
+  INIT_MINMAX2(min, max);
+  for (const float3 &point : grid_bbox_vertices) {
+    min_max(transform_point(viewmat, point).xy(), min, max);
+  }
+
+  /* NOTE: Z values do not really matter since we are not doing any rasterization. */
+  const float4x4 winmat = projection::orthographic<float>(min.x, max.x, min.y, max.y, 0, 1);
+
+  View ray_view = {"RayProjectionView"};
+  ray_view.sync(viewmat, winmat);
+
+  list_info_buf_.ray_grid_size = math::max(int2(1), int2(surfel_density_ * (max - min)));
+  list_info_buf_.list_max = list_info_buf_.ray_grid_size.x * list_info_buf_.ray_grid_size.y;
+  list_info_buf_.push_update();
+
+  dispatch_per_list_.x = divide_ceil_u(list_info_buf_.list_max, SURFEL_LIST_GROUP_SIZE);
+
+  list_start_buf_.resize(ceil_to_multiple_u(list_info_buf_.list_max, 4));
+
+  GPU_storagebuf_clear(list_start_buf_, -1); /* -1 marks an empty list. */
+  inst_.manager->submit(surfel_light_propagate_ps_, ray_view);
 }

 void IrradianceBake::read_result(LightCacheIrradianceGrid &light_cache_grid)
--- a/source/blender/draw/engines/eevee_next/eevee_irradiance_cache.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_irradiance_cache.hh
@ -8,6 +8,8 @@

 #include "DNA_lightprobe_types.h"

+#include "BLI_math_quaternion_types.hh"
+
 #include "eevee_lightprobe.hh"
 #include "eevee_shader_shared.hh"

@ -53,6 +55,24 @@ class IrradianceBake {
  View view_z_ = {"BakingViewZ"};
  /** Pixel resolution in each of the projection axes. Match the target surfel density. */
  int3 grid_pixel_extent_ = int3(0);
+  /** Information for surfel list building. */
+  SurfelListInfoBuf list_info_buf_ = {"list_info_buf_"};
+  /** List array containing list start surfel index. Cleared to -1. */
+  StorageArrayBuffer<int, 16, true> list_start_buf_ = {"list_start_buf_"};
+
+  /* Dispatch size for per surfel workload. */
+  int3 dispatch_per_surfel_ = int3(1);
+  /* Dispatch size for per surfel list workload. */
+  int3 dispatch_per_list_ = int3(1);
+
+  /* Surfel per unit distance. */
+  float surfel_density_ = 2.0f;
+  /* Orientation of the irradiance grid being baked. */
+  math::Quaternion grid_orientation_;
+  /* Object center of the irradiance grid being baked. */
+  float3 grid_location_;
+  /* Bounding box vertices of the irradiance grid being baked. In world space. */
+  Vector<float3> grid_bbox_vertices;

 public:
  IrradianceBake(Instance &inst) : inst_(inst){};
--- a/source/blender/draw/engines/eevee_next/eevee_sampling.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_sampling.cc
@ -10,6 +10,9 @@

 #include "BLI_rand.h"

+#include "BLI_math_base.hh"
+#include "BLI_math_base_safe.h"
+
 #include "eevee_instance.hh"
 #include "eevee_sampling.hh"

@ -180,6 +183,14 @@ float2 Sampling::sample_disk(const float2 &rand)
  return sqrtf(rand.x) * float2(cosf(omega), sinf(omega));
 }

+float3 Sampling::sample_hemisphere(const float2 &rand)
+{
+  const float omega = rand.y * 2.0f * M_PI; /* Angle around the Z axis. */
+  const float cos_theta = rand.x; /* Uniform in `cos(theta)`: uniform over the hemisphere. */
+  const float sin_theta = safe_sqrtf(1.0f - square_f(cos_theta));
+  return float3(sin_theta * float2(cosf(omega), sinf(omega)), cos_theta); /* Z is `cos_theta`. */
+}
+
 float2 Sampling::sample_spiral(const float2 &rand)
 {
  /* Fibonacci spiral. */
--- a/source/blender/draw/engines/eevee_next/eevee_sampling.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_sampling.hh
@ -158,6 +158,13 @@ class Sampling {
   */
  static float2 sample_disk(const float2 &rand);

+  /**
+   * Uniform hemisphere distribution.
+   * \a rand is 2 random float in the [0..1] range.
+   * Returns point on a Z positive hemisphere of radius 1 and centered on the origin.
+   */
+  static float3 sample_hemisphere(const float2 &rand);
+
  /**
   * Uniform disc distribution using Fibonacci spiral sampling.
   * \a rand is 2 random float in the [0..1] range.
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@ -833,16 +833,19 @@ static inline ShadowTileDataPacked shadow_tile_pack(ShadowTileData tile)
 struct Surfel {
  /** World position of the surfel. */
  packed_float3 position;
-  int _pad0;
+  /** Previous surfel index in the ray linked-list (-1 if first). Only valid after sorting. */
+  int prev;
  /** World orientation of the surface. */
  packed_float3 normal;
-  int _pad1;
+  /** Next surfel index in the ray linked-list (-1 if last). */
+  int next;
  /** Surface albedo to apply to incoming radiance. */
  packed_float3 albedo;
-  int _pad2;
-  /** Accumulated reflected radiance at this point. */
+  /** Distance along the ray direction for sorting. Written by the list build shader. */
+  float ray_distance;
+  /** Accumulated reflected radiance. */
  packed_float3 radiance;
-  int _pad3;
+  int _pad0;
 };
 BLI_STATIC_ASSERT_ALIGN(Surfel, 16)

@ -857,18 +860,15 @@ struct CaptureInfoData {
 };
 BLI_STATIC_ASSERT_ALIGN(CaptureInfoData, 16)

-enum SurfelListEntryType : uint32_t {
-  ENTRY_SURFEL = 0u,
-  ENTRY_IRRADIANCE_SAMPLE = 1u,
-};
+struct SurfelListInfoData {
+  /** Size of the grid used to project the surfels into linked lists. */
+  int2 ray_grid_size;
+  /** Total number of lists. Is equal to `ray_grid_size.x * ray_grid_size.y`. */
+  int list_max;

-struct SurfelListEntry {
-  uint next_entry_index;
-  SurfelListEntryType type;
-  uint payload;
-  uint _pad0;
+  int _pad0;
 };
-BLI_STATIC_ASSERT_ALIGN(SurfelListEntry, 16)
+BLI_STATIC_ASSERT_ALIGN(SurfelListInfoData, 16)

 /** \} */

@ -996,8 +996,9 @@ using ShadowPageCacheBuf = draw::StorageArrayBuffer<uint2, SHADOW_MAX_PAGE, true
 using ShadowTileMapDataBuf = draw::StorageVectorBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
 using ShadowTileMapClipBuf = draw::StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP, true>;
 using ShadowTileDataBuf = draw::StorageArrayBuffer<ShadowTileDataPacked, SHADOW_MAX_TILE, true>;
-using SurfelBuf = draw::StorageArrayBuffer<Surfel, 64, true>;
+using SurfelBuf = draw::StorageArrayBuffer<Surfel, 64>;
 using CaptureInfoBuf = draw::StorageBuffer<CaptureInfoData>;
+using SurfelListInfoBuf = draw::StorageBuffer<SurfelListInfoData>;
 using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
 using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>;
 using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>;
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_debug_surfels_vert.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_debug_surfels_vert.glsl
@ -1,11 +1,22 @@
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 #pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
+#pragma BLENDER_REQUIRE(common_debug_draw_lib.glsl)

 void main()
 {
  surfel_index = gl_InstanceID;
  Surfel surfel = surfels_buf[surfel_index];

+#if 0 /* Debug surfel lists. TODO allow in release build with a dedicated shader. */
+  if (gl_VertexID == 0 && surfel.next > -1) {
+    Surfel surfel_next = surfels_buf[surfel.next];
+    vec4 line_color = (surfel.prev == -1)      ? vec4(1.0, 1.0, 0.0, 1.0) :
+                      (surfel_next.next == -1) ? vec4(0.0, 1.0, 1.0, 1.0) :
+                                                 vec4(0.0, 1.0, 0.0, 1.0);
+    drw_debug_line(surfel_next.position, surfel.position, line_color);
+  }
+#endif
+
  vec3 lP;

  switch (gl_VertexID) {
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_list_build_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_list_build_comp.glsl
@ -1,10 +1,37 @@

 /**
 * Takes scene surfel representation and build list of surfels aligning in a given direction.
+ *
+ * The list heads are allocated to fit the surfel granularity.
+ *
+ * Due to alignment the link and list head are split into several int arrays to avoid too much
+ * memory waste.
+ *
+ * Dispatch 1 thread per surfel.
 */

-#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
+#pragma BLENDER_REQUIRE(common_view_lib.glsl)

 void main()
 {
+  int surfel_index = int(gl_GlobalInvocationID); /* NOTE(review): relies on `int(uvec3)` = `.x`. */
+  if (surfel_index >= capture_info_buf.surfel_len) {
+    return;
+  }
+
+  vec4 hP = point_world_to_ndc(surfel_buf[surfel_index].position);
+
+  surfel_buf[surfel_index].ray_distance = -hP.z;
+
+  vec2 ssP_surfel = hP.xy * 0.5 + 0.5; /* Remap [-1..1] to [0..1]. */
+  ivec2 ray_coord_on_grid = clamp(ivec2(ssP_surfel * vec2(list_info_buf.ray_grid_size)),
+                                  ivec2(0),
+                                  list_info_buf.ray_grid_size - 1);
+  int list_index = ray_coord_on_grid.y * list_info_buf.ray_grid_size.x + ray_coord_on_grid.x;
+
+  /* NOTE: We only need to init the `list_start_buf` to -1 for the whole list to be valid since
+   * every surfel will load its `next` value from the list head. */
+  surfel_buf[surfel_index].next = atomicExchange(list_start_buf[list_index], surfel_index);
 }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_list_sort_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_surfel_list_sort_comp.glsl
@ -3,10 +3,113 @@
 * Sort a buffer of surfel list by distance along a direction.
 * The resulting surfel lists are then the equivalent of a series of ray cast in the same
 * direction. The fact that the surfels are sorted gives proper occlusion.
+ *
+ * Sort by decreasing `ray_distance`. Start of list is largest value.
+ *
+ * Outputs a flat array of surfel indices. Each ray is a range inside the array. This allows
+ * parallel processing in the light propagation phase.
+ * Dispatched as 1 thread per list.
 */

-#pragma BLENDER_REQUIRE(common_math_lib.glsl)
+#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
+
+/**
+ * A doubly-linked list implementation.
+ * IMPORTANT: It is not general purpose as it only covers the cases needed by this shader.
+ */
+struct List {
+  int first, last;
+};
+
+/* Split `original` after `link_index` and return the tail. `link_index` must not be last. */
+List list_split_after(inout List original, int link_index)
+{
+  int next_link = surfel_buf[link_index].next;
+  int last_link = original.last;
+
+  original.last = link_index;
+
+  List split;
+  split.first = next_link;
+  split.last = last_link;
+
+  surfel_buf[link_index].next = -1;
+  surfel_buf[next_link].prev = -1; /* No check for `next_link == -1`. */
+
+  return split;
+}
+
+void list_add_tail(inout List list, int link_index)
+{
+  surfel_buf[link_index].next = -1;
+  surfel_buf[link_index].prev = list.last;
+  surfel_buf[list.last].next = link_index; /* Assumes `list` is not empty. */
+  list.last = link_index;
+}
+
+void list_insert_link_before(inout List list, int next_link, int new_link)
+{
+  if (list.first == next_link) {
+    /* Inserting at the beginning of the list: `new_link` becomes the first link. */
+    list.first = new_link;
+  }
+  int prev_link = surfel_buf[next_link].prev;
+  surfel_buf[new_link].next = next_link;
+  surfel_buf[new_link].prev = prev_link;
+  surfel_buf[next_link].prev = new_link;
+  if (prev_link != -1) { /* -1 means `next_link` had no previous link. */
+    surfel_buf[prev_link].next = new_link;
+  }
+}

 void main()
 {
+  int list_index = int(gl_GlobalInvocationID);
+  if (list_index >= list_info_buf.list_max) {
+    return;
+  }
+
+  int list_start = list_start_buf[list_index];
+
+  if (list_start == -1) {
+    /* Empty list. */
+    return;
+  }
+
+  /* Create `Surfel.prev` pointers by walking the `next` links of the unsorted list. */
+  int prev_id = -1;
+  for (int i = list_start; i > -1; i = surfel_buf[i].next) {
+    surfel_buf[i].prev = prev_id;
+    prev_id = i;
+  }
+
+  List sorted_list;
+  sorted_list.first = list_start;
+  sorted_list.last = prev_id;
+
+  if (sorted_list.first == sorted_list.last) {
+    /* Only one item. Nothing to sort. */
+    return;
+  }
+
+  /* Using insertion sort as it is easier to implement. NOTE: `list_start_buf` is not updated. */
+
+  List unsorted_list = list_split_after(sorted_list, sorted_list.first);
+
+  /* Mutable foreach: `next` is fetched first since inserting `i` rewrites its links. */
+  for (int i = unsorted_list.first, next; i > -1; i = next) {
+    next = surfel_buf[i].next;
+
+    bool insert = false;
+    for (int j = sorted_list.first; j > -1; j = surfel_buf[j].next) {
+      if (surfel_buf[j].ray_distance < surfel_buf[i].ray_distance) { /* Decreasing order. */
+        list_insert_link_before(sorted_list, j, i);
+        insert = true;
+        break;
+      }
+    }
+    if (insert == false) {
+      list_add_tail(sorted_list, i);
+    }
+  }
 }
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_irradiance_cache_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_irradiance_cache_info.hh
@ -19,7 +19,7 @@ GPU_SHADER_CREATE_INFO(eevee_debug_surfels)
    .do_static_compilation(true);

 GPU_SHADER_CREATE_INFO(eevee_surfel_light)
-    .local_group_size(CULLING_SELECT_GROUP_SIZE)
+    .local_group_size(SURFEL_GROUP_SIZE)
    .additional_info("eevee_shared",
                     "draw_view",
                     "eevee_utility_texture",
@ -31,15 +31,20 @@ GPU_SHADER_CREATE_INFO(eevee_surfel_light)
    .do_static_compilation(true);

 GPU_SHADER_CREATE_INFO(eevee_surfel_list_build)
-    .local_group_size(CULLING_SELECT_GROUP_SIZE)
+    .local_group_size(SURFEL_GROUP_SIZE) /* Dispatched as 1 thread per surfel. */
    .additional_info("eevee_shared", "draw_view")
+    .storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
+    .storage_buf(SURFEL_BUF_SLOT, Qualifier::READ_WRITE, "Surfel", "surfel_buf[]")
+    .storage_buf(CAPTURE_BUF_SLOT, Qualifier::READ, "CaptureInfoData", "capture_info_buf")
+    .storage_buf(6, Qualifier::READ_WRITE, "SurfelListInfoData", "list_info_buf")
    .compute_source("eevee_surfel_list_build_comp.glsl")
-    .storage_buf(0, Qualifier::READ_WRITE, "Surfel", "surfels_buf[]")
    .do_static_compilation(true);

 GPU_SHADER_CREATE_INFO(eevee_surfel_list_sort)
-    .local_group_size(CULLING_SELECT_GROUP_SIZE)
+    .local_group_size(SURFEL_LIST_GROUP_SIZE) /* Dispatched as 1 thread per list. */
    .additional_info("eevee_shared", "draw_view")
+    .storage_buf(0, Qualifier::READ_WRITE, "int", "list_start_buf[]")
+    .storage_buf(SURFEL_BUF_SLOT, Qualifier::READ_WRITE, "Surfel", "surfel_buf[]")
+    .storage_buf(6, Qualifier::READ, "SurfelListInfoData", "list_info_buf")
    .compute_source("eevee_surfel_list_sort_comp.glsl")
-    .storage_buf(0, Qualifier::READ_WRITE, "Surfel", "surfels_buf[]")
    .do_static_compilation(true);
--- a/source/blender/draw/tests/eevee_test.cc
+++ b/source/blender/draw/tests/eevee_test.cc
@ -1144,4 +1144,113 @@ static void test_eevee_shadow_page_mask()
 }
 DRAW_TEST(eevee_shadow_page_mask)

+static void test_eevee_surfel_list()
+{
+  StorageArrayBuffer<int> list_start_buf = {"list_start_buf"};
+  StorageVectorBuffer<Surfel> surfel_buf = {"surfel_buf"};
+  CaptureInfoBuf capture_info_buf = {"capture_info_buf"};
+  SurfelListInfoBuf list_info_buf = {"list_info_buf"};
+
+  /**
+   * Simulate surfels on a 2x2 projection grid covering [0..2] on X and Y, projected along Z.
+   */
+  {
+    Surfel surfel;
+    /* NOTE: Expected link assumes linear increasing processing order [0->5]. But this is
+     * multithreaded and we can't know the execution order in advance. */
+    /* 0: Project to (1, 0) = list 1. Unsorted Next = -1; Next = -1;  Prev = 3. */
+    surfel.position = {1.1f, 0.1f, 0.1f};
+    surfel_buf.append(surfel);
+    /* 1: Project to (1, 0) = list 1. Unsorted Next = 0; Next = 2; Prev = -1. */
+    surfel.position = {1.1f, 0.2f, 0.5f};
+    surfel_buf.append(surfel);
+    /* 2: Project to (1, 0) = list 1. Unsorted Next = 1; Next = 3; Prev = 1. */
+    surfel.position = {1.1f, 0.3f, 0.3f};
+    surfel_buf.append(surfel);
+    /* 3: Project to (1, 0) = list 1. Unsorted Next = 2; Next = 0; Prev = 2. */
+    surfel.position = {1.2f, 0.4f, 0.2f};
+    surfel_buf.append(surfel);
+    /* 4: Project to (1, 1) = list 3. Unsorted Next = -1; Next = -1;  Prev = -1. */
+    surfel.position = {1.0f, 1.0f, 0.5f};
+    surfel_buf.append(surfel);
+    /* 5: Project to (0, 1) = list 2. Unsorted Next = -1; Next = -1;  Prev = -1. */
+    surfel.position = {0.1f, 1.1f, 0.5f};
+    surfel_buf.append(surfel);
+
+    surfel_buf.push_update();
+  }
+  {
+    capture_info_buf.surfel_len = surfel_buf.size();
+    capture_info_buf.push_update();
+  }
+  {
+    list_info_buf.ray_grid_size = int2(2);
+    list_info_buf.list_max = list_info_buf.ray_grid_size.x * list_info_buf.ray_grid_size.y;
+    list_info_buf.push_update();
+  }
+  {
+    list_start_buf.resize(ceil_to_multiple_u(list_info_buf.list_max, 4u));
+    list_start_buf.push_update();
+    GPU_storagebuf_clear(list_start_buf, -1);
+  }
+
+  /* Top-down view. */
+  View view = {"RayProjectionView"};
+  view.sync(float4x4::identity(), math::projection::orthographic<float>(0, 2, 0, 2, 0, 1));
+
+  GPUShader *sh_build = GPU_shader_create_from_info_name("eevee_surfel_list_build");
+  GPUShader *sh_sort = GPU_shader_create_from_info_name("eevee_surfel_list_sort");
+
+  PassSimple pass("Build_and_Sort");
+  pass.shader_set(sh_build);
+  pass.bind_ssbo("list_start_buf", list_start_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("capture_info_buf", capture_info_buf);
+  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.dispatch(int3(1, 1, 1));
+  pass.barrier(GPU_BARRIER_SHADER_STORAGE);
+
+  pass.shader_set(sh_sort);
+  pass.bind_ssbo("list_start_buf", list_start_buf);
+  pass.bind_ssbo("surfel_buf", surfel_buf);
+  pass.bind_ssbo("list_info_buf", list_info_buf);
+  pass.dispatch(int3(1, 1, 1));
+  pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
+
+  Manager manager;
+  manager.submit(pass, view);
+
+  list_start_buf.read();
+  surfel_buf.read();
+
+  /* NOTE: All of these are unstable by definition (atomic + multithread).
+   * But should be consistent since we only dispatch one thread-group. */
+  /* Expect last added surfel index. It is the list start index before sorting. */
+  Vector<int> expect_list_start = {-1, 3, 5, 4};
+  /* Expect surfel list. List 1 is expected to be linked as 1 -> 2 -> 3 -> 0 after sorting. */
+  Vector<int> expect_link_next = {-1, +2, +3, +0, -1, -1};
+  Vector<int> expect_link_prev = {+3, -1, +1, +2, -1, -1};
+
+  Vector<int> link_next, link_prev;
+  for (auto &surfel : Span<Surfel>(surfel_buf.data(), surfel_buf.size())) {
+    link_next.append(surfel.next);
+    link_prev.append(surfel.prev);
+  }
+
+#if 0 /* Useful for debugging */
+  // Span<int>(list_start_buf.data(), expect_list_start.size()).print_as_lines("list_start");
+  // link_next.as_span().print_as_lines("link_next");
+  // link_prev.as_span().print_as_lines("link_prev");
+#endif
+
+  EXPECT_EQ_ARRAY(list_start_buf.data(), expect_list_start.data(), expect_list_start.size());
+  EXPECT_EQ_ARRAY(link_next.data(), expect_link_next.data(), expect_link_next.size());
+  EXPECT_EQ_ARRAY(link_prev.data(), expect_link_prev.data(), expect_link_prev.size());
+
+  GPU_shader_free(sh_build);
+  GPU_shader_free(sh_sort);
+  DRW_shaders_free();
+}
+DRAW_TEST(eevee_surfel_list)
+
 }  // namespace blender::draw