Cleanup: minor cleanups for sample pattern code

2022-09-01 01:28:58 +02:00
parent 60119daef5
commit 06d2dc6be2
14 changed files with 165 additions and 202 deletions
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -121,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
  /* Random number generator. */
  const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;

-  float filter_x, filter_y;
-  if (sample == 0) {
-    filter_x = filter_y = 0.5f;
-  }
-  else {
-    path_rng_2D(kg, rng_hash, sample, PRNG_FILTER, &filter_x, &filter_y);
-  }
+  const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+                                             path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);

  /* Initialize path state for path integration. */
  path_state_init_integrator(kg, state, sample, rng_hash);
@@ -150,8 +145,9 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,

  /* Sub-pixel offset. */
  if (sample > 0) {
-    u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
-    v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+    u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f),
+                                 1.0f);
+    v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f),
                                 1.0f - u);
  }

--- a/intern/cycles/kernel/integrator/init_from_camera.h
+++ b/intern/cycles/kernel/integrator/init_from_camera.h
@@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
                                               ccl_private Ray *ray)
 {
  /* Filter sampling. */
-  float filter_u, filter_v;
-
-  if (sample == 0) {
-    filter_u = 0.5f;
-    filter_v = 0.5f;
-  }
-  else {
-    path_rng_2D(kg, rng_hash, sample, PRNG_FILTER, &filter_u, &filter_v);
-  }
+  const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+                                             path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);

  /* Depth of field sampling. */
-  float lens_u = 0.0f, lens_v = 0.0f;
-  if (kernel_data.cam.aperturesize > 0.0f) {
-    path_rng_2D(kg, rng_hash, sample, PRNG_LENS, &lens_u, &lens_v);
-  }
+  const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ?
+                               path_rng_2D(kg, rng_hash, sample, PRNG_LENS) :
+                               zero_float2();

  /* Motion blur time sampling. */
-  float time = 0.0f;
-#ifdef __CAMERA_MOTION__
-  if (kernel_data.cam.shuttertime != -1.0f)
-    time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME);
-#endif
+  const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ?
+                              path_rng_1D(kg, rng_hash, sample, PRNG_TIME) :
+                              0.0f;

  /* Generate camera ray. */
-  camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+  camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray);
 }

 /* Return false to indicate that this pixel is finished.
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -1033,10 +1033,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
          float2 h = zero_float2();
          if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) {
            /* Sample transmissive microfacet bsdf. */
-            float bsdf_u, bsdf_v;
-            path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF, &bsdf_u, &bsdf_v);
-            h = mnee_sample_bsdf_dh(
-                bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v);
+            const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+            h = mnee_sample_bsdf_dh(bsdf->type,
+                                    microfacet_bsdf->alpha_x,
+                                    microfacet_bsdf->alpha_y,
+                                    bsdf_uv.x,
+                                    bsdf_uv.y);
          }

          /* Setup differential geometry on vertex. */
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -298,27 +298,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta

 ccl_device_inline float path_state_rng_1D(KernelGlobals kg,
                                          ccl_private const RNGState *rng_state,
-                                          int dimension)
+                                          const int dimension)
 {
  return path_rng_1D(
      kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
 }

-ccl_device_inline void path_state_rng_2D(KernelGlobals kg,
-                                         ccl_private const RNGState *rng_state,
-                                         int dimension,
-                                         ccl_private float *fx,
-                                         ccl_private float *fy)
+ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg,
+                                           ccl_private const RNGState *rng_state,
+                                           const int dimension)
 {
-  path_rng_2D(
-      kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
+  return path_rng_2D(
+      kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
 }

 ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
                                             ccl_private const RNGState *rng_state,
-                                             int branch,
-                                             int num_branches,
-                                             int dimension)
+                                             const int branch,
+                                             const int num_branches,
+                                             const int dimension)
 {
  return path_rng_1D(kg,
                     rng_state->rng_hash,
@@ -326,20 +324,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
                     rng_state->rng_offset + dimension);
 }

-ccl_device_inline void path_branched_rng_2D(KernelGlobals kg,
-                                            ccl_private const RNGState *rng_state,
-                                            int branch,
-                                            int num_branches,
-                                            int dimension,
-                                            ccl_private float *fx,
-                                            ccl_private float *fy)
+ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg,
+                                              ccl_private const RNGState *rng_state,
+                                              const int branch,
+                                              const int num_branches,
+                                              const int dimension)
 {
-  path_rng_2D(kg,
-              rng_state->rng_hash,
-              rng_state->sample * num_branches + branch,
-              rng_state->rng_offset + dimension,
-              fx,
-              fy);
+  return path_rng_2D(kg,
+                     rng_state->rng_hash,
+                     rng_state->sample * num_branches + branch,
+                     rng_state->rng_offset + dimension);
 }

 /* Utility functions to get light termination value,
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -155,11 +155,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
  {
    const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
-    float light_u, light_v;
-    path_state_rng_2D(kg, rng_state, PRNG_LIGHT, &light_u, &light_v);
+    const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

    if (!light_distribution_sample_from_position(
-            kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) {
+            kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) {
      return;
    }
  }
@@ -347,9 +346,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
    return LABEL_NONE;
  }

-  float bsdf_u, bsdf_v;
-  path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF, &bsdf_u, &bsdf_v);
-  ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+  float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+  ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &rand_bsdf);

 #ifdef __SUBSURFACE__
  /* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -364,8 +362,7 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
  float3 bsdf_omega_in ccl_optional_struct_init;
  int label;

-  label = shader_bsdf_sample_closure(
-      kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf);
+  label = shader_bsdf_sample_closure(kg, sd, sc, rand_bsdf, &bsdf_eval, &bsdf_omega_in, &bsdf_pdf);

  if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
    return LABEL_NONE;
@@ -456,8 +453,7 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
    return;
  }

-  float bsdf_u, bsdf_v;
-  path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF, &bsdf_u, &bsdf_v);
+  const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);

  float3 ao_N;
  const Spectrum ao_weight = shader_bsdf_ao(
@@ -465,7 +461,7 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,

  float3 ao_D;
  float ao_pdf;
-  sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+  sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf);

  bool skip_self = true;

--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -694,11 +694,10 @@ ccl_device_forceinline bool integrate_volume_sample_light(
  /* Sample position on a light. */
  const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
  const uint bounce = INTEGRATOR_STATE(state, path, bounce);
-  float light_u, light_v;
-  path_state_rng_2D(kg, rng_state, PRNG_LIGHT, &light_u, &light_v);
+  const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

  if (!light_distribution_sample_from_volume_segment(
-          kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) {
+          kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) {
    return false;
  }

@@ -735,11 +734,10 @@ ccl_device_forceinline void integrate_volume_direct_light(
  {
    const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
    const uint bounce = INTEGRATOR_STATE(state, path, bounce);
-    float light_u, light_v;
-    path_state_rng_2D(kg, rng_state, PRNG_LIGHT, &light_u, &light_v);
+    const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);

    if (!light_distribution_sample_from_position(
-            kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) {
+            kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) {
      return;
    }
  }
@@ -864,8 +862,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
 {
  PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);

-  float phase_u, phase_v;
-  path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE, &phase_u, &phase_v);
+  const float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE);

  /* Phase closure, sample direction. */
  float phase_pdf;
@@ -873,7 +870,7 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
  float3 phase_omega_in ccl_optional_struct_init;

  const int label = shader_volume_phase_sample(
-      kg, sd, phases, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_pdf);
+      kg, sd, phases, rand_phase, &phase_eval, &phase_omega_in, &phase_pdf);

  if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
    return false;
--- a/intern/cycles/kernel/integrator/shader_eval.h
+++ b/intern/cycles/kernel/integrator/shader_eval.h
@@ -267,7 +267,7 @@ ccl_device_inline

 /* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
 ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
-    ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu)
+    ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf)
 {
  int sampled = 0;

@@ -283,7 +283,7 @@ ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
      }
    }

-    float r = (*randu) * sum;
+    float r = (*rand_bsdf).x * sum;
    float partial_sum = 0.0f;

    for (int i = 0; i < sd->num_closure; i++) {
@@ -296,7 +296,7 @@ ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
          sampled = i;

          /* Rescale to reuse for direction sample, to better preserve stratification. */
-          *randu = (r - partial_sum) / sc->sample_weight;
+          (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight;
          break;
        }

@@ -335,8 +335,7 @@ shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
 ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
                                          ccl_private ShaderData *sd,
                                          ccl_private const ShaderClosure *sc,
-                                          float randu,
-                                          float randv,
+                                          const float2 rand_bsdf,
                                          ccl_private BsdfEval *bsdf_eval,
                                          ccl_private float3 *omega_in,
                                          ccl_private float *pdf)
@@ -348,7 +347,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
  Spectrum eval = zero_spectrum();

  *pdf = 0.0f;
-  label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, pdf);
+  label = bsdf_sample(kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf);

  if (*pdf != 0.0f) {
    bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
@@ -703,8 +702,7 @@ ccl_device float shader_volume_phase_eval(KernelGlobals kg,
 ccl_device int shader_volume_phase_sample(KernelGlobals kg,
                                          ccl_private const ShaderData *sd,
                                          ccl_private const ShaderVolumePhases *phases,
-                                          float randu,
-                                          float randv,
+                                          float2 rand_phase,
                                          ccl_private BsdfEval *phase_eval,
                                          ccl_private float3 *omega_in,
                                          ccl_private float *pdf)
@@ -720,7 +718,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
      sum += svc->sample_weight;
    }

-    float r = randu * sum;
+    float r = rand_phase.x * sum;
    float partial_sum = 0.0f;

    for (sampled = 0; sampled < phases->num_closure; sampled++) {
@@ -729,7 +727,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,

      if (r <= next_sum) {
        /* Rescale to reuse for BSDF direction sample. */
-        randu = (r - partial_sum) / svc->sample_weight;
+        rand_phase.x = (r - partial_sum) / svc->sample_weight;
        break;
      }

@@ -749,7 +747,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
  Spectrum eval = zero_spectrum();

  *pdf = 0.0f;
-  label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, pdf);
+  label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);

  if (*pdf != 0.0f) {
    bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
@@ -761,8 +759,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals kg,
 ccl_device int shader_phase_sample_closure(KernelGlobals kg,
                                           ccl_private const ShaderData *sd,
                                           ccl_private const ShaderVolumeClosure *sc,
-                                           float randu,
-                                           float randv,
+                                           const float2 rand_phase,
                                           ccl_private BsdfEval *phase_eval,
                                           ccl_private float3 *omega_in,
                                           ccl_private float *pdf)
@@ -771,7 +768,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals kg,
  Spectrum eval = zero_spectrum();

  *pdf = 0.0f;
-  label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, pdf);
+  label = volume_phase_sample(sd, sc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);

  if (*pdf != 0.0f)
    bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
--- a/intern/cycles/kernel/integrator/subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/subsurface_disk.h
@@ -25,8 +25,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
                                       ccl_private LocalIntersection &ss_isect)

 {
-  float disk_u, disk_v;
-  path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK, &disk_u, &disk_v);
+  float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK);

  /* Read shading point info from integrator state. */
  const float3 P = INTEGRATOR_STATE(state, ray, P);
@@ -46,20 +45,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
  disk_N = Ng;
  make_orthonormals(disk_N, &disk_T, &disk_B);

-  if (disk_v < 0.5f) {
+  if (rand_disk.y < 0.5f) {
    pick_pdf_N = 0.5f;
    pick_pdf_T = 0.25f;
    pick_pdf_B = 0.25f;
-    disk_v *= 2.0f;
+    rand_disk.y *= 2.0f;
  }
-  else if (disk_v < 0.75f) {
+  else if (rand_disk.y < 0.75f) {
    float3 tmp = disk_N;
    disk_N = disk_T;
    disk_T = tmp;
    pick_pdf_N = 0.25f;
    pick_pdf_T = 0.5f;
    pick_pdf_B = 0.25f;
-    disk_v = (disk_v - 0.5f) * 4.0f;
+    rand_disk.y = (rand_disk.y - 0.5f) * 4.0f;
  }
  else {
    float3 tmp = disk_N;
@@ -68,14 +67,14 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
    pick_pdf_N = 0.25f;
    pick_pdf_T = 0.25f;
    pick_pdf_B = 0.5f;
-    disk_v = (disk_v - 0.75f) * 4.0f;
+    rand_disk.y = (rand_disk.y - 0.75f) * 4.0f;
  }

  /* Sample point on disk. */
-  float phi = M_2PI_F * disk_v;
+  float phi = M_2PI_F * rand_disk.y;
  float disk_height, disk_r;

-  bssrdf_sample(radius, disk_u, &disk_r, &disk_height);
+  bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height);

  float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;

@@ -163,7 +162,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
  }

  /* Use importance resampling, sampling one of the hits proportional to weight. */
-  const float r = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE) * sum_weights;
+  const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE);
+  const float r = rand_resample * sum_weights;
  float partial_sum = 0.0f;

  for (int hit = 0; hit < num_eval_hits; hit++) {
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@@ -165,8 +165,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
                                              ccl_private Ray &ray,
                                              ccl_private LocalIntersection &ss_isect)
 {
-  float bssrdf_u, bssrdf_v;
-  path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF, &bssrdf_u, &bssrdf_v);
+  const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);

  const float3 P = INTEGRATOR_STATE(state, ray, P);
  const float3 N = INTEGRATOR_STATE(state, ray, D);
@@ -179,7 +178,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
  /* Sample diffuse surface scatter into the object. */
  float3 D;
  float pdf;
-  sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf);
+  sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf);
  if (dot(-Ng, D) <= 0.0f) {
    return false;
  }
@@ -309,23 +308,23 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
      }

      /* Sample scattering direction. */
-      float scatter_u, scatter_v;
-      path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF, &scatter_u, &scatter_v);
+      const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);
      float cos_theta;
      float hg_pdf;
      if (guided) {
-        cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u);
+        cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x);
        /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the
         * sign here is enough to sample from that instead. */
        if (guide_backward) {
          cos_theta = -cos_theta;
        }
-        float3 newD = direction_from_cosine(N, cos_theta, scatter_v);
+        float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y);
        hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy);
        ray.D = newD;
      }
      else {
-        float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf);
+        float3 newD = henyey_greenstrein_sample(
+            ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf);
        cos_theta = dot(newD, N);
        ray.D = newD;
      }
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@@ -7,7 +7,10 @@
 #pragma once
 CCL_NAMESPACE_BEGIN

-ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension)
+ccl_device float pmj_sample_1D(KernelGlobals kg,
+                               uint sample,
+                               const uint rng_hash,
+                               const uint dimension)
 {
  uint seed = rng_hash;

@@ -22,20 +25,22 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin
   * The funky sample mask stuff is to ensure that we only shuffle
   * *within* the current sample pattern, which is necessary to avoid
   * early repeat pattern use. */
-  uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  uint sample_shuffled = nested_uniform_scramble(sample, hash_wang_seeded_uint(dimension, seed));
+  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);

  /* Fetch the sample. */
-  uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
  float x = kernel_data_fetch(sample_pattern_lut, index * 2);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
-    float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
-                     kernel_data.integrator.scrambling_distance;
+    const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+                           kernel_data.integrator.scrambling_distance;
    x += jitter_x;
    x -= floorf(x);
  }
@@ -43,12 +48,10 @@ ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uin
  return x;
 }

-ccl_device void pmj_sample_2D(KernelGlobals kg,
-                              uint sample,
-                              uint rng_hash,
-                              uint dimension,
-                              ccl_private float *x,
-                              ccl_private float *y)
+ccl_device float2 pmj_sample_2D(KernelGlobals kg,
+                                uint sample,
+                                const uint rng_hash,
+                                const uint dimension)
 {
  uint seed = rng_hash;

@@ -63,28 +66,32 @@ ccl_device void pmj_sample_2D(KernelGlobals kg,
   * The funky sample mask stuff is to ensure that we only shuffle
   * *within* the current sample pattern, which is necessary to avoid
   * early repeat pattern use. */
-  uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+  const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
  /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
-  uint sample_mask = NUM_PMJ_SAMPLES - 1;
-  uint sample_shuffled = nested_uniform_scramble(sample, hash_wang_seeded_uint(dimension, seed));
+  const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+  const uint sample_shuffled = nested_uniform_scramble(sample,
+                                                       hash_wang_seeded_uint(dimension, seed));
  sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);

  /* Fetch the sample. */
-  uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) % (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
-  (*x) = kernel_data_fetch(sample_pattern_lut, index * 2);
-  (*y) = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);
+  const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+                     (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+  float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+  float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);

  /* Do limited Cranley-Patterson rotation when using scrambling distance. */
  if (kernel_data.integrator.scrambling_distance < 1.0f) {
-    float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
-                     kernel_data.integrator.scrambling_distance;
-    float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) *
-                     kernel_data.integrator.scrambling_distance;
-    (*x) += jitter_x;
-    (*y) += jitter_y;
-    (*x) -= floorf(*x);
-    (*y) -= floorf(*y);
+    const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+                           kernel_data.integrator.scrambling_distance;
+    const float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) *
+                           kernel_data.integrator.scrambling_distance;
+    x += jitter_x;
+    y += jitter_y;
+    x -= floorf(x);
+    y -= floorf(y);
  }
+
+  return make_float2(x, y);
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/sample/pattern.h
+++ b/intern/cycles/kernel/sample/pattern.h
@@ -30,24 +30,20 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
  }
 }

-ccl_device_forceinline void path_rng_2D(KernelGlobals kg,
-                                        uint rng_hash,
-                                        int sample,
-                                        int dimension,
-                                        ccl_private float *fx,
-                                        ccl_private float *fy)
+ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
+                                          uint rng_hash,
+                                          int sample,
+                                          int dimension)
 {
 #ifdef __DEBUG_CORRELATION__
-  *fx = (float)drand48();
-  *fy = (float)drand48();
-  return;
+  return make_float2((float)drand48(), (float)drand48());
 #endif

  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
-    sobol_burley_sample_2D(sample, dimension, rng_hash, fx, fy);
+    return sobol_burley_sample_2D(sample, dimension, rng_hash);
  }
  else {
-    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+    return pmj_sample_2D(kg, sample, rng_hash, dimension);
  }
 }

--- a/intern/cycles/kernel/sample/sobol_burley.h
+++ b/intern/cycles/kernel/sample/sobol_burley.h
@@ -32,14 +32,16 @@ CCL_NAMESPACE_BEGIN
 * Note that the seed must be well randomized before being
 * passed to this function.
 */
-ccl_device_forceinline float sobol_burley(uint rev_bit_index, uint dimension, uint scramble_seed)
+ccl_device_forceinline float sobol_burley(uint rev_bit_index,
+                                          const uint dimension,
+                                          const uint scramble_seed)
 {
  uint result = 0;

  if (dimension == 0) {
-    // Fast-path for dimension 0, which is just Van der corput.
-    // This makes a notable difference in performance since we reuse
-    // dimensions for padding, and dimension 0 is reused the most.
+    /* Fast-path for dimension 0, which is just the van der Corput sequence.
+     * This makes a notable difference in performance since we reuse
+     * dimensions for padding, and dimension 0 is reused the most. */
    result = reverse_integer_bits(rev_bit_index);
  }
  else {
@@ -49,14 +51,14 @@ ccl_device_forceinline float sobol_burley(uint rev_bit_index, uint dimension, ui
      result ^= sobol_burley_table[dimension][i + j];
      i += j + 1;

-      // We can't do "<<= j + 1" because that can overflow the shift
-      // operator, which doesn't do what we need on at least x86.
+      /* We can't do "<<= j + 1" because that can overflow the shift
+       * operator, which doesn't do what we need on at least x86. */
      rev_bit_index <<= j;
      rev_bit_index <<= 1;
    }
  }

-  // Apply Owen scrambling.
+  /* Apply Owen scrambling. */
  result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed));

  return uint_to_float_excl(result);
@@ -65,13 +67,13 @@ ccl_device_forceinline float sobol_burley(uint rev_bit_index, uint dimension, ui
 /*
 * Computes a 1D Owen-scrambled and shuffled Sobol sample.
 */
-ccl_device float sobol_burley_sample_1D(uint index, uint dimension, uint seed)
+ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed)
 {
-  // Include the dimension in the seed, so we get decorrelated
-  // sequences for different dimensions via shuffling.
+  /* Include the dimension in the seed, so we get decorrelated
+   * sequences for different dimensions via shuffling. */
  seed ^= hash_hp_uint(dimension);

-  // Shuffle.
+  /* Shuffle. */
  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe);

  return sobol_burley(index, 0, seed ^ 0x635c77bd);
@@ -80,64 +82,52 @@ ccl_device float sobol_burley_sample_1D(uint index, uint dimension, uint seed)
 /*
 * Computes a 2D Owen-scrambled and shuffled Sobol sample.
 */
-ccl_device void sobol_burley_sample_2D(
-    uint index, uint dimension_set, uint seed, ccl_private float *x, ccl_private float *y)
+ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed)
 {
-  // Include the dimension set in the seed, so we get decorrelated
-  // sequences for different dimension sets via shuffling.
+  /* Include the dimension set in the seed, so we get decorrelated
+   * sequences for different dimension sets via shuffling. */
  seed ^= hash_hp_uint(dimension_set);

-  // Shuffle.
+  /* Shuffle. */
  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a);

-  *x = sobol_burley(index, 0, seed ^ 0xe0aaaf76);
-  *y = sobol_burley(index, 1, seed ^ 0x94964d4e);
+  return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76),
+                     sobol_burley(index, 1, seed ^ 0x94964d4e));
 }

 /*
 * Computes a 3D Owen-scrambled and shuffled Sobol sample.
 */
-ccl_device void sobol_burley_sample_3D(uint index,
-                                       uint dimension_set,
-                                       uint seed,
-                                       ccl_private float *x,
-                                       ccl_private float *y,
-                                       ccl_private float *z)
+ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed)
 {
-  // Include the dimension set in the seed, so we get decorrelated
-  // sequences for different dimension sets via shuffling.
+  /* Include the dimension set in the seed, so we get decorrelated
+   * sequences for different dimension sets via shuffling. */
  seed ^= hash_hp_uint(dimension_set);

-  // Shuffle.
+  /* Shuffle. */
  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac);

-  *x = sobol_burley(index, 0, seed ^ 0x9e78e391);
-  *y = sobol_burley(index, 1, seed ^ 0x67c33241);
-  *z = sobol_burley(index, 2, seed ^ 0x78c395c5);
+  return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391),
+                     sobol_burley(index, 1, seed ^ 0x67c33241),
+                     sobol_burley(index, 2, seed ^ 0x78c395c5));
 }

 /*
 * Computes a 4D Owen-scrambled and shuffled Sobol sample.
 */
-ccl_device void sobol_burley_sample_4D(uint index,
-                                       uint dimension_set,
-                                       uint seed,
-                                       ccl_private float *x,
-                                       ccl_private float *y,
-                                       ccl_private float *z,
-                                       ccl_private float *w)
+ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed)
 {
-  // Include the dimension set in the seed, so we get decorrelated
-  // sequences for different dimension sets via shuffling.
+  /* Include the dimension set in the seed, so we get decorrelated
+   * sequences for different dimension sets via shuffling. */
  seed ^= hash_hp_uint(dimension_set);

-  // Shuffle.
+  /* Shuffle. */
  index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055);

-  *x = sobol_burley(index, 0, seed ^ 0x39468210);
-  *y = sobol_burley(index, 1, seed ^ 0xe9d8a845);
-  *z = sobol_burley(index, 2, seed ^ 0x5f32b482);
-  *w = sobol_burley(index, 3, seed ^ 0x1524cc56);
+  return make_float4(sobol_burley(index, 0, seed ^ 0x39468210),
+                     sobol_burley(index, 1, seed ^ 0xe9d8a845),
+                     sobol_burley(index, 2, seed ^ 0x5f32b482),
+                     sobol_burley(index, 3, seed ^ 0x1524cc56));
 }

 CCL_NAMESPACE_END
--- a/intern/cycles/kernel/svm/ao.h
+++ b/intern/cycles/kernel/svm/ao.h
@@ -49,10 +49,10 @@ ccl_device float svm_ao(

  int unoccluded = 0;
  for (int sample = 0; sample < num_samples; sample++) {
-    float disk_u, disk_v;
-    path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_SURFACE_AO, &disk_u, &disk_v);
+    const float2 rand_disk = path_branched_rng_2D(
+        kg, &rng_state, sample, num_samples, PRNG_SURFACE_AO);

-    float2 d = concentric_sample_disk(disk_u, disk_v);
+    float2 d = concentric_sample_disk(rand_disk.x, rand_disk.y);
    float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));

    /* Create ray. */
--- a/intern/cycles/kernel/svm/bevel.h
+++ b/intern/cycles/kernel/svm/bevel.h
@@ -128,9 +128,8 @@ ccl_device float3 svm_bevel(
  path_state_rng_load(state, &rng_state);

  for (int sample = 0; sample < num_samples; sample++) {
-    float disk_u, disk_v;
-    path_branched_rng_2D(
-        kg, &rng_state, sample, num_samples, PRNG_SURFACE_BEVEL, &disk_u, &disk_v);
+    float2 rand_disk = path_branched_rng_2D(
+        kg, &rng_state, sample, num_samples, PRNG_SURFACE_BEVEL);

    /* Pick random axis in local frame and point on disk. */
    float3 disk_N, disk_T, disk_B;
@@ -139,13 +138,13 @@ ccl_device float3 svm_bevel(
    disk_N = sd->Ng;
    make_orthonormals(disk_N, &disk_T, &disk_B);

-    float axisu = disk_u;
+    float axisu = rand_disk.x;

    if (axisu < 0.5f) {
      pick_pdf_N = 0.5f;
      pick_pdf_T = 0.25f;
      pick_pdf_B = 0.25f;
-      disk_u *= 2.0f;
+      rand_disk.x *= 2.0f;
    }
    else if (axisu < 0.75f) {
      float3 tmp = disk_N;
@@ -154,7 +153,7 @@ ccl_device float3 svm_bevel(
      pick_pdf_N = 0.25f;
      pick_pdf_T = 0.5f;
      pick_pdf_B = 0.25f;
-      disk_u = (disk_u - 0.5f) * 4.0f;
+      rand_disk.x = (rand_disk.x - 0.5f) * 4.0f;
    }
    else {
      float3 tmp = disk_N;
@@ -163,12 +162,12 @@ ccl_device float3 svm_bevel(
      pick_pdf_N = 0.25f;
      pick_pdf_T = 0.25f;
      pick_pdf_B = 0.5f;
-      disk_u = (disk_u - 0.75f) * 4.0f;
+      rand_disk.x = (rand_disk.x - 0.75f) * 4.0f;
    }

    /* Sample point on disk. */
-    float phi = M_2PI_F * disk_u;
-    float disk_r = disk_v;
+    float phi = M_2PI_F * rand_disk.x;
+    float disk_r = rand_disk.y;
    float disk_height;

    /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */