WIP: Cycles: Implement blue-noise dithered sampling #118479

Draft
Lukas Stockner wants to merge 1 commit from LukasStockner/blender:blue-noise-dithered into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
9 changed files with 151 additions and 35 deletions

View File

@ -71,8 +71,11 @@ enum_use_layer_samples = (
)
# Sampling pattern choices: (identifier, UI label, description, integer value).
#
# The integer values are kept equal to the C++ `SamplingPattern` enum
# (SOBOL_BURLEY/BLUE_NOISE_PURE = 0, TABULATED_SOBOL = 1, BLUE_NOISE_CASCADE = 2,
# BLUE_NOISE_FIRST = 3, BLUE_NOISE_ROUND = 4). Previously ROUND and CASCADE were
# swapped relative to that enum, so choosing one would have selected the other.
# NOTE(review): presumably the C++ sync code reads this property as a raw integer
# and casts it to `SamplingPattern` -- confirm against the caller.
# NOTE(review): 'SOBOL_BURLEY' and 'BLUE_NOISE_PURE' both map to 0, mirroring the
# C++ enum; confirm whether both entries are meant to stay.
enum_sampling_pattern = (
    ('SOBOL_BURLEY', "Sobol-Burley", "Use on-the-fly computed Owen-scrambled Sobol for random sampling", 0),
    ('TABULATED_SOBOL', "Tabulated Sobol", "Use pre-computed tables of Owen-scrambled Sobol for random sampling", 1),
    ('BLUE_NOISE_PURE', "Blue-Noise (pure)", "Blue-Noise (pure)", 0),
    ('BLUE_NOISE_ROUND', "Blue-Noise (round)", "Blue-Noise (round)", 4),
    ('BLUE_NOISE_FIRST', "Blue-Noise (first)", "Blue-Noise (first)", 3),
    ('BLUE_NOISE_CASCADE', "Blue-Noise (cascade)", "Blue-Noise (cascade)", 2),
)
enum_emission_sampling = (

View File

@ -409,7 +409,7 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer,
/* Only use scrambling distance in the viewport if user wants to. */
bool preview_scrambling_distance = get_boolean(cscene, "preview_scrambling_distance");
if ((preview && !preview_scrambling_distance) ||
sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY)
sampling_pattern != SAMPLING_PATTERN_TABULATED_SOBOL)
{
scrambling_distance = 1.0f;
}

View File

@ -194,7 +194,7 @@ KERNEL_STRUCT_MEMBER(integrator, float, scrambling_distance)
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, tabulated_sobol_sequence_size)
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, sobol_index_mask)
KERNEL_STRUCT_MEMBER(integrator, int, blue_noise_sequence_length)
/* Volume render. */
KERNEL_STRUCT_MEMBER(integrator, int, use_volumes)
KERNEL_STRUCT_MEMBER(integrator, int, volume_max_steps)

View File

@ -141,6 +141,7 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
prim += kernel_data.bake.tri_offset;
/* Random number generator. */
// TODO
const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;
const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :

View File

@ -222,6 +222,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));
/* Modify state for RNGs, decorrelated from other paths. */
// TODO
rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
/* Random walk until we hit the surface again. */

View File

@ -37,13 +37,23 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
return (float)drand48();
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_1D(sample, dimension, rng_hash, index_mask);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
return sobol_burley_sample_1D(sample + rng_hash, dimension, 0, 0xffffffff);
}
else {
return tabulated_sobol_sample_1D(kg, sample, rng_hash, dimension);
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_CASCADE) {
sample += 1;
const uint base = prev_power_of_two(sample);
sample -= base;
rng_hash *= base;
return sobol_burley_sample_1D(sample + rng_hash, dimension, hash_hp_uint(base), 0xffffffff);
}
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
if (sample == 0) {
return sobol_burley_sample_1D(rng_hash, dimension, 0, 0xffffffff);
}
sample -= 1;
}
return tabulated_sobol_sample_1D(kg, sample, rng_hash, dimension);
}
ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
@ -55,13 +65,23 @@ ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
return make_float2((float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_2D(sample, dimension, rng_hash, index_mask);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
return sobol_burley_sample_2D(sample + rng_hash, dimension, 0, 0xffffffff);
}
else {
return tabulated_sobol_sample_2D(kg, sample, rng_hash, dimension);
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_CASCADE) {
sample += 1;
const uint base = prev_power_of_two(sample);
sample -= base;
rng_hash *= base;
return sobol_burley_sample_2D(sample + rng_hash, dimension, hash_hp_uint(base), 0xffffffff);
}
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
if (sample == 0) {
return sobol_burley_sample_2D(rng_hash, dimension, 0, 0xffffffff);
}
sample -= 1;
}
return tabulated_sobol_sample_2D(kg, sample, rng_hash, dimension);
}
ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
@ -73,13 +93,23 @@ ccl_device_forceinline float3 path_rng_3D(KernelGlobals kg,
return make_float3((float)drand48(), (float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_3D(sample, dimension, rng_hash, index_mask);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
return sobol_burley_sample_3D(sample + rng_hash, dimension, 0, 0xffffffff);
}
else {
return tabulated_sobol_sample_3D(kg, sample, rng_hash, dimension);
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_CASCADE) {
sample += 1;
const uint base = prev_power_of_two(sample);
sample -= base;
rng_hash *= base;
return sobol_burley_sample_3D(sample + rng_hash, dimension, hash_hp_uint(base), 0xffffffff);
}
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
if (sample == 0) {
return sobol_burley_sample_3D(rng_hash, dimension, 0, 0xffffffff);
}
sample -= 1;
}
return tabulated_sobol_sample_3D(kg, sample, rng_hash, dimension);
}
ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
@ -91,13 +121,23 @@ ccl_device_forceinline float4 path_rng_4D(KernelGlobals kg,
return make_float4((float)drand48(), (float)drand48(), (float)drand48(), (float)drand48());
#endif
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
const uint index_mask = kernel_data.integrator.sobol_index_mask;
return sobol_burley_sample_4D(sample, dimension, rng_hash, index_mask);
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
return sobol_burley_sample_4D(sample + rng_hash, dimension, 0, 0xffffffff);
}
else {
return tabulated_sobol_sample_4D(kg, sample, rng_hash, dimension);
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_CASCADE) {
sample += 1;
const uint base = prev_power_of_two(sample);
sample -= base;
rng_hash *= base;
return sobol_burley_sample_4D(sample + rng_hash, dimension, hash_hp_uint(base), 0xffffffff);
}
else if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST) {
if (sample == 0) {
return sobol_burley_sample_4D(rng_hash, dimension, 0, 0xffffffff);
}
sample -= 1;
}
return tabulated_sobol_sample_4D(kg, sample, rng_hash, dimension);
}
/**
@ -132,12 +172,30 @@ ccl_device_inline uint path_rng_hash_init(KernelGlobals kg,
const int x,
const int y)
{
const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;
uint rng_hash = kernel_data.integrator.seed;
const uint pattern = kernel_data.integrator.sampling_pattern;
if ((pattern == SAMPLING_PATTERN_TABULATED_SOBOL) ||
(pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST && sample > 0))
{
rng_hash ^= hash_iqnt2d(x, y);
}
else {
/* Perform blue-noise dithered sampling by distributing the base sequence across pixels
* following a hierarchically shuffled 2D morton curve.
* Based on:
* https://psychopath.io/post/2022_07_24_owen_scrambling_based_dithered_blue_noise_sampling.
*/
rng_hash = nested_uniform_scramble_base4(morton2d(x, y), rng_hash);
/* Each pixel gets N samples from the sequence, so we offset the samples that this pixel
* uses by pixel_index * N.
* rng_hash here is used to track the offset into the sequence, it's not a hash anymore. */
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_PURE) {
rng_hash *= kernel_data.integrator.blue_noise_sequence_length;
}
}
#ifdef __DEBUG_CORRELATION__
srand48(rng_hash + sample);
#else
(void)sample;
#endif
return rng_hash;

View File

@ -25,6 +25,25 @@ ccl_device_inline uint reversed_bit_owen(uint n, uint seed)
return n;
}
/*
* Performs base-4 Owen scrambling on a reversed-bit unsigned integer.
*
* See https://psychopath.io/post/2022_08_14_a_fast_hash_for_base_4_owen_scrambling
*/

I suspect this is just a copy/paste oversight, but I just want to note that this sentence in the doc comment:

> This is equivalent to the Laine-Karras permutation, but much higher quality.

is not true of the base-4 hash. It is not equivalent to the Laine-Karras permutation (which is base 2), and it is also not especially high quality, as I outlined in the linked post.

I suspect this is just a copy/paste oversight, but just want to note that this bit:

> This is equivalent to the Laine-Karras permutation, but much higher quality.

Is not true of the base-4 hash. It is not equivalent to the Laine-Karras permutation (which is base 2), and is also not especially high quality, as I outlined in the linked post.
ccl_device_inline uint reversed_bit_owen_base4(uint n, uint seed)
{
  /* Hash-based base-4 Owen scramble of a bit-reversed integer `n`, keyed by `seed`.
   * Per the review discussion on this change, this hash is NOT equivalent to the
   * (base-2) Laine-Karras permutation. All arithmetic is unsigned and wraps. */

  /* Even bit positions, i.e. the low bit of every 2-bit (base-4) digit. */
  const uint digit_low_bits = 0x55555555;
  /* Multiplier taken from the upper 16 bits of the seed, forced odd. */
  const uint seed_multiplier = (seed >> 16) | 1;

  uint h = n;
  h ^= h * 0x3d20adea;
  /* Flip a digit's low bit when both of its neighboring bits are set. */
  h ^= (h >> 1) & (h << 1) & digit_low_bits;
  /* Mix in the seed: add it, then multiply by the seed-derived odd factor. */
  h = (h + seed) * seed_multiplier;
  h ^= (h >> 1) & (h << 1) & digit_low_bits;
  h ^= h * 0x05526c56;
  h ^= h * 0x53a22864;
  return h;
}
/*
* Performs base-2 Owen scrambling on an unsigned integer.
*/
@ -33,4 +52,27 @@ ccl_device_inline uint nested_uniform_scramble(uint i, uint seed)
return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed));
}
/*
 * Base-4 Owen scrambling of an unsigned integer.
 *
 * The underlying hash works on bit-reversed values, so the index is bit-reversed
 * before hashing and the result is bit-reversed back afterwards.
 */
ccl_device_inline uint nested_uniform_scramble_base4(uint i, uint seed)
{
  const uint reversed_index = reverse_integer_bits(i);
  const uint scrambled = reversed_bit_owen_base4(reversed_index, seed);
  return reverse_integer_bits(scrambled);
}
ccl_device_inline uint expand_bits(uint x)
{
  /* Spread the low 16 bits of `x` over the even bit positions of the result
   * (input bit k ends up at bit 2k); the upper 16 input bits are discarded and
   * all odd result bits are zero. Each stage halves the group size that stays
   * contiguous: 16 -> 8 -> 4 -> 2 -> 1 bits. */
  const uint low16 = x & 0x0000ffff;
  const uint groups_of_8 = (low16 ^ (low16 << 8)) & 0x00ff00ff;
  const uint groups_of_4 = (groups_of_8 ^ (groups_of_8 << 4)) & 0x0f0f0f0f;
  const uint groups_of_2 = (groups_of_4 ^ (groups_of_4 << 2)) & 0x33333333;
  const uint single_bits = (groups_of_2 ^ (groups_of_2 << 1)) & 0x55555555;
  return single_bits;
}
ccl_device_inline uint morton2d(uint x, uint y)
{
  /* Interleave the bits of the two coordinates into a single index:
   * `y` occupies the even bit positions and `x` the odd ones. */
  const uint y_even_bits = expand_bits(y);
  const uint x_odd_bits = expand_bits(x) << 1;
  return x_odd_bits | y_even_bits;
}
CCL_NAMESPACE_END

View File

@ -334,8 +334,11 @@ enum PathTraceDimension {
};
/* Sampling pattern used by the integrator; the path RNG functions branch on
 * `kernel_data.integrator.sampling_pattern` compared against these values.
 * NOTE(review): the add-on's `enum_sampling_pattern` tuple assigns integer ids to
 * the same names; presumably those ids must equal the values here -- verify. */
enum SamplingPattern {
/* NOTE(review): shares the value 0 with SAMPLING_PATTERN_BLUE_NOISE_PURE, so the
 * two enumerators are indistinguishable at runtime -- confirm this is intended. */
SAMPLING_PATTERN_SOBOL_BURLEY = 0,
SAMPLING_PATTERN_BLUE_NOISE_PURE = 0,
SAMPLING_PATTERN_TABULATED_SOBOL = 1,
SAMPLING_PATTERN_BLUE_NOISE_CASCADE = 2,
SAMPLING_PATTERN_BLUE_NOISE_FIRST = 3,
/* Integrator::device_update rewrites this to SAMPLING_PATTERN_BLUE_NOISE_PURE
 * (after rounding the sequence length up to a power of two) before storing it
 * in the kernel integrator data. */
SAMPLING_PATTERN_BLUE_NOISE_ROUND = 4,
/* Number of patterns; evaluates to 5 given the explicit values above. */
SAMPLING_NUM_PATTERNS,
};

View File

@ -119,8 +119,11 @@ NODE_DEFINE(Integrator)
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.0f);
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol_burley", SAMPLING_PATTERN_SOBOL_BURLEY);
sampling_pattern_enum.insert("blue_noise_pure", SAMPLING_PATTERN_BLUE_NOISE_PURE);
sampling_pattern_enum.insert("tabulated_sobol", SAMPLING_PATTERN_TABULATED_SOBOL);
sampling_pattern_enum.insert("blue_noise_cascade", SAMPLING_PATTERN_BLUE_NOISE_CASCADE);
sampling_pattern_enum.insert("blue_noise_round", SAMPLING_PATTERN_BLUE_NOISE_ROUND);
sampling_pattern_enum.insert("blue_noise_first", SAMPLING_PATTERN_BLUE_NOISE_FIRST);
SOCKET_ENUM(sampling_pattern,
"Sampling Pattern",
sampling_pattern_enum,
@ -273,7 +276,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->scrambling_distance = scrambling_distance;
kintegrator->sobol_index_mask = reverse_integer_bits(next_power_of_two(aa_samples - 1) - 1);
kintegrator->blue_noise_sequence_length = aa_samples;
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_ROUND) {
if (!is_power_of_two(aa_samples)) {
kintegrator->blue_noise_sequence_length = next_power_of_two(aa_samples);
}
kintegrator->sampling_pattern = SAMPLING_PATTERN_BLUE_NOISE_PURE;
}
/* NOTE: The kintegrator->use_light_tree is assigned to the efficient value in the light manager,
* and the synchronization code is expected to tag the light manager for update when the
@ -288,17 +297,16 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
/* Build pre-tabulated Sobol samples if needed. */
int sequence_size = clamp(
next_power_of_two(aa_samples - 1), MIN_TAB_SOBOL_SAMPLES, MAX_TAB_SOBOL_SAMPLES);
if (kintegrator->sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL &&
dscene->sample_pattern_lut.size() !=
(sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS))
{
bool use_tabulated_sobol = (kintegrator->sampling_pattern == SAMPLING_PATTERN_TABULATED_SOBOL) ||
(kintegrator->sampling_pattern == SAMPLING_PATTERN_BLUE_NOISE_FIRST);
const int table_size = sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS;
if (use_tabulated_sobol && dscene->sample_pattern_lut.size() != table_size) {
kintegrator->tabulated_sobol_sequence_size = sequence_size;
if (dscene->sample_pattern_lut.size() != 0) {
dscene->sample_pattern_lut.free();
}
float4 *directions = (float4 *)dscene->sample_pattern_lut.alloc(
sequence_size * NUM_TAB_SOBOL_PATTERNS * NUM_TAB_SOBOL_DIMENSIONS);
float4 *directions = (float4 *)dscene->sample_pattern_lut.alloc(table_size);
TaskPool pool;
for (int j = 0; j < NUM_TAB_SOBOL_PATTERNS; ++j) {
float4 *sequence = directions + j * sequence_size;