Work around what appears to be a compiler bug, just changing the code a bit without any functional changes.
		
			
				
	
	
		
			322 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			322 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright 2011-2013 Blender Foundation
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  * http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| #include "kernel/kernel_jitter.h"
 | |
| #include "util/util_hash.h"
 | |
| 
 | |
| CCL_NAMESPACE_BEGIN
 | |
| 
 | |
| /* Pseudo random numbers, uncomment this for debugging correlations. Only run
 | |
|  * this single threaded on a CPU for repeatable results. */
 | |
| //#define __DEBUG_CORRELATION__
 | |
| 
 | |
| /* High Dimensional Sobol.
 | |
|  *
 | |
|  * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
 | |
|  * to classic Van der Corput and Sobol sequences. */
 | |
| 
 | |
| #ifdef __SOBOL__
 | |
| 
 | |
| /* Skip initial numbers that for some dimensions have clear patterns that
 | |
|  * don't cover the entire sample space. Ideally we would have a better
 | |
|  * progressive pattern that doesn't suffer from this problem, because even
 | |
|  * with this offset some dimensions are quite poor.
 | |
|  */
 | |
| #  define SOBOL_SKIP 64
 | |
| 
 | |
| ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
 | |
| {
 | |
|   uint result = 0;
 | |
|   uint i = index + SOBOL_SKIP;
 | |
|   for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
 | |
|     j += x;
 | |
|     result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
 | |
|   }
 | |
|   return result;
 | |
| }
 | |
| 
 | |
| #endif /* __SOBOL__ */
 | |
| 
 | |
| ccl_device_forceinline float path_rng_1D(
 | |
|     KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension)
 | |
| {
 | |
| #ifdef __DEBUG_CORRELATION__
 | |
|   return (float)drand48();
 | |
| #endif
 | |
|   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
 | |
|     return pmj_sample_1D(kg, sample, rng_hash, dimension);
 | |
|   }
 | |
| #ifdef __CMJ__
 | |
| #  ifdef __SOBOL__
 | |
|   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
 | |
| #  endif
 | |
|   {
 | |
|     /* Correlated multi-jitter. */
 | |
|     int p = rng_hash + dimension;
 | |
|     return cmj_sample_1D(sample, num_samples, p);
 | |
|   }
 | |
| #endif
 | |
| 
 | |
| #ifdef __SOBOL__
 | |
|   /* Sobol sequence value using direction vectors. */
 | |
|   uint result = sobol_dimension(kg, sample, dimension);
 | |
|   float r = (float)result * (1.0f / (float)0xFFFFFFFF);
 | |
| 
 | |
|   /* Cranly-Patterson rotation using rng seed */
 | |
|   float shift;
 | |
| 
 | |
|   /* Hash rng with dimension to solve correlation issues.
 | |
|    * See T38710, T50116.
 | |
|    */
 | |
|   uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
 | |
|   shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);
 | |
| 
 | |
|   return r + shift - floorf(r + shift);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
 | |
|                                         uint rng_hash,
 | |
|                                         int sample,
 | |
|                                         int num_samples,
 | |
|                                         int dimension,
 | |
|                                         float *fx,
 | |
|                                         float *fy)
 | |
| {
 | |
| #ifdef __DEBUG_CORRELATION__
 | |
|   *fx = (float)drand48();
 | |
|   *fy = (float)drand48();
 | |
|   return;
 | |
| #endif
 | |
|   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
 | |
|     const float2 f = pmj_sample_2D(kg, sample, rng_hash, dimension);
 | |
|     *fx = f.x;
 | |
|     *fy = f.y;
 | |
|     return;
 | |
|   }
 | |
| #ifdef __CMJ__
 | |
| #  ifdef __SOBOL__
 | |
|   if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
 | |
| #  endif
 | |
|   {
 | |
|     /* Correlated multi-jitter. */
 | |
|     int p = rng_hash + dimension;
 | |
|     cmj_sample_2D(sample, num_samples, p, fx, fy);
 | |
|     return;
 | |
|   }
 | |
| #endif
 | |
| 
 | |
| #ifdef __SOBOL__
 | |
|   /* Sobol. */
 | |
|   *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension);
 | |
|   *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| ccl_device_inline void path_rng_init(KernelGlobals *kg,
 | |
|                                      int sample,
 | |
|                                      int num_samples,
 | |
|                                      uint *rng_hash,
 | |
|                                      int x,
 | |
|                                      int y,
 | |
|                                      float *fx,
 | |
|                                      float *fy)
 | |
| {
 | |
|   /* load state */
 | |
|   *rng_hash = hash_uint2(x, y);
 | |
|   *rng_hash ^= kernel_data.integrator.seed;
 | |
| 
 | |
| #ifdef __DEBUG_CORRELATION__
 | |
|   srand48(*rng_hash + sample);
 | |
| #endif
 | |
| 
 | |
|   if (sample == 0) {
 | |
|     *fx = 0.5f;
 | |
|     *fy = 0.5f;
 | |
|   }
 | |
|   else {
 | |
|     path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy);
 | |
|   }
 | |
| }
 | |
| 
 | |
| /* Linear Congruential Generator */
 | |
| 
 | |
| ccl_device uint lcg_step_uint(uint *rng)
 | |
| {
 | |
|   /* implicit mod 2^32 */
 | |
|   *rng = (1103515245 * (*rng) + 12345);
 | |
|   return *rng;
 | |
| }
 | |
| 
 | |
| ccl_device float lcg_step_float(uint *rng)
 | |
| {
 | |
|   /* implicit mod 2^32 */
 | |
|   *rng = (1103515245 * (*rng) + 12345);
 | |
|   return (float)*rng * (1.0f / (float)0xFFFFFFFF);
 | |
| }
 | |
| 
 | |
| ccl_device uint lcg_init(uint seed)
 | |
| {
 | |
|   uint rng = seed;
 | |
|   lcg_step_uint(&rng);
 | |
|   return rng;
 | |
| }
 | |
| 
 | |
| /* Path Tracing Utility Functions
 | |
|  *
 | |
|  * For each random number in each step of the path we must have a unique
 | |
|  * dimension to avoid using the same sequence twice.
 | |
|  *
 | |
|  * For branches in the path we must be careful not to reuse the same number
 | |
|  * in a sequence and offset accordingly.
 | |
|  */
 | |
| 
 | |
| ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
 | |
|                                           const ccl_addr_space PathState *state,
 | |
|                                           int dimension)
 | |
| {
 | |
|   return path_rng_1D(
 | |
|       kg, state->rng_hash, state->sample, state->num_samples, state->rng_offset + dimension);
 | |
| }
 | |
| 
 | |
| ccl_device_inline void path_state_rng_2D(
 | |
|     KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
 | |
| {
 | |
|   path_rng_2D(kg,
 | |
|               state->rng_hash,
 | |
|               state->sample,
 | |
|               state->num_samples,
 | |
|               state->rng_offset + dimension,
 | |
|               fx,
 | |
|               fy);
 | |
| }
 | |
| 
 | |
| ccl_device_inline float path_state_rng_1D_hash(KernelGlobals *kg,
 | |
|                                                const ccl_addr_space PathState *state,
 | |
|                                                uint hash)
 | |
| {
 | |
|   /* Use a hash instead of dimension, this is not great but avoids adding
 | |
|    * more dimensions to each bounce which reduces quality of dimensions we
 | |
|    * are already using. */
 | |
|   return path_rng_1D(kg,
 | |
|                      cmj_hash_simple(state->rng_hash, hash),
 | |
|                      state->sample,
 | |
|                      state->num_samples,
 | |
|                      state->rng_offset);
 | |
| }
 | |
| 
 | |
| ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg,
 | |
|                                              uint rng_hash,
 | |
|                                              const ccl_addr_space PathState *state,
 | |
|                                              int branch,
 | |
|                                              int num_branches,
 | |
|                                              int dimension)
 | |
| {
 | |
|   return path_rng_1D(kg,
 | |
|                      rng_hash,
 | |
|                      state->sample * num_branches + branch,
 | |
|                      state->num_samples * num_branches,
 | |
|                      state->rng_offset + dimension);
 | |
| }
 | |
| 
 | |
| ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg,
 | |
|                                             uint rng_hash,
 | |
|                                             const ccl_addr_space PathState *state,
 | |
|                                             int branch,
 | |
|                                             int num_branches,
 | |
|                                             int dimension,
 | |
|                                             float *fx,
 | |
|                                             float *fy)
 | |
| {
 | |
|   path_rng_2D(kg,
 | |
|               rng_hash,
 | |
|               state->sample * num_branches + branch,
 | |
|               state->num_samples * num_branches,
 | |
|               state->rng_offset + dimension,
 | |
|               fx,
 | |
|               fy);
 | |
| }
 | |
| 
 | |
| /* Utility functions to get light termination value,
 | |
|  * since it might not be needed in many cases.
 | |
|  */
 | |
| ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg,
 | |
|                                                          const ccl_addr_space PathState *state)
 | |
| {
 | |
|   if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
 | |
|     return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE);
 | |
|   }
 | |
|   return 0.0f;
 | |
| }
 | |
| 
 | |
| ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg,
 | |
|                                                             uint rng_hash,
 | |
|                                                             const ccl_addr_space PathState *state,
 | |
|                                                             int branch,
 | |
|                                                             int num_branches)
 | |
| {
 | |
|   if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
 | |
|     return path_branched_rng_1D(kg, rng_hash, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
 | |
|   }
 | |
|   return 0.0f;
 | |
| }
 | |
| 
 | |
| ccl_device_inline uint lcg_state_init(PathState *state, uint scramble)
 | |
| {
 | |
|   return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
 | |
| }
 | |
| 
 | |
| ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble)
 | |
| {
 | |
|   return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble);
 | |
| }
 | |
| 
 | |
| ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
 | |
| {
 | |
|   /* Implicit mod 2^32 */
 | |
|   *rng = (1103515245 * (*rng) + 12345);
 | |
|   return (float)*rng * (1.0f / (float)0xFFFFFFFF);
 | |
| }
 | |
| 
 | |
| ccl_device_inline bool sample_is_even(int pattern, int sample)
 | |
| {
 | |
|   if (pattern == SAMPLING_PATTERN_PMJ) {
 | |
|     /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
 | |
|      * We can use this to get divide sample sequence into two classes for easier variance
 | |
|      * estimation. */
 | |
| #if defined(__GNUC__) && !defined(__KERNEL_GPU__)
 | |
|     return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
 | |
| #elif defined(__NVCC__)
 | |
|     return __popc(sample & 0xaaaaaaaa) & 1;
 | |
| #elif defined(__KERNEL_OPENCL__)
 | |
|     return popcount(sample & 0xaaaaaaaa) & 1;
 | |
| #else
 | |
|     /* TODO(Stefan): popcnt intrinsic for Windows with fallback for older CPUs. */
 | |
|     int i = sample & 0xaaaaaaaa;
 | |
|     i = i - ((i >> 1) & 0x55555555);
 | |
|     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
 | |
|     i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
 | |
|     return i & 1;
 | |
| #endif
 | |
|   }
 | |
|   else {
 | |
|     /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
 | |
|     return sample & 0x1;
 | |
|   }
 | |
| }
 | |
| 
 | |
| CCL_NAMESPACE_END
 |