| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2013-08-18 14:16:15 +00:00
										 |  |  |  * Copyright 2011-2013 Blender Foundation | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2013-08-18 14:16:15 +00:00
										 |  |  |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  |  * you may not use this file except in compliance with the License. | 
					
						
							|  |  |  |  * You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2013-08-18 14:16:15 +00:00
										 |  |  |  * http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2013-08-18 14:16:15 +00:00
										 |  |  |  * Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  |  * distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  |  * See the License for the specific language governing permissions and | 
					
						
							| 
									
										
										
										
											2014-12-25 02:50:24 +01:00
										 |  |  |  * limitations under the License. | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * ShaderData, used in four steps: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Setup from incoming ray, sampled position and background. | 
					
						
							|  |  |  |  * Execute for surface, volume or displacement. | 
					
						
							|  |  |  |  * Evaluate one or more closures. | 
					
						
							|  |  |  |  * Release. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | #include "closure/alloc.h"
 | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | #include "closure/bsdf_util.h"
 | 
					
						
							| 
									
										
										
										
											2012-10-20 12:18:00 +00:00
										 |  |  | #include "closure/bsdf.h"
 | 
					
						
							|  |  |  | #include "closure/emissive.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #include "svm/svm.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | CCL_NAMESPACE_BEGIN | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* ShaderData setup from incoming ray */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-05-14 18:46:26 +05:00
										 |  |  | 	if(ccl_fetch(sd, flag) & SD_OBJECT_MOTION) { | 
					
						
							|  |  |  | 		ccl_fetch(sd, ob_tfm) = object_fetch_transform_motion(kg, ccl_fetch(sd, object), time); | 
					
						
							|  |  |  | 		ccl_fetch(sd, ob_itfm) = transform_quick_inverse(ccl_fetch(sd, ob_tfm)); | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	else { | 
					
						
							| 
									
										
										
										
											2015-05-14 18:46:26 +05:00
										 |  |  | 		ccl_fetch(sd, ob_tfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_TRANSFORM); | 
					
						
							|  |  |  | 		ccl_fetch(sd, ob_itfm) = object_fetch_transform(kg, ccl_fetch(sd, object), OBJECT_INVERSE_TRANSFORM); | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-14 14:53:05 +05:00
										 |  |  | ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, | 
					
						
							|  |  |  |                                                ShaderData *sd, | 
					
						
							|  |  |  |                                                const Intersection *isect, | 
					
						
							|  |  |  |                                                const Ray *ray) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							|  |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, object) = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2012-10-15 21:12:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, type) = isect->type; | 
					
						
							|  |  |  | 	ccl_fetch(sd, flag) = kernel_tex_fetch(__object_flag, ccl_fetch(sd, object)); | 
					
						
							| 
									
										
										
										
											2012-12-31 13:07:06 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | 	/* matrices and time */ | 
					
						
							| 
									
										
										
										
											2012-10-09 18:37:14 +00:00
										 |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | 	shader_setup_object_transforms(kg, sd, ray->time); | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, time) = ray->time; | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, prim) = kernel_tex_fetch(__prim_index, isect->prim); | 
					
						
							|  |  |  | 	ccl_fetch(sd, ray_length) = isect->t; | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | #ifdef __UV__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, u) = isect->u; | 
					
						
							|  |  |  | 	ccl_fetch(sd, v) = isect->v; | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | #ifdef __HAIR__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 		/* curve */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, shader) = __float_as_int(curvedata.z); | 
					
						
							|  |  |  | 		ccl_fetch(sd, P) = bvh_curve_refine(kg, sd, isect, ray); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	else | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 		/* static triangle */ | 
					
						
							| 
									
										
										
										
											2014-07-23 12:54:49 +06:00
										 |  |  | 		float3 Ng = triangle_normal(kg, sd); | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, shader) = kernel_tex_fetch(__tri_shader, ccl_fetch(sd, prim)); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/* vectors */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, P) = triangle_refine(kg, sd, isect, ray); | 
					
						
							|  |  |  | 		ccl_fetch(sd, Ng) = Ng; | 
					
						
							|  |  |  | 		ccl_fetch(sd, N) = Ng; | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 		 | 
					
						
							|  |  |  | 		/* smooth normal */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) | 
					
						
							|  |  |  | 			ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __DPDU__
 | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 		/* dPdu/dPdv */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	else { | 
					
						
							|  |  |  | 		/* motion triangle */ | 
					
						
							|  |  |  | 		motion_triangle_shader_setup(kg, sd, isect, ray, false); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, I) = -ray->D; | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	if(isect->object != OBJECT_NONE) { | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 		/* instance transform */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); | 
					
						
							|  |  |  | 		object_normal_transform_auto(kg, sd, &ccl_fetch(sd, Ng)); | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __DPDU__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); | 
					
						
							|  |  |  | 		object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* backfacing test */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if(backfacing) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, flag) |= SD_BACKFACING; | 
					
						
							|  |  |  | 		ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); | 
					
						
							|  |  |  | 		ccl_fetch(sd, N) = -ccl_fetch(sd, N); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __DPDU__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); | 
					
						
							|  |  |  | 		ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __RAY_DIFFERENTIALS__
 | 
					
						
							|  |  |  | 	/* differentials */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	differential_transfer(&ccl_fetch(sd, dP), ray->dP, ray->D, ray->dD, ccl_fetch(sd, Ng), isect->t); | 
					
						
							|  |  |  | 	differential_incoming(&ccl_fetch(sd, dI), ray->dD); | 
					
						
							|  |  |  | 	differential_dudv(&ccl_fetch(sd, du), &ccl_fetch(sd, dv), ccl_fetch(sd, dPdu), ccl_fetch(sd, dPdv), ccl_fetch(sd, dP), ccl_fetch(sd, Ng)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | /* ShaderData setup from BSSRDF scatter */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __SUBSURFACE__
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd, | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 	const Intersection *isect, const Ray *ray) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	bool backfacing = sd->flag & SD_BACKFACING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* object, matrices, time, ray_length stay the same */ | 
					
						
							|  |  |  | 	sd->flag = kernel_tex_fetch(__object_flag, sd->object); | 
					
						
							|  |  |  | 	sd->prim = kernel_tex_fetch(__prim_index, isect->prim); | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	sd->type = isect->type; | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __UV__
 | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 	sd->u = isect->u; | 
					
						
							|  |  |  | 	sd->v = isect->v; | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	/* fetch triangle data */ | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	if(sd->type == PRIMITIVE_TRIANGLE) { | 
					
						
							| 
									
										
										
										
											2014-07-23 12:54:49 +06:00
										 |  |  | 		float3 Ng = triangle_normal(kg, sd); | 
					
						
							| 
									
										
										
										
											2015-04-07 00:25:54 +05:00
										 |  |  | 		sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/* static triangle */ | 
					
						
							|  |  |  | 		sd->P = triangle_refine_subsurface(kg, sd, isect, ray); | 
					
						
							|  |  |  | 		sd->Ng = Ng; | 
					
						
							|  |  |  | 		sd->N = Ng; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if(sd->shader & SHADER_SMOOTH_NORMAL) | 
					
						
							|  |  |  | 			sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __DPDU__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 		/* dPdu/dPdv */ | 
					
						
							|  |  |  | 		triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	else { | 
					
						
							|  |  |  | 		/* motion triangle */ | 
					
						
							|  |  |  | 		motion_triangle_shader_setup(kg, sd, isect, ray, true); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	if(isect->object != OBJECT_NONE) { | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 		/* instance transform */ | 
					
						
							|  |  |  | 		object_normal_transform(kg, sd, &sd->N); | 
					
						
							|  |  |  | 		object_normal_transform(kg, sd, &sd->Ng); | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #    ifdef __DPDU__
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 		object_dir_transform(kg, sd, &sd->dPdu); | 
					
						
							|  |  |  | 		object_dir_transform(kg, sd, &sd->dPdv); | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #    endif
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* backfacing test */ | 
					
						
							|  |  |  | 	if(backfacing) { | 
					
						
							|  |  |  | 		sd->flag |= SD_BACKFACING; | 
					
						
							|  |  |  | 		sd->Ng = -sd->Ng; | 
					
						
							|  |  |  | 		sd->N = -sd->N; | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __DPDU__
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 		sd->dPdu = -sd->dPdu; | 
					
						
							|  |  |  | 		sd->dPdv = -sd->dPdv; | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* should not get used in principle as the shading will only use a diffuse
 | 
					
						
							|  |  |  | 	 * BSDF, but the shader might still access it */ | 
					
						
							|  |  |  | 	sd->I = sd->N; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __RAY_DIFFERENTIALS__
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 	/* differentials */ | 
					
						
							|  |  |  | 	differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); | 
					
						
							|  |  |  | 	/* don't modify dP and dI */ | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /* ShaderData setup from position sampled on mesh */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-03 15:00:55 +01:00
										 |  |  | ccl_device void shader_setup_from_sample(KernelGlobals *kg, | 
					
						
							|  |  |  |                                          ShaderData *sd, | 
					
						
							|  |  |  |                                          const float3 P, | 
					
						
							|  |  |  |                                          const float3 Ng, | 
					
						
							|  |  |  |                                          const float3 I, | 
					
						
							|  |  |  |                                          int shader, int object, int prim, | 
					
						
							|  |  |  |                                          float u, float v, float t, | 
					
						
							|  |  |  |                                          float time) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	/* vectors */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, P) = P; | 
					
						
							|  |  |  | 	ccl_fetch(sd, N) = Ng; | 
					
						
							|  |  |  | 	ccl_fetch(sd, Ng) = Ng; | 
					
						
							|  |  |  | 	ccl_fetch(sd, I) = I; | 
					
						
							|  |  |  | 	ccl_fetch(sd, shader) = shader; | 
					
						
							|  |  |  | 	ccl_fetch(sd, type) = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* primitive */ | 
					
						
							|  |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, object) = object; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2013-09-15 23:58:00 +00:00
										 |  |  | 	/* currently no access to bvh prim index for strand sd->prim*/ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, prim) = prim; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __UV__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, u) = u; | 
					
						
							|  |  |  | 	ccl_fetch(sd, v) = v; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, ray_length) = t; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* detect instancing, for non-instanced the object index is -object-1 */ | 
					
						
							| 
									
										
										
										
											2011-08-10 14:26:51 +00:00
										 |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	bool instanced = false; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, prim) != PRIM_NONE) { | 
					
						
							|  |  |  | 		if(ccl_fetch(sd, object) >= 0) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 			instanced = true; | 
					
						
							|  |  |  | 		else | 
					
						
							| 
									
										
										
										
											2011-08-10 14:26:51 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 			ccl_fetch(sd, object) = ~ccl_fetch(sd, object); | 
					
						
							| 
									
										
										
										
											2011-08-10 14:26:51 +00:00
										 |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-08-10 14:26:51 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); | 
					
						
							|  |  |  | 	if(ccl_fetch(sd, object) != OBJECT_NONE) { | 
					
						
							|  |  |  | 		ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object)); | 
					
						
							| 
									
										
										
										
											2012-10-15 21:12:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-10-09 18:37:14 +00:00
										 |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							| 
									
										
										
										
											2012-10-17 22:48:29 +00:00
										 |  |  | 		shader_setup_object_transforms(kg, sd, time); | 
					
						
							| 
									
										
										
										
											2012-10-15 21:12:58 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, time) = time; | 
					
						
							| 
									
										
										
										
											2012-10-16 13:20:57 +00:00
										 |  |  | #else
 | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, type) & PRIMITIVE_TRIANGLE) { | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 		/* smooth normal */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { | 
					
						
							|  |  |  | 			ccl_fetch(sd, N) = triangle_smooth_normal(kg, ccl_fetch(sd, prim), ccl_fetch(sd, u), ccl_fetch(sd, v)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 			if(instanced) | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 				object_normal_transform_auto(kg, sd, &ccl_fetch(sd, N)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 		/* dPdu/dPdv */ | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __DPDU__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		triangle_dPdudv(kg, ccl_fetch(sd, prim), &ccl_fetch(sd, dPdu), &ccl_fetch(sd, dPdv)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 		if(instanced) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 			object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdu)); | 
					
						
							|  |  |  | 			object_dir_transform_auto(kg, sd, &ccl_fetch(sd, dPdv)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	else { | 
					
						
							|  |  |  | #ifdef __DPDU__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 		ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* backfacing test */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, prim) != PRIM_NONE) { | 
					
						
							|  |  |  | 		bool backfacing = (dot(ccl_fetch(sd, Ng), ccl_fetch(sd, I)) < 0.0f); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(backfacing) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 			ccl_fetch(sd, flag) |= SD_BACKFACING; | 
					
						
							|  |  |  | 			ccl_fetch(sd, Ng) = -ccl_fetch(sd, Ng); | 
					
						
							|  |  |  | 			ccl_fetch(sd, N) = -ccl_fetch(sd, N); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __DPDU__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel also only implements regular path tracing, and supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 			ccl_fetch(sd, dPdu) = -ccl_fetch(sd, dPdu); | 
					
						
							|  |  |  | 			ccl_fetch(sd, dPdv) = -ccl_fetch(sd, dPdv); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __RAY_DIFFERENTIALS__
 | 
					
						
							|  |  |  | 	/* no ray differentials here yet */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, dP) = differential3_zero(); | 
					
						
							|  |  |  | 	ccl_fetch(sd, dI) = differential3_zero(); | 
					
						
							|  |  |  | 	ccl_fetch(sd, du) = differential_zero(); | 
					
						
							|  |  |  | 	ccl_fetch(sd, dv) = differential_zero(); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* ShaderData setup for displacement */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	int object, int prim, float u, float v) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 	int shader; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-13 16:19:12 +06:00
										 |  |  | 	triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* force smooth shading for displacement */ | 
					
						
							| 
									
										
										
										
											2011-11-22 13:15:19 +00:00
										 |  |  | 	shader |= SHADER_SMOOTH_NORMAL; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* watch out: no instance transform currently */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 	shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* ShaderData setup from ray into background */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	/* vectors */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, P) = ray->D; | 
					
						
							|  |  |  | 	ccl_fetch(sd, N) = -ray->D; | 
					
						
							|  |  |  | 	ccl_fetch(sd, Ng) = -ray->D; | 
					
						
							|  |  |  | 	ccl_fetch(sd, I) = -ray->D; | 
					
						
							|  |  |  | 	ccl_fetch(sd, shader) = kernel_data.background.surface_shader; | 
					
						
							|  |  |  | 	ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2); | 
					
						
							| 
									
										
										
										
											2012-10-09 18:37:14 +00:00
										 |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, time) = ray->time; | 
					
						
							| 
									
										
										
										
											2012-04-30 12:49:26 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, ray_length) = 0.0f; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, object) = PRIM_NONE; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, prim) = PRIM_NONE; | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #ifdef __UV__
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, u) = 0.0f; | 
					
						
							|  |  |  | 	ccl_fetch(sd, v) = 0.0f; | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __DPDU__
 | 
					
						
							|  |  |  | 	/* dPdu/dPdv */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, dPdu) = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 	ccl_fetch(sd, dPdv) = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __RAY_DIFFERENTIALS__
 | 
					
						
							|  |  |  | 	/* differentials */ | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, dP) = ray->dD; | 
					
						
							|  |  |  | 	differential_incoming(&ccl_fetch(sd, dI), ccl_fetch(sd, dP)); | 
					
						
							|  |  |  | 	ccl_fetch(sd, du) = differential_zero(); | 
					
						
							|  |  |  | 	ccl_fetch(sd, dv) = differential_zero(); | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* ShaderData setup from point inside volume */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | #ifdef __VOLUME__
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray) | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | { | 
					
						
							|  |  |  | 	/* vectors */ | 
					
						
							|  |  |  | 	sd->P = ray->P; | 
					
						
							|  |  |  | 	sd->N = -ray->D;   | 
					
						
							|  |  |  | 	sd->Ng = -ray->D; | 
					
						
							|  |  |  | 	sd->I = -ray->D; | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	sd->shader = SHADER_NONE; | 
					
						
							| 
									
										
										
										
											2013-12-28 23:00:51 +01:00
										 |  |  | 	sd->flag = 0; | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							|  |  |  | 	sd->time = ray->time; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __INSTANCING__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */ | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	sd->prim = PRIM_NONE; | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	sd->type = PRIMITIVE_NONE; | 
					
						
							| 
									
										
										
										
											2013-12-28 16:56:19 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __UV__
 | 
					
						
							|  |  |  | 	sd->u = 0.0f; | 
					
						
							|  |  |  | 	sd->v = 0.0f; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __DPDU__
 | 
					
						
							|  |  |  | 	/* dPdu/dPdv */ | 
					
						
							|  |  |  | 	sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 	sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __RAY_DIFFERENTIALS__
 | 
					
						
							|  |  |  | 	/* differentials */ | 
					
						
							|  |  |  | 	sd->dP = ray->dD; | 
					
						
							|  |  |  | 	differential_incoming(&sd->dI, sd->dP); | 
					
						
							| 
									
										
										
										
											2013-05-03 21:34:51 +00:00
										 |  |  | 	sd->du = differential_zero(); | 
					
						
							|  |  |  | 	sd->dv = differential_zero(); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2013-06-08 10:51:33 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* for NDC coordinates */ | 
					
						
							|  |  |  | 	sd->ray_P = ray->P; | 
					
						
							|  |  |  | 	sd->ray_dP = ray->dP; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | /* Merging */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
 | 
					
						
							|  |  |  | ccl_device void shader_merge_closures(ShaderData *sd) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* merge identical closures, better when we sample a single closure at a time */ | 
					
						
							|  |  |  | 	for(int i = 0; i < sd->num_closure; i++) { | 
					
						
							|  |  |  | 		ShaderClosure *sci = &sd->closure[i]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for(int j = i + 1; j < sd->num_closure; j++) { | 
					
						
							|  |  |  | 			ShaderClosure *scj = &sd->closure[j]; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			if(sci->type != scj->type) | 
					
						
							| 
									
										
										
										
											2014-04-03 15:51:43 +02:00
										 |  |  | 				continue; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			if(!bsdf_merge(sci, scj)) | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evalation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | 				continue; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-04-03 15:51:43 +02:00
										 |  |  | 			sci->weight += scj->weight; | 
					
						
							|  |  |  | 			sci->sample_weight += scj->sample_weight; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			int size = sd->num_closure - (j+1); | 
					
						
							|  |  |  | 			if(size > 0) { | 
					
						
							|  |  |  | 				for(int k = 0; k < size; k++) { | 
					
						
							|  |  |  | 					scj[k] = scj[k+1]; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			sd->num_closure--; | 
					
						
							| 
									
										
										
										
											2015-06-13 18:17:16 +02:00
										 |  |  | 			kernel_assert(sd->num_closure >= 0); | 
					
						
							| 
									
										
										
										
											2014-04-03 15:51:43 +02:00
										 |  |  | 			j--; | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /* BSDF */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf, | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 	int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 	/* this is the veach one-sample model with balance heuristic, some pdf
 | 
					
						
							|  |  |  | 	 * factors drop out when using balance heuristic weighting */ | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 		if(i == skip_bsdf) | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSDF(sc->type)) { | 
					
						
							|  |  |  | 			float bsdf_pdf = 0.0f; | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 			float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 			if(bsdf_pdf != 0.0f) { | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 				bsdf_eval_accum(result_eval, sc->type, eval*sc->weight); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 				sum_pdf += bsdf_pdf*sc->sample_weight; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			sum_sample_weight += sc->sample_weight; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-01-31 15:59:30 +00:00
										 |  |  | 	*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-20 17:44:19 +01:00
										 |  |  | #ifdef __BRANCHED_PATH__
 | 
					
						
							|  |  |  | ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evalation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  |                                                         ShaderData *sd, | 
					
						
							| 
									
										
										
										
											2015-11-20 17:44:19 +01:00
										 |  |  |                                                         const float3 omega_in, | 
					
						
							|  |  |  |                                                         BsdfEval *result_eval, | 
					
						
							|  |  |  |                                                         float light_pdf, | 
					
						
							|  |  |  |                                                         bool use_mis) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							|  |  |  | 		const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSDF(sc->type)) { | 
					
						
							|  |  |  | 			float bsdf_pdf = 0.0f; | 
					
						
							|  |  |  | 			float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); | 
					
						
							|  |  |  | 			if(bsdf_pdf != 0.0f) { | 
					
						
							|  |  |  | 				float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f; | 
					
						
							|  |  |  | 				bsdf_eval_accum(result_eval, | 
					
						
							|  |  |  | 				                sc->type, | 
					
						
							|  |  |  | 				                eval * sc->weight * mis_weight); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ccl_device void shader_bsdf_eval(KernelGlobals *kg, | 
					
						
							|  |  |  |                                  ShaderData *sd, | 
					
						
							|  |  |  |                                  const float3 omega_in, | 
					
						
							|  |  |  |                                  BsdfEval *eval, | 
					
						
							|  |  |  |                                  float light_pdf, | 
					
						
							|  |  |  |                                  bool use_mis) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-11-20 17:44:19 +01:00
										 |  |  | #ifdef __BRANCHED_PATH__
 | 
					
						
							|  |  |  | 	if(kernel_data.integrator.branched) | 
					
						
							|  |  |  | 		_shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis); | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		float pdf; | 
					
						
							|  |  |  | 		_shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, -1, eval, 0.0f, 0.0f); | 
					
						
							|  |  |  | 		if(use_mis) { | 
					
						
							|  |  |  | 			float weight = power_heuristic(light_pdf, pdf); | 
					
						
							|  |  |  | 			bsdf_eval_mul(eval, make_float3(weight, weight, weight)); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd, | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 	float randu, float randv, BsdfEval *bsdf_eval, | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	float3 *omega_in, differential3 *domega_in, float *pdf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int sampled = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, num_closure) > 1) { | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 		/* pick a BSDF closure based on sample weights */ | 
					
						
							|  |  |  | 		float sum = 0.0f; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) { | 
					
						
							|  |  |  | 			const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 			 | 
					
						
							|  |  |  | 			if(CLOSURE_IS_BSDF(sc->type)) | 
					
						
							|  |  |  | 				sum += sc->sample_weight; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		float r = ccl_fetch(sd, randb_closure)*sum; | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 		sum = 0.0f; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		for(sampled = 0; sampled < ccl_fetch(sd, num_closure); sampled++) { | 
					
						
							|  |  |  | 			const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 			 | 
					
						
							|  |  |  | 			if(CLOSURE_IS_BSDF(sc->type)) { | 
					
						
							| 
									
										
										
										
											2013-04-01 20:26:52 +00:00
										 |  |  | 				sum += sc->sample_weight; | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 				if(r <= sum) | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		if(sampled == ccl_fetch(sd, num_closure)) { | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 			*pdf = 0.0f; | 
					
						
							|  |  |  | 			return LABEL_NONE; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	const ShaderClosure *sc = ccl_fetch_array(sd, closure, sampled); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	int label; | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 	float3 eval; | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	*pdf = 0.0f; | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 	label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-07 17:32:01 +00:00
										 |  |  | 	if(*pdf != 0.0f) { | 
					
						
							|  |  |  | 		bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		if(ccl_fetch(sd, num_closure) > 1) { | 
					
						
							| 
									
										
										
										
											2012-02-07 17:32:01 +00:00
										 |  |  | 			float sweight = sc->sample_weight; | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 			_shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sampled, bsdf_eval, *pdf*sweight, sweight); | 
					
						
							| 
									
										
										
										
											2012-02-07 17:32:01 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return label; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practice, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd, | 
					
						
							| 
									
										
										
										
											2012-06-13 11:44:48 +00:00
										 |  |  | 	const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval, | 
					
						
							|  |  |  | 	float3 *omega_in, differential3 *domega_in, float *pdf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int label; | 
					
						
							|  |  |  | 	float3 eval; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*pdf = 0.0f; | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 	label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-13 11:44:48 +00:00
										 |  |  | 	if(*pdf != 0.0f) | 
					
						
							|  |  |  | 		bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return label; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 		if(CLOSURE_IS_BSDF(sc->type)) | 
					
						
							|  |  |  | 			bsdf_blur(kg, sc, roughness); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	if(ccl_fetch(sd, flag) & SD_HAS_ONLY_VOLUME) | 
					
						
							| 
									
										
										
										
											2014-05-26 12:50:57 +02:00
										 |  |  | 		return make_float3(1.0f, 1.0f, 1.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | 		if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) // todo: make this work for osl
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 			eval += sc->weight; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
										
											2013-09-17 13:22:42 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f)); | 
					
						
							|  |  |  | 	alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f)); | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	return alpha; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) | 
					
						
							|  |  |  | 			eval += sc->weight; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) | 
					
						
							|  |  |  | 			eval += sc->weight; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tomorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) | 
					
						
							|  |  |  | 			eval += sc->weight; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
										
											2013-08-03 13:12:09 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2013-08-03 13:12:09 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-10 17:28:44 +05:00
										 |  |  | 		if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) | 
					
						
							| 
									
										
										
										
											2013-08-03 13:12:09 +00:00
										 |  |  | 			eval += sc->weight; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_) | 
					
						
							| 
									
										
										
										
											2012-11-06 19:59:02 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 	float3 N = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related to the AMD megakernel split,
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else whom we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force the split OpenCL kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all features are supported yet: there is no motion blur, camera blur,
SSS or volumetrics for now. Also, transparent shadows are disabled on AMD
devices because of a compiler bug.
This kernel also only implements regular path tracing; supporting the
branched one will take a bit. Branched path tracing is still exposed in the
interface, which is a bit misleading, and will be hidden there soon.
More features will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2012-11-06 19:59:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 		if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 			eval += sc->weight*ao_factor; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			N += bsdf->N*average(sc->weight); | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2013-08-14 19:38:14 +00:00
										 |  |  | 		else if(CLOSURE_IS_AMBIENT_OCCLUSION(sc->type)) { | 
					
						
							| 
									
										
										
										
											2012-11-06 19:59:02 +00:00
										 |  |  | 			eval += sc->weight; | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 			N += ccl_fetch(sd, N)*average(sc->weight); | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2012-11-06 19:59:02 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 	if(is_zero(N)) | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		N = ccl_fetch(sd, N); | 
					
						
							| 
									
										
										
										
											2012-11-23 13:41:25 +00:00
										 |  |  | 	else | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 		N = normalize(N); | 
					
						
							| 
									
										
										
										
											2012-11-15 15:37:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 	*N_ = N; | 
					
						
							| 
									
										
										
										
											2012-11-06 19:59:02 +00:00
										 |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | #ifdef __SUBSURFACE__
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_) | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 	float3 N = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 	float texture_blur = 0.0f, weight_sum = 0.0f; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_BSSRDF(sc->type)) { | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			const Bssrdf *bssrdf = (const Bssrdf*)sc; | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 			float avg_weight = fabsf(average(sc->weight)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			N += bssrdf->N*avg_weight; | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 			eval += sc->weight; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 			texture_blur += bssrdf->texture_blur*avg_weight; | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 			weight_sum += avg_weight; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(N_) | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		*N_ = (is_zero(N))? ccl_fetch(sd, N): normalize(N); | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if(texture_blur_) | 
					
						
							|  |  |  | 		*texture_blur_ = texture_blur/weight_sum; | 
					
						
							|  |  |  | 	 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2013-08-18 14:15:57 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | /* Emission */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure *sc) | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	return emissive_simple_eval(ccl_fetch(sd, Ng), ccl_fetch(sd, I)); | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | 	float3 eval; | 
					
						
							|  |  |  | 	eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							|  |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 		if(CLOSURE_IS_EMISSION(sc->type)) | 
					
						
							|  |  |  | 			eval += emissive_eval(kg, sd, sc)*sc->weight; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return eval; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-08-28 13:55:59 +00:00
										 |  |  | /* Holdout */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) | 
					
						
							| 
									
										
										
										
											2011-08-28 13:55:59 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 	float3 weight = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							|  |  |  | 		ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if(CLOSURE_IS_HOLDOUT(sc->type)) | 
					
						
							|  |  |  | 			weight += sc->weight; | 
					
						
							| 
									
										
										
										
											2011-08-28 13:55:59 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return weight; | 
					
						
							| 
									
										
										
										
											2011-08-28 13:55:59 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /* Surface Evaluation */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-28 17:11:17 +05:00
										 |  |  | ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng, | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 	ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, num_closure) = 0; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	ccl_fetch(sd, num_closure_extra) = 0; | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, randb_closure) = randb; | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | #ifdef __OSL__
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	if(kg->osl) | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 		OSLShader::eval_surface(kg, sd, state, path_flag, ctx); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 	else | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __SVM__
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 		svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #else
 | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 		ccl_fetch_array(sd, closure, 0)->weight = make_float3(0.8f, 0.8f, 0.8f); | 
					
						
							|  |  |  | 		ccl_fetch_array(sd, closure, 0)->N = ccl_fetch(sd, N); | 
					
						
							|  |  |  | 		ccl_fetch_array(sd, closure, 0)->data0 = 0.0f; | 
					
						
							|  |  |  | 		ccl_fetch_array(sd, closure, 0)->data1 = 0.0f; | 
					
						
							|  |  |  | 		ccl_fetch(sd, flag) |= bsdf_diffuse_setup(ccl_fetch_array(sd, closure, 0)); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evalation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) { | 
					
						
							| 
									
										
										
										
											2016-06-28 17:11:17 +05:00
										 |  |  | 		ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953); | 
					
						
							| 
									
										
											  
											
												Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practise, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evalation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
											
										 
											2016-06-23 22:56:43 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Background Evaluation */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, | 
					
						
							|  |  |  | 	ccl_addr_space PathState *state, int path_flag, ShaderContext ctx) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, num_closure) = 0; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	ccl_fetch(sd, num_closure_extra) = 0; | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, randb_closure) = 0.0f; | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | #ifdef __SVM__
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | #ifdef __OSL__
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	if(kg->osl) { | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 		OSLShader::eval_background(kg, sd, state, path_flag, ctx); | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 	else | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 		svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	float3 eval = make_float3(0.0f, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	for(int i = 0; i < ccl_fetch(sd, num_closure); i++) { | 
					
						
							|  |  |  | 		const ShaderClosure *sc = ccl_fetch_array(sd, closure, i); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 		if(CLOSURE_IS_BACKGROUND(sc->type)) | 
					
						
							|  |  |  | 			eval += sc->weight; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	return eval; | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	return make_float3(0.8f, 0.8f, 0.8f); | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Volume */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | #ifdef __VOLUME__
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf, | 
					
						
							|  |  |  | 	int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2015-06-29 00:56:04 +02:00
										 |  |  | 	for(int i = 0; i < sd->num_closure; i++) { | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | 		if(i == skip_phase) | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 		const ShaderClosure *sc = &sd->closure[i]; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | 		if(CLOSURE_IS_PHASE(sc->type)) { | 
					
						
							|  |  |  | 			float phase_pdf = 0.0f; | 
					
						
							|  |  |  | 			float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if(phase_pdf != 0.0f) { | 
					
						
							|  |  |  | 				bsdf_eval_accum(result_eval, sc->type, eval); | 
					
						
							| 
									
										
										
										
											2014-04-04 16:45:49 +02:00
										 |  |  | 				sum_pdf += phase_pdf*sc->sample_weight; | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			sum_sample_weight += sc->sample_weight; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-09-12 13:13:56 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | 	*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd, | 
					
						
							|  |  |  | 	const float3 omega_in, BsdfEval *eval, float *pdf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	_shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f); | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 15:40:43 +01:00
										 |  |  | ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd, | 
					
						
							|  |  |  | 	float randu, float randv, BsdfEval *phase_eval, | 
					
						
							|  |  |  | 	float3 *omega_in, differential3 *domega_in, float *pdf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int sampled = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(sd->num_closure > 1) { | 
					
						
							|  |  |  | 		/* pick a phase closure based on sample weights */ | 
					
						
							|  |  |  | 		float sum = 0.0f; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for(sampled = 0; sampled < sd->num_closure; sampled++) { | 
					
						
							|  |  |  | 			const ShaderClosure *sc = &sd->closure[sampled]; | 
					
						
							|  |  |  | 			 | 
					
						
							|  |  |  | 			if(CLOSURE_IS_PHASE(sc->type)) | 
					
						
							|  |  |  | 				sum += sc->sample_weight; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		float r = sd->randb_closure*sum; | 
					
						
							|  |  |  | 		sum = 0.0f; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for(sampled = 0; sampled < sd->num_closure; sampled++) { | 
					
						
							|  |  |  | 			const ShaderClosure *sc = &sd->closure[sampled]; | 
					
						
							|  |  |  | 			 | 
					
						
							|  |  |  | 			if(CLOSURE_IS_PHASE(sc->type)) { | 
					
						
							|  |  |  | 				sum += sc->sample_weight; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if(r <= sum) | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if(sampled == sd->num_closure) { | 
					
						
							|  |  |  | 			*pdf = 0.0f; | 
					
						
							|  |  |  | 			return LABEL_NONE; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* todo: this isn't quite correct, we don't weight anisotropy properly
 | 
					
						
							|  |  |  | 	 * depending on color channels, even if this is perhaps not a common case */ | 
					
						
							|  |  |  | 	const ShaderClosure *sc = &sd->closure[sampled]; | 
					
						
							|  |  |  | 	int label; | 
					
						
							|  |  |  | 	float3 eval; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*pdf = 0.0f; | 
					
						
							|  |  |  | 	label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(*pdf != 0.0f) { | 
					
						
							|  |  |  | 		bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return label; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd, | 
					
						
							|  |  |  | 	const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval, | 
					
						
							|  |  |  | 	float3 *omega_in, differential3 *domega_in, float *pdf) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int label; | 
					
						
							|  |  |  | 	float3 eval; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*pdf = 0.0f; | 
					
						
							|  |  |  | 	label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(*pdf != 0.0f) | 
					
						
							|  |  |  | 		bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return label; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /* Volume Evaluation */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 	PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	/* reset closures once at the start, we will be accumulating the closures
 | 
					
						
							|  |  |  | 	 * for all volumes in the stack into a single array of closures */ | 
					
						
							|  |  |  | 	sd->num_closure = 0; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	sd->num_closure_extra = 0; | 
					
						
							| 
									
										
										
										
											2013-12-28 23:00:51 +01:00
										 |  |  | 	sd->flag = 0; | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 	for(int i = 0; stack[i].shader != SHADER_NONE; i++) { | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		/* setup shaderdata from stack. it's mostly setup already in
 | 
					
						
							|  |  |  | 		 * shader_setup_from_volume, this switching should be quick */ | 
					
						
							|  |  |  | 		sd->object = stack[i].object; | 
					
						
							|  |  |  | 		sd->shader = stack[i].shader; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS); | 
					
						
							|  |  |  | 		sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:47 +01:00
										 |  |  | 		if(sd->object != OBJECT_NONE) { | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 			sd->flag |= kernel_tex_fetch(__object_flag, sd->object); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __OBJECT_MOTION__
 | 
					
						
							|  |  |  | 			/* todo: this is inefficient for motion blur, we should be
 | 
					
						
							|  |  |  | 			 * caching matrices instead of recomputing them each step */ | 
					
						
							|  |  |  | 			shader_setup_object_transforms(kg, sd, sd->time); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* evaluate shader */ | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #ifdef __SVM__
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __OSL__
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		if(kg->osl) { | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 			OSLShader::eval_volume(kg, sd, state, path_flag, ctx); | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 			svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag); | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/* merge closures to avoid exceeding number of closures limit */ | 
					
						
							|  |  |  | 		if(i > 0) | 
					
						
							|  |  |  | 			shader_merge_closures(sd); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-07 15:48:04 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | /* Displacement Evaluation */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderContext ctx) | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, num_closure) = 0; | 
					
						
							| 
									
										
										
										
											2016-07-25 03:03:23 +02:00
										 |  |  | 	ccl_fetch(sd, num_closure_extra) = 0; | 
					
						
							| 
									
										
											  
											
												Cycles: OpenCL kernel split
This commit contains all the work related on the AMD megakernel split work
which was mainly done by Varun Sundar, George Kyriazis and Lenny Wang, plus
some help from Sergey Sharybin, Martijn Berger, Thomas Dinges and likely
someone else which we're forgetting to mention.
Currently only AMD cards are enabled for the new split kernel, but it is
possible to force split opencl kernel to be used by setting the following
environment variable: CYCLES_OPENCL_SPLIT_KERNEL_TEST=1.
Not all the features are supported yet, and that being said no motion blur,
camera blur, SSS and volumetrics for now. Also transparent shadows are
disabled on AMD device because of some compiler bug.
This kernel is also only implements regular path tracing and supporting
branched one will take a bit. Branched path tracing is exposed to the
interface still, which is a bit misleading and will be hidden there soon.
More feature will be enabled once they're ported to the split kernel and
tested.
Neither regular CPU nor CUDA has any difference, they're generating the
same exact code, which means no regressions/improvements there.
Based on the research paper:
  https://research.nvidia.com/sites/default/files/publications/laine2013hpg_paper.pdf
Here's the documentation:
  https://docs.google.com/document/d/1LuXW-CV-sVJkQaEGZlMJ86jZ8FmoPfecaMdR-oiWbUY/edit
Design discussion of the patch:
  https://developer.blender.org/T44197
Differential Revision: https://developer.blender.org/D1200
											
										 
											2015-05-09 19:34:30 +05:00
										 |  |  | 	ccl_fetch(sd, randb_closure) = 0.0f; | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | 	/* this will modify sd->P */ | 
					
						
							|  |  |  | #ifdef __SVM__
 | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  ifdef __OSL__
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	if(kg->osl) | 
					
						
							| 
									
										
										
										
											2012-12-15 10:18:42 +00:00
										 |  |  | 		OSLShader::eval_displacement(kg, sd, ctx); | 
					
						
							| 
									
										
										
										
											2012-09-03 13:56:40 +00:00
										 |  |  | 	else | 
					
						
							| 
									
										
										
										
											2016-02-12 18:33:43 +01:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2016-01-06 23:38:13 +01:00
										 |  |  | 		svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0); | 
					
						
							| 
									
										
										
										
											2013-12-28 20:02:40 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | /* Transparent Shadows */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __TRANSPARENT_SHADOWS__
 | 
					
						
							| 
									
										
										
										
											2013-11-16 00:17:10 +01:00
										 |  |  | ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect) | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | { | 
					
						
							|  |  |  | 	int prim = kernel_tex_fetch(__prim_index, isect->prim); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 	int shader = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __HAIR__
 | 
					
						
							| 
									
										
										
										
											2014-03-29 13:03:46 +01:00
										 |  |  | 	if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) { | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2014-09-24 13:34:28 +02:00
										 |  |  | 		shader = kernel_tex_fetch(__tri_shader, prim); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | #ifdef __HAIR__
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else { | 
					
						
							| 
									
										
										
										
											2013-01-03 12:09:09 +00:00
										 |  |  | 		float4 str = kernel_tex_fetch(__curves, prim); | 
					
						
							| 
									
										
										
										
											2012-12-28 14:21:30 +00:00
										 |  |  | 		shader = __float_as_int(str.z); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
											
												Cycles: Render Passes
Currently supported passes:
* Combined, Z, Normal, Object Index, Material Index, Emission, Environment,
  Diffuse/Glossy/Transmission x Direct/Indirect/Color
Not supported yet:
* UV, Vector, Mist
Only enabled for CPU devices at the moment, will do GPU tweaks tommorrow,
also for environment importance sampling.
Documentation:
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Passes
											
										 
											2012-01-25 17:23:52 +00:00
										 |  |  | 	int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-18 09:36:00 +00:00
										 |  |  | 	return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; | 
					
						
							| 
									
										
										
										
											2011-09-27 20:37:24 +00:00
										 |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-04-27 11:58:34 +00:00
										 |  |  | CCL_NAMESPACE_END | 
					
						
							|  |  |  | 
 |