Fix T48732 v2: New GGX breaks OpenCL kernel

As far as I can see, the second issue there was that the functions receive a pointer to a member variable of the
ShaderData, which is stored in global memory. However, this means that the pointer points to global memory as well,
therefore OpenCL requires the ccl_addr_space "keyword" in front of the pointer.
With this commit, the OpenCL kernels build on Linux with the Intel CPU OpenCL runtime - however, they already did
without the change and I don't have an AMD card, so I can't really test whether the AMD runtime is happy as well now.
This commit is contained in:
2016-06-26 00:46:27 +02:00
parent aff81c6393
commit 2a69b09b62
3 changed files with 11 additions and 11 deletions

View File

@@ -365,12 +365,12 @@ ccl_device int bsdf_microfacet_multi_ggx_setup(ShaderClosure *sc)
return bsdf_microfacet_multi_ggx_common_setup(sc);
}
ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, uint *lcg_state) {
ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
*pdf = 0.0f;
return make_float3(0.0f, 0.0f, 0.0f);
}
ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, uint *lcg_state) {
ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
bool is_aniso = (sc->data0 != sc->data1);
float3 X, Y, Z;
Z = sc->N;
@@ -389,7 +389,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc
return mf_eval_glossy(localI, localO, true, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, NULL, NULL);
}
ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, uint *lcg_state)
ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
{
bool is_aniso = (sc->data0 != sc->data1);
float3 X, Y, Z;
@@ -429,7 +429,7 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(ShaderClosure *sc)
return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG|SD_BSDF_HAS_CUSTOM;
}
ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, uint *lcg_state) {
ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
float3 X, Y, Z;
Z = sc->N;
make_orthonormals(Z, &X, &Y);
@@ -441,7 +441,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClos
return mf_eval_glass(localI, localO, false, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, sc->data2);
}
ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, uint *lcg_state) {
ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) {
float3 X, Y, Z;
Z = sc->N;
make_orthonormals(Z, &X, &Y);
@@ -453,7 +453,7 @@ ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosu
return mf_eval_glass(localI, localO, true, make_float3(sc->custom1, sc->custom2, sc->custom3), sc->data0, sc->data1, lcg_state, sc->data2);
}
ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, uint *lcg_state)
ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state)
{
float3 X, Y, Z;
Z = sc->N;

View File

@@ -25,7 +25,7 @@
* energy is used. In combination with MIS, that is enough to produce an unbiased result, although
* the balance heuristic isn't necessarily optimal anymore.
*/
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, uint* lcg_state
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state
#ifdef MF_MULTI_GLASS
, const float eta
#elif defined(MF_MULTI_GLOSSY)
@@ -158,7 +158,7 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const boo
* escaped the surface in wo. The function returns the throughput between wi and wo.
* Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal.
*/
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float alpha_x, const float alpha_y, uint *lcg_state
ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint *lcg_state
#ifdef MF_MULTI_GLASS
, const float eta
#elif defined(MF_MULTI_GLOSSY)

View File

@@ -232,14 +232,14 @@ ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG
/* Linear Congruential Generator */
ccl_device uint lcg_step_uint(uint *rng)
ccl_device uint lcg_step_uint(ccl_addr_space uint *rng)
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
return *rng;
}
ccl_device float lcg_step_float(uint *rng)
ccl_device float lcg_step_float(ccl_addr_space uint *rng)
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
@@ -309,7 +309,7 @@ ccl_device_inline void path_state_branch(PathState *state, int branch, int num_b
state->num_samples = state->num_samples*num_branches;
}
ccl_device_inline uint lcg_state_init(RNG *rng, const PathState *state, uint scramble)
ccl_device_inline uint lcg_state_init(RNG *rng, const ccl_addr_space PathState *state, uint scramble)
{
return lcg_init(*rng + state->rng_offset + state->sample*scramble);
}