Fix: use color_srgb_to_linear_v4 in attribute_convert.h #115098
|
@ -66,10 +66,10 @@ template<> struct AttributeConverter<blender::ColorGeometry4b> {
|
|||
static constexpr auto type_desc = TypeRGBA;
|
||||
static CyclesT convert(const blender::ColorGeometry4b &value)
|
||||
{
|
||||
return color_srgb_to_linear(make_float4(byte_to_float(value[0]),
|
||||
byte_to_float(value[1]),
|
||||
byte_to_float(value[2]),
|
||||
byte_to_float(value[3])));
|
||||
return color_srgb_to_linear_v4(make_float4(byte_to_float(value[0]),
|
||||
byte_to_float(value[1]),
|
||||
byte_to_float(value[2]),
|
||||
byte_to_float(value[3])));
|
||||
}
|
||||
};
|
||||
template<> struct AttributeConverter<bool> {
|
||||
|
|
|
@ -253,7 +253,7 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
|
|||
* exp = exponent, encoded as uint32_t
|
||||
* e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
|
||||
*/
|
||||
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const float4 &arg)
|
||||
template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow_sse2(const float4 &arg)
|
||||
{
|
||||
float4 ret = arg * cast(make_int4(e2coeff));
|
||||
ret = make_float4(cast(ret));
|
||||
|
@ -263,7 +263,7 @@ template<unsigned exp, unsigned e2coeff> ccl_device_inline float4 fastpow(const
|
|||
}
|
||||
|
||||
/* Improve the x^(1/5) (fifth-root) solution with the Newton-Raphson method. */
|
||||
ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x)
|
||||
ccl_device_inline float4 improve_5throot_solution_sse2(const float4 &old_result, const float4 &x)
|
||||
{
|
||||
float4 approx2 = old_result * old_result;
|
||||
float4 approx4 = approx2 * approx2;
|
||||
|
@ -273,7 +273,7 @@ ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, cons
|
|||
}
|
||||
|
||||
/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
|
||||
ccl_device_inline float4 fastpow24(const float4 &arg)
|
||||
ccl_device_inline float4 fastpow24_sse2(const float4 &arg)
|
||||
{
|
||||
/* max, avg and |avg| errors were calculated in gcc without FMA instructions
|
||||
* The final precision should be better than powf in glibc */
|
||||
|
@ -281,27 +281,27 @@ ccl_device_inline float4 fastpow24(const float4 &arg)
|
|||
/* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
|
||||
/* 0x3F4CCCCD = 4/5 */
|
||||
/* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
|
||||
float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(
|
||||
float4 x = fastpow_sse2<0x3F4CCCCD, 0x4F55A7FB>(
|
||||
arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05
|
||||
float4 arg2 = arg * arg;
|
||||
float4 arg4 = arg2 * arg2;
|
||||
|
||||
/* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */
|
||||
x = improve_5throot_solution(x, arg4);
|
||||
x = improve_5throot_solution_sse2(x, arg4);
|
||||
/* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */
|
||||
x = improve_5throot_solution(x, arg4);
|
||||
x = improve_5throot_solution_sse2(x, arg4);
|
||||
/* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */
|
||||
x = improve_5throot_solution(x, arg4);
|
||||
x = improve_5throot_solution_sse2(x, arg4);
|
||||
|
||||
return x * (x * x);
|
||||
}
|
||||
|
||||
ccl_device float4 color_srgb_to_linear(const float4 &c)
|
||||
ccl_device float4 color_srgb_to_linear_sse2(const float4 &c)
|
||||
{
|
||||
int4 cmp = c < make_float4(0.04045f);
|
||||
float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f));
|
||||
float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */
|
||||
float4 gte = fastpow24(gtebase);
|
||||
float4 gte = fastpow24_sse2(gtebase);
|
||||
return select(cmp, lt, gte);
|
||||
}
|
||||
#endif /* __KERNEL_SSE2__ */
|
||||
|
@ -328,7 +328,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
|
|||
{
|
||||
#ifdef __KERNEL_SSE2__
|
||||
float4 r = c;
|
||||
r = color_srgb_to_linear(r);
|
||||
r = color_srgb_to_linear_sse2(r);
|
||||
r.w = c.w;
|
||||
return r;
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue