From a06fa0fcfd3cbcd7a90a63002a4742073a181c21 Mon Sep 17 00:00:00 2001 From: Leon Marz Date: Sat, 18 Nov 2023 15:06:55 +0100 Subject: [PATCH 1/2] Fix build error on architectures without SSE or sse2neon As color_srgb_to_linear is only defined for them. Pull Request: https://projects.blender.org/blender/blender/pulls/115098 --- intern/cycles/blender/attribute_convert.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intern/cycles/blender/attribute_convert.h b/intern/cycles/blender/attribute_convert.h index ee26e1d9571..34b75d55855 100644 --- a/intern/cycles/blender/attribute_convert.h +++ b/intern/cycles/blender/attribute_convert.h @@ -66,10 +66,10 @@ template<> struct AttributeConverter { static constexpr auto type_desc = TypeRGBA; static CyclesT convert(const blender::ColorGeometry4b &value) { - return color_srgb_to_linear(make_float4(byte_to_float(value[0]), - byte_to_float(value[1]), - byte_to_float(value[2]), - byte_to_float(value[3]))); + return color_srgb_to_linear_v4(make_float4(byte_to_float(value[0]), + byte_to_float(value[1]), + byte_to_float(value[2]), + byte_to_float(value[3]))); } }; template<> struct AttributeConverter { -- 2.30.2 From 67bc11c859e73058ead1081a82726de2bb3bdc7c Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Wed, 22 Nov 2023 19:41:47 +0100 Subject: [PATCH 2/2] Cleanup: rename sse2 specific functions to avoid accidentally using them Ref #115098 --- intern/cycles/util/color.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h index 8b3082a3ade..842b186899b 100644 --- a/intern/cycles/util/color.h +++ b/intern/cycles/util/color.h @@ -253,7 +253,7 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y) * exp = exponent, encoded as uint32_t * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t */ -template ccl_device_inline float4 fastpow(const float4 &arg) +template ccl_device_inline float4 fastpow_sse2(const float4 &arg) { float4 ret = arg * cast(make_int4(e2coeff)); ret = make_float4(cast(ret)); @@ -263,7 +263,7 @@ template ccl_device_inline float4 fastpow(const } /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ -ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, const float4 &x) +ccl_device_inline float4 improve_5throot_solution_sse2(const float4 &old_result, const float4 &x) { float4 approx2 = old_result * old_result; float4 approx4 = approx2 * approx2; @@ -273,7 +273,7 @@ ccl_device_inline float4 improve_5throot_solution(const float4 &old_result, cons } /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ -ccl_device_inline float4 fastpow24(const float4 &arg) +ccl_device_inline float4 fastpow24_sse2(const float4 &arg) { /* max, avg and |avg| errors were calculated in gcc without FMA instructions * The final precision should be better than powf in glibc */ @@ -281,27 +281,27 @@ ccl_device_inline float4 fastpow24(const float4 &arg) /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ /* 0x3F4CCCCD = 4/5 */ /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ - float4 x = fastpow<0x3F4CCCCD, 0x4F55A7FB>( + float4 x = fastpow_sse2<0x3F4CCCCD, 0x4F55A7FB>( arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 float4 arg2 = arg * arg; float4 arg4 = arg2 * arg2; /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ - x = improve_5throot_solution(x, arg4); + x = improve_5throot_solution_sse2(x, arg4); /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ - x = improve_5throot_solution(x, arg4); + x = improve_5throot_solution_sse2(x, arg4); /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ - x = improve_5throot_solution(x, arg4); + x = improve_5throot_solution_sse2(x, arg4); return x * (x * x); } -ccl_device float4 color_srgb_to_linear(const float4 &c) +ccl_device float4 color_srgb_to_linear_sse2(const float4 &c) { int4 cmp = c < make_float4(0.04045f); float4 lt = max(c * make_float4(1.0f / 12.92f), make_float4(0.0f)); float4 gtebase = (c + make_float4(0.055f)) * make_float4(1.0f / 1.055f); /* fma */ - float4 gte = fastpow24(gtebase); + float4 gte = fastpow24_sse2(gtebase); return select(cmp, lt, gte); } #endif /* __KERNEL_SSE2__ */ @@ -328,7 +328,7 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c) { #ifdef __KERNEL_SSE2__ float4 r = c; - r = color_srgb_to_linear(r); + r = color_srgb_to_linear_sse2(r); r.w = c.w; return r; #else -- 2.30.2