2022-02-11 13:53:21 +01:00
|
|
|
/* SPDX-License-Identifier: Apache-2.0
|
|
|
|
* Copyright 2011-2022 Blender Foundation */
|
2014-01-15 15:29:22 +01:00
|
|
|
|
|
|
|
#ifndef __UTIL_HALF_H__
|
|
|
|
#define __UTIL_HALF_H__
|
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/math.h"
|
|
|
|
#include "util/types.h"
|
2014-01-15 15:29:22 +01:00
|
|
|
|
2021-02-14 15:34:23 +01:00
|
|
|
#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__)
|
2021-10-24 14:19:19 +02:00
|
|
|
# include "util/simd.h"
|
2014-02-27 17:29:35 +04:00
|
|
|
#endif
|
|
|
|
|
2014-01-15 15:29:22 +01:00
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
/* Half Floats */
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if defined(__KERNEL_METAL__)
|
|
|
|
|
|
|
|
/* Expand a native half into a 32-bit float by moving its bit fields into place.
 *
 * The sign, exponent and mantissa are shifted to their float positions and the
 * exponent is re-biased. No exponent value (zero, denormal, inf, NaN) gets any
 * special handling here. */
ccl_device_inline float half_to_float(half h_in)
{
  /* View the half through its raw 16-bit pattern. */
  union {
    half h;
    uint16_t s;
  } val;
  val.h = h_in;

  /* Bit 15 of the half becomes bit 31 of the float. */
  const int sign = (val.s & 0x8000) << 16;
  /* 0x1C000 is (127 - 15) << 10: it converts the half exponent bias to the float bias
   * before the field is shifted into place. */
  const int exponent = ((val.s & 0x7c00) + 0x1C000) << 13;
  /* The 10 mantissa bits become the top 10 of the float's 23 mantissa bits. */
  const int mantissa = (val.s & 0x03FF) << 13;

  float f;
  *((ccl_private int *)&f) = sign | exponent | mantissa;
  return f;
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2016-08-11 22:47:53 +02:00
|
|
|
/* CUDA, HIP and oneAPI have their own half data type, no need to define it then. */
|
2022-06-29 12:58:04 +02:00
|
|
|
# if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) && !defined(__KERNEL_ONEAPI__)
|
2019-05-01 21:14:11 +10:00
|
|
|
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
|
|
|
|
* unsigned shorts. */
|
2018-07-05 12:37:52 +02:00
|
|
|
/* Thin wrapper around the raw 16 bits of a half float. It only stores and hands back the bit
 * pattern; no arithmetic is defined on it. */
class half {
 public:
  /* Defaulted constructor: a default-initialized half holds an unspecified bit pattern. */
  half() = default;

  /* Wrap an existing 16-bit pattern. */
  half(const unsigned short &i) : v(i) {}

  /* Read back the raw bit pattern. Const-qualified so that it can also be used on const
   * objects and const references. */
  operator unsigned short() const
  {
    return v;
  }

  /* Overwrite the stored bit pattern. */
  half &operator=(const unsigned short &i)
  {
    v = i;
    return *this;
  }

 private:
  /* Raw half float bits. */
  unsigned short v;
};
|
2021-11-18 14:25:05 +01:00
|
|
|
# endif
|
2016-08-11 22:47:53 +02:00
|
|
|
|
2014-01-15 15:29:22 +01:00
|
|
|
struct half4 {
|
|
|
|
half x, y, z, w;
|
|
|
|
};
|
2021-11-18 14:25:05 +01:00
|
|
|
#endif
|
2014-01-15 15:29:22 +01:00
|
|
|
|
2021-10-21 19:25:38 +02:00
|
|
|
/* Conversion to/from half float for image textures
|
|
|
|
*
|
|
|
|
 * Simplified float to half for fast sampling on processors without a native
|
|
|
|
* instruction, and eliminating any NaN and inf values. */
|
2014-01-15 15:29:22 +01:00
|
|
|
|
2021-10-21 19:25:38 +02:00
|
|
|
/* Convert a float to a half for storage in an image texture.
 *
 * On devices with a native half type the value is limited to 65504 from above and converted
 * by the device. Otherwise the bits are repacked by hand: values below the smallest normal
 * half become zero, values above the half range (including inf and NaN bit patterns) become
 * the largest finite half, and the sign is preserved. */
ccl_device_inline half float_to_half_image(float f)
{
#if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
  return half(min(f, 65504.0f));
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
  return __float2half(min(f, 65504.0f));
#else
  const uint bits = __float_as_uint(f);

  /* Float sign (bit 31) moved to the half sign position (bit 15). */
  const uint sign = (bits & 0x80000000) >> 16;
  /* Float exponent field, left in place for the range tests below. */
  const uint exponent = bits & 0x7f800000;

  /* Exponent and mantissa with the mantissa truncated to 10 bits, then the exponent re-biased
   * from float to half: 0x1c000 is (127 - 15) << 10. */
  const uint rebased = ((bits & 0x7fffffff) >> 13) - 0x1c000;

  /* 0x38800000 is the exponent field of 2^-14, the smallest normal half: anything below it
   * (including zero and float denormals) is flushed to zero.
   * 0x47000000 is the exponent field of 2^15, the largest half exponent: anything above it
   * is clamped to 0x7bff, the largest finite half. */
  const uint magnitude = (exponent < 0x38800000) ? 0 :
                         (exponent > 0x47000000) ? 0x7bff :
                                                   rebased;

  /* Re-insert sign bit and return. */
  return (magnitude | sign);
#endif
}
|
|
|
|
|
|
|
|
/* Convert a half read from an image texture to a float.
 *
 * Uses the device conversion where one is available. The fallback moves the half bit fields
 * into their float positions and re-biases the exponent, without any special handling of
 * particular exponent values. */
ccl_device_inline float half_to_float_image(half h)
{
#if defined(__KERNEL_METAL__)
  return half_to_float(h);
#elif defined(__KERNEL_ONEAPI__)
  return float(h);
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
  return __half2float(h);
#else
  /* Raw 16-bit pattern of the half. */
  const int bits = h;

  /* Bit 15 of the half becomes bit 31 of the float. */
  const int sign = (bits & 0x8000) << 16;
  /* 0x1C000 is (127 - 15) << 10, converting the half exponent bias to the float bias. */
  const int exponent = ((bits & 0x7c00) + 0x1C000) << 13;
  /* The 10 mantissa bits become the top bits of the float mantissa. */
  const int mantissa = (bits & 0x03FF) << 13;

  return __int_as_float(sign | exponent | mantissa);
#endif
}
|
|
|
|
|
2021-10-21 19:25:38 +02:00
|
|
|
/* Convert all four channels of a half4 image texel to a float4, one channel at a time with
 * half_to_float_image(). */
ccl_device_inline float4 half4_to_float4_image(const half4 h)
{
  /* Unable to use because it gives different results than half_to_float_image, can we
   * modify float_to_half_image so the conversion results are identical? */
#if 0 /* defined(__KERNEL_AVX2__) */
  /* CPU: AVX. */
  __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h));
  return float4(_mm_cvtph_ps(x));
#endif

  const float x = half_to_float_image(h.x);
  const float y = half_to_float_image(h.y);
  const float z = half_to_float_image(h.z);
  const float w = half_to_float_image(h.w);
  return make_float4(x, y, z, w);
}
|
|
|
|
|
|
|
|
/* Conversion to half float texture for display.
|
|
|
|
*
|
|
|
|
* Simplified float to half for fast display texture conversion on processors
|
|
|
|
* without a native instruction. Assumes no negative, no NaN, no inf, and sets
|
|
|
|
* denormal to 0. */
|
|
|
|
|
|
|
|
/* Convert a float to a half for a display texture.
 *
 * On devices with a native half type the value is limited to 65504 from above and converted
 * by the device. The fallback clamps the input to [0, 65504] (anything that does not compare
 * greater than zero becomes zero), flushes values below the smallest normal half to zero and
 * truncates the mantissa. */
ccl_device_inline half float_to_half_display(const float f)
{
#if defined(__KERNEL_METAL__) || defined(__KERNEL_ONEAPI__)
  return half(min(f, 65504.0f));
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
  return __float2half(min(f, 65504.0f));
#else
  /* Clamp to the non-negative half range; comparisons with NaN fail, so NaN maps to zero. */
  float clamped = 0.0f;
  if (f > 0.0f) {
    clamped = (f < 65504.0f) ? f : 65504.0f;
  }

  const int magnitude = __float_as_int(clamped) & 0x7FFFFFFF;

  /* 0x38800000 is the bit pattern of 2^-14, the smallest normal half. Smaller values become
   * zero. Otherwise subtracting 0x38000000 ((127 - 15) << 23) re-biases the exponent from
   * float to half; this matches adding 0xC8000000 modulo 2^32. */
  const int rebased = (magnitude < 0x38800000) ? 0 : (magnitude - 0x38000000);

  /* Drop the 13 low mantissa bits and keep the 15 exponent and mantissa bits of the half. */
  return ((rebased >> 13) & 0x7FFF);
#endif
}
|
|
|
|
|
|
|
|
/* Convert four floats to four halves for a display texture.
 *
 * With SSE the four channels are clamped to [0, 65504] and converted together, using the
 * hardware conversion when AVX2 is enabled and the same bit manipulation as
 * float_to_half_display() otherwise. Without SSE each channel goes through
 * float_to_half_display(). */
ccl_device_inline half4 float4_to_half4_display(const float4 f)
{
#ifdef __KERNEL_SSE__
  /* CPU: SSE and AVX. */
  float4 clamped = min(max(f, make_float4(0.0f)), make_float4(65504.0f));
#  ifdef __KERNEL_AVX2__
  /* Hardware float to half conversion of all four lanes. */
  int4 packed = int4(_mm_cvtps_ph(clamped, 0));
#  else
  /* Same steps as the scalar conversion, applied to all four lanes at once. */
  int4 magnitude = cast(clamped) & make_int4(0x7FFFFFFF);
  int4 rebased = magnitude + make_int4(0xC8000000);
  /* Lanes below the smallest normal half are zeroed.
   * NOTE(review): assumes andnot(a, b) computes (~a & b) - confirm against its definition. */
  int4 flushed = andnot(magnitude < make_int4(0x38800000), rebased);
  int4 half_bits = (flushed >> 13) & make_int4(0x7FFF);
  /* Narrow the four 32-bit lanes to 16 bits each. */
  int4 packed = int4(_mm_packs_epi32(half_bits, half_bits));
#  endif
  /* Store the low 64 bits, which hold the four packed halves. */
  half4 result;
  _mm_storel_pi((__m64 *)&result, _mm_castsi128_ps(packed));
  return result;
#else
  /* GPU and scalar fallback. */
  half4 result;
  result.x = float_to_half_display(f.x);
  result.y = float_to_half_display(f.y);
  result.z = float_to_half_display(f.z);
  result.w = float_to_half_display(f.w);
  return result;
#endif
}
|
|
|
|
|
2014-01-15 15:29:22 +01:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
#endif /* __UTIL_HALF_H__ */
|