WIP: Vulkan: Workbench #107886
|
@ -110,12 +110,14 @@ class FloatingPointFormat {
|
|||
static constexpr uint8_t MantissaLen = MantissaBitLen;
|
||||
static constexpr uint8_t MantissaShift = 0;
|
||||
static constexpr uint32_t MantissaMask = (1 << MantissaBitLen) - 1;
|
||||
static constexpr uint32_t MantissaNanMask = MantissaMask;
|
||||
static constexpr uint8_t ExponentShift = MantissaBitLen;
|
||||
static constexpr uint8_t ExponentLen = ExponentBitLen;
|
||||
static constexpr uint32_t ExponentMask = (1 << ExponentBitLen) - 1;
|
||||
static constexpr uint32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;
|
||||
static constexpr int32_t ExponentSpecialMask = ExponentMask;
|
||||
static constexpr uint8_t SignShift = MantissaBitLen + ExponentBitLen;
|
||||
static constexpr uint32_t SignMask = HasSignBit ? 1 : 0;
|
||||
static constexpr uint32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;
|
||||
|
||||
static uint32_t get_mantissa(uint32_t floating_point_number)
|
||||
{
|
||||
|
@ -161,7 +163,7 @@ class FloatingPointFormat {
|
|||
|
||||
static uint32_t set_sign(bool sign, uint32_t floating_point_number)
|
||||
{
|
||||
if constexpr (HasSignBit) {
|
||||
if constexpr (!HasSignBit) {
|
||||
return floating_point_number;
|
||||
}
|
||||
uint32_t result = clear_sign(floating_point_number);
|
||||
|
@ -182,9 +184,6 @@ using FormatF10 = FloatingPointFormat<false, 5, 5>;
|
|||
* convert between the formats. Additional conversion rules can be applied to the conversion
|
||||
* function. Due to the implementation the compiler would make an optimized version depending on
|
||||
* the actual possibilities.
|
||||
*
|
||||
* NOTE: Implementation should be extended to support NaN, Inf, -Inf and clamping to min/max when
|
||||
* values don't fit in the destination.
|
||||
*/
|
||||
template<
|
||||
/**
|
||||
|
@ -199,7 +198,7 @@ template<
|
|||
|
||||
/**
|
||||
* Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
|
||||
* bit.
|
||||
* bit. Also -Inf will be clamped to zero.
|
||||
*
|
||||
* When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
|
||||
* made absolute.
|
||||
|
@ -211,25 +210,46 @@ uint32_t convert_float_formats(uint32_t value)
|
|||
uint32_t mantissa = SourceFormat::get_mantissa(value);
|
||||
int32_t exponent = SourceFormat::get_exponent(value);
|
||||
|
||||
const bool is_nan = (exponent == SourceFormat::ExponentSpecialMask) && mantissa;
|
||||
const bool is_inf = (exponent == SourceFormat::ExponentSpecialMask) && (mantissa == 0);
|
||||
|
||||
/* Sign conversion */
|
||||
if constexpr (!DestinationFormat::HasSign && ClampNegativeToZero) {
|
||||
if (is_signed) {
|
||||
if (is_signed && !is_nan) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mantissa conversion */
|
||||
if constexpr (SourceFormat::MantissaLen > DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa >> (SourceFormat::MantissaLen - DestinationFormat::MantissaLen);
|
||||
if (is_inf) {
|
||||
exponent = DestinationFormat::ExponentSpecialMask;
|
||||
}
|
||||
else if constexpr (SourceFormat::MantissaLen < DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa << (DestinationFormat::MantissaLen - SourceFormat::MantissaLen);
|
||||
else if (is_nan) {
|
||||
exponent = DestinationFormat::ExponentSpecialMask;
|
||||
mantissa = DestinationFormat::MantissaNanMask;
|
||||
}
|
||||
else {
|
||||
/* Exponent conversion */
|
||||
exponent -= SourceFormat::ExponentBias;
|
||||
/* Clamping when destination has lower precision. */
|
||||
if constexpr (SourceFormat::ExponentLen > DestinationFormat::ExponentLen) {
|
||||
if (exponent > DestinationFormat::ExponentBias) {
|
||||
exponent = 0;
|
||||
mantissa = SourceFormat::MantissaMask;
|
||||
}
|
||||
else if (exponent < -int32_t(DestinationFormat::ExponentBias)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
exponent += DestinationFormat::ExponentBias;
|
||||
|
||||
/* Exponent conversion */
|
||||
exponent += DestinationFormat::ExponentBias - SourceFormat::ExponentBias;
|
||||
// TODO: Clamp to min/max value? only when Destination::ExponentBias <
|
||||
// SourceFormat::ExponentBias.
|
||||
/* Mantissa conversion */
|
||||
if constexpr (SourceFormat::MantissaLen > DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa >> (SourceFormat::MantissaLen - DestinationFormat::MantissaLen);
|
||||
}
|
||||
else if constexpr (SourceFormat::MantissaLen < DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa << (DestinationFormat::MantissaLen - SourceFormat::MantissaLen);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t result = 0;
|
||||
result = DestinationFormat::set_sign(is_signed, result);
|
||||
|
|
|
@ -5,22 +5,83 @@
|
|||
namespace blender::gpu::tests {
|
||||
/* Converts the F32 bit patterns of 2, 3 and 4 with `convert_float_formats<FormatF16, FormatF32>`
 * and compares each result against the expected F16 bit pattern.
 *
 * NOTE(review): every local below is declared twice, once without and once with `const`. This
 * looks like the removed and added sides of a diff rendered together -- confirm against the
 * actual file; only one of each pair can exist in compilable code. */
TEST(VulkanDataConversion, ConvertF32F16)
|
||||
{
|
||||
uint32_t f32_2 = 0b01000000000000000000000000000000;
|
||||
uint32_t f16_2_expected = 0b0100000000000000;
|
||||
uint32_t f16_2 = convert_float_formats<FormatF16, FormatF32>(f32_2);
|
||||
const uint32_t f32_2 = 0b01000000000000000000000000000000;
|
||||
const uint32_t f16_2_expected = 0b0100000000000000;
|
||||
const uint32_t f16_2 = convert_float_formats<FormatF16, FormatF32>(f32_2);
|
||||
EXPECT_EQ(f16_2, f16_2_expected);
|
||||
|
||||
uint32_t f32_3 = 0b01000000010000000000000000000000;
|
||||
uint32_t f16_3_expected = 0b0100001000000000;
|
||||
uint32_t f16_3 = convert_float_formats<FormatF16, FormatF32>(f32_3);
|
||||
const uint32_t f32_3 = 0b01000000010000000000000000000000;
|
||||
const uint32_t f16_3_expected = 0b0100001000000000;
|
||||
const uint32_t f16_3 = convert_float_formats<FormatF16, FormatF32>(f32_3);
|
||||
EXPECT_EQ(f16_3, f16_3_expected);
|
||||
|
||||
uint32_t f32_4 = 0b01000000100000000000000000000000;
|
||||
uint32_t f16_4_expected = 0b0100010000000000;
|
||||
uint32_t f16_4 = convert_float_formats<FormatF16, FormatF32>(f32_4);
|
||||
const uint32_t f32_4 = 0b01000000100000000000000000000000;
|
||||
const uint32_t f16_4_expected = 0b0100010000000000;
|
||||
const uint32_t f16_4 = convert_float_formats<FormatF16, FormatF32>(f32_4);
|
||||
EXPECT_EQ(f16_4, f16_4_expected);
|
||||
}
|
||||
|
||||
/* Checks how negative and special F32 inputs convert to F16 and F11 when the third template
 * argument of `convert_float_formats` is `true` and when it is `false`. The comments in this test
 * describe that argument as choosing between clamping to zero and making the value absolute for
 * F11. */
TEST(VulkanDataConversion, clamp_negative_to_zero)
|
||||
{
|
||||
/* F32 input bit patterns: -2, -Inf, +Inf and an all-ones word used as NaN. */
const uint32_t f32_2 = 0b11000000000000000000000000000000;
|
||||
const uint32_t f32_inf_min = 0b11111111100000000000000000000000;
|
||||
const uint32_t f32_inf_max = 0b01111111100000000000000000000000;
|
||||
const uint32_t f32_nan = 0b11111111111111111111111111111111;
|
||||
|
||||
/* F32(-2) fits in F16. */
|
||||
/* The same signed F16 pattern is expected for both settings of the third argument. */
const uint32_t f16_2_expected = 0b1100000000000000;
|
||||
const uint32_t f16_2a = convert_float_formats<FormatF16, FormatF32, true>(f32_2);
|
||||
EXPECT_EQ(f16_2a, f16_2_expected);
|
||||
|
||||
const uint32_t f16_2b = convert_float_formats<FormatF16, FormatF32, false>(f32_2);
|
||||
EXPECT_EQ(f16_2b, f16_2_expected);
|
||||
|
||||
/* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Clamp to zero. */
|
||||
/* Expected 11-bit patterns shared by both scoped sections below. */
const uint32_t f11_0_expected = 0b00000000000;
|
||||
const uint32_t f11_2_expected = 0b10000000000;
|
||||
const uint32_t f11_inf_expected = 0b11111000000;
|
||||
const uint32_t f11_nan_expected = 0b11111111111;
|
||||
{
|
||||
/* Third argument `true`: -2 and -Inf are expected to give zero, while +Inf and the NaN
 * pattern are expected to keep their special encodings. */
const uint32_t f11_0 = convert_float_formats<FormatF11, FormatF32, true>(f32_2);
|
||||
EXPECT_EQ(f11_0, f11_0_expected);
|
||||
const uint32_t f11_0b = convert_float_formats<FormatF11, FormatF32, true>(f32_inf_min);
|
||||
EXPECT_EQ(f11_0b, f11_0_expected);
|
||||
const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, true>(f32_inf_max);
|
||||
EXPECT_EQ(f11_inf, f11_inf_expected);
|
||||
const uint32_t f11_nan = convert_float_formats<FormatF11, FormatF32, true>(f32_nan);
|
||||
EXPECT_EQ(f11_nan, f11_nan_expected);
|
||||
}
|
||||
|
||||
/* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Make absolute. */
|
||||
{
|
||||
/* Third argument `false`: -2 is expected as the F11 pattern of 2 and -Inf as the F11 Inf
 * pattern; +Inf and the NaN pattern give the same results as in the section above. */
const uint32_t f11_2 = convert_float_formats<FormatF11, FormatF32, false>(f32_2);
|
||||
EXPECT_EQ(f11_2, f11_2_expected);
|
||||
const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, false>(f32_inf_min);
|
||||
EXPECT_EQ(f11_inf, f11_inf_expected);
|
||||
const uint32_t f11_infb = convert_float_formats<FormatF11, FormatF32, false>(f32_inf_max);
|
||||
EXPECT_EQ(f11_infb, f11_inf_expected);
|
||||
const uint32_t f11_nan = convert_float_formats<FormatF11, FormatF32, false>(f32_nan);
|
||||
EXPECT_EQ(f11_nan, f11_nan_expected);
|
||||
}
|
||||
}
|
||||
|
||||
/* Checks that the F32 +Inf bit pattern converts to the expected Inf pattern (exponent bits all
 * set, mantissa bits zero) for each of the F16, F11 and F10 destination formats. */
TEST(VulkanDataConversion, infinity_upper)
|
||||
{
|
||||
/* F32 +Inf: sign 0, exponent all ones, mantissa zero. */
const uint32_t f32_inf = 0b01111111100000000000000000000000;
|
||||
|
||||
/* 16-bit destination. */
const uint32_t f16_inf_expected = 0b0111110000000000;
|
||||
const uint32_t f16_inf = convert_float_formats<FormatF16, FormatF32, true>(f32_inf);
|
||||
EXPECT_EQ(f16_inf, f16_inf_expected);
|
||||
|
||||
/* 11-bit destination. */
const uint32_t f11_inf_expected = 0b11111000000;
|
||||
const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, true>(f32_inf);
|
||||
EXPECT_EQ(f11_inf, f11_inf_expected);
|
||||
|
||||
/* 10-bit destination. */
const uint32_t f10_inf_expected = 0b1111100000;
|
||||
const uint32_t f10_inf = convert_float_formats<FormatF10, FormatF32, true>(f32_inf);
|
||||
EXPECT_EQ(f10_inf, f10_inf_expected);
|
||||
}
|
||||
|
||||
// TODO: add test cases for NaN, Inf, -Inf and clamping.
|
||||
|
||||
} // namespace blender::gpu::tests
|
Loading…
Reference in New Issue