Vulkan: Low Precision Float Conversion #108168
@ -101,64 +101,64 @@ void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint
|
||||
* \{ */
|
||||
|
||||
/**
|
||||
* Description of a IEEE 754-1985 standard floating point data type.
|
||||
* Description of an IEEE 754-1985 floating-point data type.
|
||||
*/
|
||||
template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
|
||||
class FloatingPointFormat {
|
||||
public:
|
||||
static constexpr bool HasSign = HasSignBit;
|
||||
static constexpr uint8_t SignShift = MantissaBitLen + ExponentBitLen;
|
||||
static constexpr uint32_t SignMask = HasSignBit ? 1 : 0;
|
||||
static constexpr uint8_t MantissaLen = MantissaBitLen;
|
||||
static constexpr uint8_t MantissaShift = 0;
|
||||
static constexpr uint32_t MantissaMask = (1 << MantissaBitLen) - 1;
|
||||
static constexpr uint32_t MantissaNanMask = MantissaMask;
|
||||
static constexpr uint8_t ExponentShift = MantissaBitLen;
|
||||
static constexpr uint8_t ExponentLen = ExponentBitLen;
|
||||
static constexpr uint32_t ExponentMask = (1 << ExponentBitLen) - 1;
|
||||
static constexpr int32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;
|
||||
static constexpr int32_t ExponentSpecialMask = ExponentMask;
|
||||
static constexpr bool HAS_SIGN = HasSignBit;
|
||||
static constexpr uint8_t SIGN_SHIFT = MantissaBitLen + ExponentBitLen;
|
||||
static constexpr uint32_t SIGN_MASK = HasSignBit ? 1 : 0;
|
||||
static constexpr uint8_t MANTISSA_LEN = MantissaBitLen;
|
||||
static constexpr uint8_t MANTISSA_SHIFT = 0;
|
||||
static constexpr uint32_t MANTISSA_MASK = (1 << MantissaBitLen) - 1;
|
||||
static constexpr uint32_t MANTISSA_NAN_MASK = MANTISSA_MASK;
|
||||
static constexpr uint8_t EXPONENT_SHIFT = MantissaBitLen;
|
||||
static constexpr uint8_t EXPONENT_LEN = ExponentBitLen;
|
||||
static constexpr uint32_t EXPONENT_MASK = (1 << ExponentBitLen) - 1;
|
||||
static constexpr int32_t EXPONENT_BIAS = (1 << (ExponentBitLen - 1)) - 1;
|
||||
static constexpr int32_t EXPONENT_SPECIAL_MASK = EXPONENT_MASK;
|
||||
|
||||
static uint32_t get_mantissa(uint32_t floating_point_number)
|
||||
{
|
||||
return (floating_point_number >> MantissaShift) & MantissaMask;
|
||||
return (floating_point_number >> MANTISSA_SHIFT) & MANTISSA_MASK;
|
||||
}
|
||||
static uint32_t clear_mantissa(uint32_t floating_point_number)
|
||||
{
|
||||
return floating_point_number & ~(MantissaMask << MantissaShift);
|
||||
return floating_point_number & ~(MANTISSA_MASK << MANTISSA_SHIFT);
|
||||
}
|
||||
static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
|
||||
{
|
||||
uint32_t result = clear_mantissa(floating_point_number);
|
||||
result |= mantissa << MantissaShift;
|
||||
result |= mantissa << MANTISSA_SHIFT;
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t get_exponent(uint32_t floating_point_number)
|
||||
{
|
||||
return ((floating_point_number >> ExponentShift) & ExponentMask);
|
||||
return ((floating_point_number >> EXPONENT_SHIFT) & EXPONENT_MASK);
|
||||
}
|
||||
static uint32_t clear_exponent(uint32_t floating_point_number)
|
||||
{
|
||||
return floating_point_number & ~(ExponentMask << ExponentShift);
|
||||
return floating_point_number & ~(EXPONENT_MASK << EXPONENT_SHIFT);
|
||||
}
|
||||
static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
|
||||
{
|
||||
uint32_t result = clear_exponent(floating_point_number);
|
||||
result |= (exponent) << ExponentShift;
|
||||
result |= (exponent) << EXPONENT_SHIFT;
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool is_signed(uint32_t floating_point_number)
|
||||
{
|
||||
if constexpr (HasSignBit) {
|
||||
return (floating_point_number >> SignShift) & SignMask;
|
||||
return (floating_point_number >> SIGN_SHIFT) & SIGN_MASK;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
static uint32_t clear_sign(uint32_t floating_point_number)
|
||||
{
|
||||
return floating_point_number & ~(1 << SignShift);
|
||||
return floating_point_number & ~(1 << SIGN_SHIFT);
|
||||
}
|
||||
|
||||
static uint32_t set_sign(bool sign, uint32_t floating_point_number)
|
||||
@ -167,7 +167,7 @@ class FloatingPointFormat {
|
||||
return floating_point_number;
|
||||
}
|
||||
uint32_t result = clear_sign(floating_point_number);
|
||||
result |= uint32_t(sign) << SignShift;
|
||||
result |= uint32_t(sign) << SIGN_SHIFT;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
@ -210,12 +210,12 @@ uint32_t convert_float_formats(uint32_t value)
|
||||
uint32_t mantissa = SourceFormat::get_mantissa(value);
|
||||
int32_t exponent = SourceFormat::get_exponent(value);
|
||||
|
||||
const bool is_nan = (exponent == SourceFormat::ExponentSpecialMask) && mantissa;
|
||||
const bool is_inf = (exponent == SourceFormat::ExponentSpecialMask) && (mantissa == 0);
|
||||
const bool is_nan = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && mantissa;
|
||||
const bool is_inf = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && (mantissa == 0);
|
||||
const bool is_zero = (exponent == 0 && mantissa == 0);
|
||||
|
||||
/* Sign conversion */
|
||||
if constexpr (!DestinationFormat::HasSign && ClampNegativeToZero) {
|
||||
if constexpr (!DestinationFormat::HAS_SIGN && ClampNegativeToZero) {
|
||||
if (is_signed && !is_nan) {
|
||||
return 0;
|
||||
}
|
||||
@ -225,33 +225,33 @@ uint32_t convert_float_formats(uint32_t value)
|
||||
}
|
||||
|
||||
if (is_inf) {
|
||||
exponent = DestinationFormat::ExponentSpecialMask;
|
||||
exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
|
||||
}
|
||||
else if (is_nan) {
|
||||
exponent = DestinationFormat::ExponentSpecialMask;
|
||||
mantissa = DestinationFormat::MantissaNanMask;
|
||||
exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
|
||||
mantissa = DestinationFormat::MANTISSA_NAN_MASK;
|
||||
}
|
||||
else {
|
||||
/* Exponent conversion */
|
||||
exponent -= SourceFormat::ExponentBias;
|
||||
exponent -= SourceFormat::EXPONENT_BIAS;
|
||||
/* Clamping when destination has lower precision. */
|
||||
if constexpr (SourceFormat::ExponentLen > DestinationFormat::ExponentLen) {
|
||||
if (exponent > DestinationFormat::ExponentBias) {
|
||||
if constexpr (SourceFormat::EXPONENT_LEN > DestinationFormat::EXPONENT_LEN) {
|
||||
if (exponent > DestinationFormat::EXPONENT_BIAS) {
|
||||
exponent = 0;
|
||||
mantissa = SourceFormat::MantissaMask;
|
||||
mantissa = SourceFormat::MANTISSA_MASK;
|
||||
}
|
||||
else if (exponent < -DestinationFormat::ExponentBias) {
|
||||
else if (exponent < -DestinationFormat::EXPONENT_BIAS) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
exponent += DestinationFormat::ExponentBias;
|
||||
exponent += DestinationFormat::EXPONENT_BIAS;
|
||||
|
||||
/* Mantissa conversion */
|
||||
if constexpr (SourceFormat::MantissaLen > DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa >> (SourceFormat::MantissaLen - DestinationFormat::MantissaLen);
|
||||
if constexpr (SourceFormat::MANTISSA_LEN > DestinationFormat::MANTISSA_LEN) {
|
||||
mantissa = mantissa >> (SourceFormat::MANTISSA_LEN - DestinationFormat::MANTISSA_LEN);
|
||||
}
|
||||
else if constexpr (SourceFormat::MantissaLen < DestinationFormat::MantissaLen) {
|
||||
mantissa = mantissa << (DestinationFormat::MantissaLen - SourceFormat::MantissaLen);
|
||||
else if constexpr (SourceFormat::MANTISSA_LEN < DestinationFormat::MANTISSA_LEN) {
|
||||
mantissa = mantissa << (DestinationFormat::MANTISSA_LEN - SourceFormat::MANTISSA_LEN);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user