3 changed files with 93 additions and 88 deletions
--- a/source/blender/gpu/vulkan/vk_data_conversion.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion.cc
@ -9,8 +9,6 @@

 #include "BLI_color.hh"

-#include "Imath/half.h"
-
 namespace blender::gpu {

 /* -------------------------------------------------------------------- */
@ -549,6 +547,26 @@ static ConversionType reversed(ConversionType type)
 /** \name Data Conversion
 * \{ */

+static uint32_t float_to_uint32_t(float value)
+{
+  union {
+    float fl;
+    uint32_t u;
+  } float_to_bits;
+  float_to_bits.fl = value;
+  return float_to_bits.u;
+}
+
+static float uint32_t_to_float(uint32_t value)
+{
+  union {
+    float fl;
+    uint32_t u;
+  } float_to_bits;
+  float_to_bits.u = value;
+  return float_to_bits.fl;
+}
+
 template<typename InnerType> struct ComponentValue {
  InnerType value;
 };
@ -681,12 +699,12 @@ void convert(DestinationType &dst, const SourceType &src)

 static void convert(F16 &dst, const F32 &src)
 {
-  dst.value = imath_float_to_half(src.value);
+  dst.value = convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value));
 }

 static void convert(F32 &dst, const F16 &src)
 {
-  dst.value = imath_half_to_float(src.value);
+  dst.value = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.value));
 }

 static void convert(SRGBA8 &dst, const FLOAT4 &src)
@ -705,41 +723,21 @@ constexpr uint8_t SHIFT_B = 22;
 constexpr uint8_t SHIFT_G = 11;
 constexpr uint8_t SHIFT_R = 0;

-static uint32_t float_to_uint32_t(float value)
-{
-  union {
-    float fl;
-    uint32_t u;
-  } float_to_bits;
-  float_to_bits.fl = value;
-  return float_to_bits.u;
-}
-
-static float uint32_t_to_float(uint32_t value)
-{
-  union {
-    float fl;
-    uint32_t u;
-  } float_to_bits;
-  float_to_bits.u = value;
-  return float_to_bits.fl;
-}
-
 static void convert(FLOAT3 &dst, const B10F_G11G_R11F &src)
 {
  dst.value.x = uint32_t_to_float(
-      convert_float_formats<Format32F, Format11F>((src.value >> SHIFT_R) && MASK_11_BITS));
+      convert_float_formats<FormatF32, FormatF11>((src.value >> SHIFT_R) & MASK_11_BITS));
  dst.value.y = uint32_t_to_float(
-      convert_float_formats<Format32F, Format11F>((src.value >> SHIFT_G) && MASK_11_BITS));
+      convert_float_formats<FormatF32, FormatF11>((src.value >> SHIFT_G) & MASK_11_BITS));
  dst.value.z = uint32_t_to_float(
-      convert_float_formats<Format32F, Format11F>((src.value >> SHIFT_B) && MASK_10_BITS));
+      convert_float_formats<FormatF32, FormatF10>((src.value >> SHIFT_B) & MASK_10_BITS));
 }

 static void convert(B10F_G11G_R11F &dst, const FLOAT3 &src)
 {
-  uint32_t r = convert_float_formats<Format11F, Format32F>(float_to_uint32_t(src.value.x));
-  uint32_t g = convert_float_formats<Format11F, Format32F>(float_to_uint32_t(src.value.y));
-  uint32_t b = convert_float_formats<Format10F, Format32F>(float_to_uint32_t(src.value.z));
+  uint32_t r = convert_float_formats<FormatF11, FormatF32>(float_to_uint32_t(src.value.x));
+  uint32_t g = convert_float_formats<FormatF11, FormatF32>(float_to_uint32_t(src.value.y));
+  uint32_t b = convert_float_formats<FormatF10, FormatF32>(float_to_uint32_t(src.value.z));
  dst.value = r << SHIFT_R | g << SHIFT_G | b << SHIFT_B;
 }

--- a/source/blender/gpu/vulkan/vk_data_conversion.hh
+++ b/source/blender/gpu/vulkan/vk_data_conversion.hh
@ -100,6 +100,9 @@ void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint
 /** \name Floating point conversions
 * \{ */

+/**
+ * Description of an IEEE 754-1985 standard floating point data type.
+ */
 template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
 class FloatingPointFormat {
 public:
@ -114,49 +117,49 @@ class FloatingPointFormat {
  static constexpr uint32_t SignMask = HasSignBit ? 1 : 0;
  static constexpr uint32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;

-  uint32_t get_mantissa(uint32_t floating_point_number)
+  static uint32_t get_mantissa(uint32_t floating_point_number)
  {
    return (floating_point_number >> MantissaShift) & MantissaMask;
  }
-  uint32_t clear_mantissa(uint32_t floating_point_number)
+  static uint32_t clear_mantissa(uint32_t floating_point_number)
  {
    return floating_point_number & ~(MantissaMask << MantissaShift);
  }
-  uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
+  static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
  {
    uint32_t result = clear_mantissa(floating_point_number);
    result |= mantissa << MantissaShift;
    return result;
  }

-  uint32_t get_exponent(uint32_t floating_point_number)
+  static uint32_t get_exponent(uint32_t floating_point_number)
  {
    return ((floating_point_number >> ExponentShift) & ExponentMask);
  }
-  uint32_t clear_exponent(uint32_t floating_point_number)
+  static uint32_t clear_exponent(uint32_t floating_point_number)
  {
    return floating_point_number & ~(ExponentMask << ExponentShift);
  }
-  uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
+  static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
  {
    uint32_t result = clear_exponent(floating_point_number);
    result |= (exponent) << ExponentShift;
    return result;
  }

-  bool is_signed(uint32_t floating_point_number)
+  static bool is_signed(uint32_t floating_point_number)
  {
    if constexpr (HasSignBit) {
      return (floating_point_number >> SignShift) & SignMask;
    }
    return false;
  }
-  uint32_t clear_sign(uint32_t floating_point_number)
+  static uint32_t clear_sign(uint32_t floating_point_number)
  {
    return floating_point_number & ~(1 << SignShift);
  }

-  uint32_t set_sign(bool sign, uint32_t floating_point_number)
+  static uint32_t set_sign(bool sign, uint32_t floating_point_number)
  {
    if constexpr (HasSignBit) {
      return floating_point_number;
@ -167,39 +170,52 @@ class FloatingPointFormat {
  }
 };

-using Format32F = FloatingPointFormat<true, 23, 8>;
-using Format16F = FloatingPointFormat<true, 10, 5>;
-using Format11F = FloatingPointFormat<false, 6, 5>;
-using Format10F = FloatingPointFormat<false, 5, 5>;
+using FormatF32 = FloatingPointFormat<true, 23, 8>;
+using FormatF16 = FloatingPointFormat<true, 10, 5>;
+using FormatF11 = FloatingPointFormat<false, 6, 5>;
+using FormatF10 = FloatingPointFormat<false, 5, 5>;

-template<typename DestinationFormat, typename SourceFormat>
+/**
+ * Convert between low precision floating point formats (including 32 bit floats).
+ *
+ * The input and output values are raw bits (uint32_t) as this function uses bit-wise operations
+ * to convert between the formats. Additional conversion rules can be selected via the template
+ * parameters. As the formats are compile-time constants, the compiler can generate an optimized
+ * version for each combination of formats.
+ *
+ * NOTE: Implementation should be extended to support NaN, Inf, -Inf and clamping to min/max when
+ * values don't fit in the destination.
+ */
+template<
+    /**
+     * FloatingPointFormat of the value that is converted to.
+     */
+    typename DestinationFormat,
+
+    /**
+     * FloatingPointFormat of the value that is converted from.
+     */
+    typename SourceFormat,
+
+    /**
+     * Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
+     * bit.
+     *
+     * When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
+     * made absolute.
+     */
+    bool ClampNegativeToZero = true>
 uint32_t convert_float_formats(uint32_t value)
 {
-  SourceFormat src_format;
-  DestinationFormat dst_format;
-  /*
-  printf("Source MS:%d MM:%x ES:%d EM:%x EB:%x\n",
-         SourceFormat::MantissaShift,
-         SourceFormat::MantissaMask,
-         SourceFormat::ExponentShift,
-         SourceFormat::ExponentMask,
-         SourceFormat::ExponentBias);
-  printf("Destination MS:%d MM:%x ES:%d EM:%x EB:%x\n",
-         DestinationFormat::MantissaShift,
-         DestinationFormat::MantissaMask,
-         DestinationFormat::ExponentShift,
-         DestinationFormat::ExponentMask,
-         DestinationFormat::ExponentBias);
-         */
+  bool is_signed = SourceFormat::is_signed(value);
+  uint32_t mantissa = SourceFormat::get_mantissa(value);
+  int32_t exponent = SourceFormat::get_exponent(value);

-  bool is_signed = src_format.is_signed(value);
-  uint32_t mantissa = src_format.get_mantissa(value);
-  int32_t exponent = src_format.get_exponent(value);
-  printf("src:%x S:%d, M:%x E:%x\n", value, is_signed, mantissa, exponent);
  /* Sign conversion */
-  if (is_signed && !DestinationFormat::HasSign) {
-    // NOTE: we clamp to zero.
-    return 0;
+  if constexpr (!DestinationFormat::HasSign && ClampNegativeToZero) {
+    if (is_signed) {
+      return 0;
+    }
  }

  /* Mantissa conversion */
@ -211,28 +227,16 @@ uint32_t convert_float_formats(uint32_t value)
  }

  /* Exponent conversion */
-  const bool is_denormalized = exponent == 0;
-  if (!is_denormalized) {
-    exponent -= SourceFormat::ExponentBias;
-    /*
-    if constexpr (SourceFormat::ExponentLen > DestinationFormat::ExponentLen) {
-      exponent = exponent >> (SourceFormat::ExponentLen - DestinationFormat::ExponentLen);
-    }
-    else if constexpr (SourceFormat::ExponentLen < DestinationFormat::ExponentLen) {
-      exponent = exponent << (DestinationFormat::ExponentLen - SourceFormat::ExponentLen);
-    }
-    */
-    exponent += DestinationFormat::ExponentBias;
-  }
+  if (exponent != 0) { /* Zero and denormals keep exponent 0. TODO: clamp to min/max value. */
+    exponent += int32_t(DestinationFormat::ExponentBias) - int32_t(SourceFormat::ExponentBias);
+  }

  uint32_t result = 0;
-  result = dst_format.set_sign(is_signed, result);
-  result = dst_format.set_exponent(exponent, result);
-  result = dst_format.set_mantissa(mantissa, result);
-  printf("dst:%x S:%d, M:%x E:%x\n", result, is_signed, mantissa, exponent);
+  result = DestinationFormat::set_sign(is_signed, result);
+  result = DestinationFormat::set_exponent(exponent, result);
+  result = DestinationFormat::set_mantissa(mantissa, result);
  return result;
 }

 /* \} */
-
 };  // namespace blender::gpu
--- a/source/blender/gpu/vulkan/vk_data_conversion_test.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion_test.cc
@ -7,17 +7,20 @@ TEST(VulkanDataConversion, ConvertF32F16)
 {
  uint32_t f32_2 = 0b01000000000000000000000000000000;
  uint32_t f16_2_expected = 0b0100000000000000;
-  uint32_t f16_2 = convert_float_formats<Format16F, Format32F>(f32_2);
+  uint32_t f16_2 = convert_float_formats<FormatF16, FormatF32>(f32_2);
  EXPECT_EQ(f16_2, f16_2_expected);

  uint32_t f32_3 = 0b01000000010000000000000000000000;
  uint32_t f16_3_expected = 0b0100001000000000;
-  uint32_t f16_3 = convert_float_formats<Format16F, Format32F>(f32_3);
+  uint32_t f16_3 = convert_float_formats<FormatF16, FormatF32>(f32_3);
  EXPECT_EQ(f16_3, f16_3_expected);

  uint32_t f32_4 = 0b01000000100000000000000000000000;
  uint32_t f16_4_expected = 0b0100010000000000;
-  uint32_t f16_4 = convert_float_formats<Format16F, Format32F>(f32_4);
+  uint32_t f16_4 = convert_float_formats<FormatF16, FormatF32>(f32_4);
  EXPECT_EQ(f16_4, f16_4_expected);
 }
+
+// TODO: add test cases for NaN, Inf, -Inf and clamping.
+
 }  // namespace blender::gpu::tests