From 418cb6f797c8e6c7587ddb48bacf60865608bf13 Mon Sep 17 00:00:00 2001
From: Jeroen Bakker <jeroen@blender.org>
Date: Tue, 23 May 2023 09:50:16 +0200
Subject: [PATCH 1/3] Vulkan: Low Precision Float Conversion

This PR adds a conversion template to convert between low-precision
float formats. These include Binary32 floats and lower. It also adds
support for converting between unsigned and signed float formats, and
between float formats with different mantissa and exponent sizes.

Additionally overflows (values that don't fit in the target float
format) will be clamped to the maximum value.

Reasoning:
Up to now the Vulkan backend only supported float and half float
formats, but to support Workbench, 11 and 10 bit unsigned floats have to
be supported as well. The available libraries that support those float
formats target scientific applications, and the resulting code couldn't
be optimized that well by the compiler.

Data conversion for color pixels has different requirements regarding
clamping and sign, which could eliminate some clamping code in other
areas in Blender as well. It could also fix some undesired clamping when
using pixels with high intensity that didn't fit in the texture format,
leading to artifacts in Eevee and slow-down in the image editor.
---
 CMakeLists.txt                                |   3 +-
 source/blender/gpu/CMakeLists.txt             |   8 +-
 source/blender/gpu/tests/texture_test.cc      |   6 +-
 .../blender/gpu/vulkan/vk_data_conversion.cc  |  72 +++++++-
 .../blender/gpu/vulkan/vk_data_conversion.hh  | 167 ++++++++++++++++++
 .../gpu/vulkan/vk_data_conversion_test.cc     | 100 +++++++++++
 6 files changed, 338 insertions(+), 18 deletions(-)
 create mode 100644 source/blender/gpu/vulkan/vk_data_conversion_test.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 312a073288f..3fb863606f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -900,10 +900,9 @@ set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID         OFF)
 # NanoVDB requires OpenVDB to convert the data structure
 set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB       OFF)
 
-# OpenVDB, Alembic and Vulkan, OSL uses 'half' or 'imath' from OpenEXR
+# OpenVDB, Alembic and OSL uses 'half' or 'imath' from OpenEXR
 set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
 set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_ALEMBIC OFF)
-set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_VULKAN_BACKEND OFF)
 set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_CYCLES_OSL OFF)
 
 # auto enable openimageio for cycles
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index ebbb267f63f..df7d8ff389c 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -52,7 +52,6 @@ endif()
 
 set(INC_SYS
   ${Epoxy_INCLUDE_DIRS}
-  ${IMATH_INCLUDE_DIR}
 )
 
 set(SRC
@@ -354,11 +353,6 @@ if(WITH_VULKAN_BACKEND)
   )
 
   add_definitions(-DWITH_VULKAN_BACKEND)
-  if(WIN32)
-    if(EXISTS ${LIBDIR}/imath/bin/imath.dll)
-      add_definitions(-DIMATH_DLL)
-    endif()
-  endif()
 endif()
 
 if(WITH_VULKAN_GUARDEDALLOC)
@@ -816,7 +810,6 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER)
     bf_blenlib
     bf_intern_ghost
     ${PLATFORM_LINKLIBS}
-    ${IMATH_LIBRARIES}
   )
   target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR})
 
@@ -862,6 +855,7 @@ if(WITH_GTESTS)
     if(WITH_VULKAN_BACKEND)
       list(APPEND TEST_SRC
         tests/memory_layout_test.cc
+        vulkan/vk_data_conversion_test.cc
       )
     endif()
 
diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc
index 785f37fbcac..a7f4364ba52 100644
--- a/source/blender/gpu/tests/texture_test.cc
+++ b/source/blender/gpu/tests/texture_test.cc
@@ -254,13 +254,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI()
   texture_create_upload_read_with_bias<GPU_RGB10_A2UI, GPU_DATA_FLOAT>(0.0f);
 }
 GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI);
+#endif
 
 static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F()
 {
-  texture_create_upload_read_with_bias<GPU_R11F_G11F_B10F, GPU_DATA_FLOAT>(0.0f);
+  texture_create_upload_read_with_bias<GPU_R11F_G11F_B10F, GPU_DATA_FLOAT>(0.0009f);
 }
 GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F);
-#endif
 
 static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8()
 {
@@ -403,13 +403,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F()
 GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F);
 #endif
 
+#if RUN_COMPONENT_UNIMPLEMENTED
 static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24()
 {
   texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT24, GPU_DATA_FLOAT>(0.0000001f);
 }
 GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24);
 
-#if RUN_COMPONENT_UNIMPLEMENTED
 static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16()
 {
   texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT16, GPU_DATA_FLOAT>(0.0f);
diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc
index e31fd3845ff..e0f9a56e178 100644
--- a/source/blender/gpu/vulkan/vk_data_conversion.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion.cc
@@ -9,8 +9,6 @@
 
 #include "BLI_color.hh"
 
-#include "Imath/half.h"
-
 namespace blender::gpu {
 
 /* -------------------------------------------------------------------- */
@@ -55,6 +53,9 @@ enum class ConversionType {
   FLOAT_TO_DEPTH_COMPONENT24,
   DEPTH_COMPONENT24_TO_FLOAT,
 
+  FLOAT_TO_B10F_G11F_R11F,
+  B10F_G11F_R11F_TO_FLOAT,
+
   /**
    * The requested conversion isn't supported.
    */
@@ -104,6 +105,9 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
     case GPU_DEPTH_COMPONENT24:
       return ConversionType::FLOAT_TO_DEPTH_COMPONENT24;
 
+    case GPU_R11F_G11F_B10F:
+      return ConversionType::FLOAT_TO_B10F_G11F_R11F;
+
     case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */
     case GPU_RGBA8UI:
     case GPU_RGBA8I:
@@ -125,7 +129,6 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
     case GPU_R32I:
     case GPU_RGB10_A2:
     case GPU_RGB10_A2UI:
-    case GPU_R11F_G11F_B10F:
     case GPU_DEPTH32F_STENCIL8:
     case GPU_DEPTH24_STENCIL8:
     case GPU_RGB8UI:
@@ -526,6 +529,7 @@ static ConversionType reversed(ConversionType type)
       CASE_PAIR(FLOAT, HALF)
       CASE_PAIR(FLOAT, SRGBA8)
       CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
+      CASE_PAIR(FLOAT, B10F_G11F_R11F)
 
     case ConversionType::UNSUPPORTED:
       return ConversionType::UNSUPPORTED;
@@ -543,6 +547,26 @@ static ConversionType reversed(ConversionType type)
 /** \name Data Conversion
  * \{ */
 
+static uint32_t float_to_uint32_t(float value)
+{
+  union {
+    float fl;
+    uint32_t u;
+  } float_to_bits;
+  float_to_bits.fl = value;
+  return float_to_bits.u;
+}
+
+static float uint32_t_to_float(uint32_t value)
+{
+  union {
+    float fl;
+    uint32_t u;
+  } float_to_bits;
+  float_to_bits.u = value;
+  return float_to_bits.fl;
+}
+
 template<typename InnerType> struct ComponentValue {
   InnerType value;
 };
@@ -559,7 +583,10 @@ using I32 = ComponentValue<int32_t>;
 using F32 = ComponentValue<float>;
 using F16 = ComponentValue<uint16_t>;
 using SRGBA8 = PixelValue<ColorSceneLinearByteEncoded4b<eAlpha::Premultiplied>>;
+using FLOAT3 = PixelValue<float3>;
 using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
+/* NOTE: Vulkan stores R11_G11_B10 in reverse component order. */
+class B10F_G11G_R11F : public PixelValue<uint32_t> {};
 
 class DepthComponent24 : public ComponentValue<uint32_t> {
  public:
@@ -672,12 +699,12 @@ void convert(DestinationType &dst, const SourceType &src)
 
 static void convert(F16 &dst, const F32 &src)
 {
-  dst.value = imath_float_to_half(src.value);
+  dst.value = convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value));
 }
 
 static void convert(F32 &dst, const F16 &src)
 {
-  dst.value = imath_half_to_float(src.value);
+  dst.value = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.value));
 }
 
 static void convert(SRGBA8 &dst, const FLOAT4 &src)
@@ -690,6 +717,30 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)
   dst.value = src.value.decode();
 }
 
+constexpr uint32_t MASK_10_BITS = 0b1111111111;
+constexpr uint32_t MASK_11_BITS = 0b11111111111;
+constexpr uint8_t SHIFT_B = 22;
+constexpr uint8_t SHIFT_G = 11;
+constexpr uint8_t SHIFT_R = 0;
+
+static void convert(FLOAT3 &dst, const B10F_G11G_R11F &src)
+{
+  dst.value.x = uint32_t_to_float(
+      convert_float_formats<FormatF32, FormatF11>((src.value >> SHIFT_R) & MASK_11_BITS));
+  dst.value.y = uint32_t_to_float(
+      convert_float_formats<FormatF32, FormatF11>((src.value >> SHIFT_G) & MASK_11_BITS));
+  dst.value.z = uint32_t_to_float(
+      convert_float_formats<FormatF32, FormatF10>((src.value >> SHIFT_B) & MASK_10_BITS));
+}
+
+static void convert(B10F_G11G_R11F &dst, const FLOAT3 &src)
+{
+  uint32_t r = convert_float_formats<FormatF11, FormatF32>(float_to_uint32_t(src.value.x));
+  uint32_t g = convert_float_formats<FormatF11, FormatF32>(float_to_uint32_t(src.value.y));
+  uint32_t b = convert_float_formats<FormatF10, FormatF32>(float_to_uint32_t(src.value.z));
+  dst.value = r << SHIFT_R | g << SHIFT_G | b << SHIFT_B;
+}
+
 /* \} */
 
 template<typename DestinationType, typename SourceType>
@@ -829,6 +880,14 @@ static void convert_buffer(void *dst_memory,
       convert_per_component<F32, UnsignedNormalized<DepthComponent24>>(
           dst_memory, src_memory, buffer_size, device_format);
       break;
+
+    case ConversionType::FLOAT_TO_B10F_G11F_R11F:
+      convert_per_pixel<B10F_G11G_R11F, FLOAT3>(dst_memory, src_memory, buffer_size);
+      break;
+
+    case ConversionType::B10F_G11F_R11F_TO_FLOAT:
+      convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
+      break;
   }
 }
 
@@ -876,7 +935,8 @@ void convert_device_to_host(void *dst_buffer,
                             eGPUTextureFormat device_format)
 {
   ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
-  BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
+  BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
+                 "Data conversion between host_format and device_format isn't supported (yet).");
   convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
 }
 
diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh
index 1b5405f04c1..971148d216f 100644
--- a/source/blender/gpu/vulkan/vk_data_conversion.hh
+++ b/source/blender/gpu/vulkan/vk_data_conversion.hh
@@ -96,4 +96,171 @@ bool conversion_needed(const GPUVertFormat &vertex_format);
  */
 void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len);
 
+/* -------------------------------------------------------------------- */
+/** \name Floating point conversions
+ * \{ */
+
+/**
+ * Description of a IEEE 754-1985 standard floating point data type.
+ */
+template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
+class FloatingPointFormat {
+ public:
+  static constexpr bool HasSign = HasSignBit;
+  static constexpr uint8_t SignShift = MantissaBitLen + ExponentBitLen;
+  static constexpr uint32_t SignMask = HasSignBit ? 1 : 0;
+  static constexpr uint8_t MantissaLen = MantissaBitLen;
+  static constexpr uint8_t MantissaShift = 0;
+  static constexpr uint32_t MantissaMask = (1 << MantissaBitLen) - 1;
+  static constexpr uint32_t MantissaNanMask = MantissaMask;
+  static constexpr uint8_t ExponentShift = MantissaBitLen;
+  static constexpr uint8_t ExponentLen = ExponentBitLen;
+  static constexpr uint32_t ExponentMask = (1 << ExponentBitLen) - 1;
+  static constexpr int32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;
+  static constexpr int32_t ExponentSpecialMask = ExponentMask;
+
+  static uint32_t get_mantissa(uint32_t floating_point_number)
+  {
+    return (floating_point_number >> MantissaShift) & MantissaMask;
+  }
+  static uint32_t clear_mantissa(uint32_t floating_point_number)
+  {
+    return floating_point_number & ~(MantissaMask << MantissaShift);
+  }
+  static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
+  {
+    uint32_t result = clear_mantissa(floating_point_number);
+    result |= mantissa << MantissaShift;
+    return result;
+  }
+
+  static uint32_t get_exponent(uint32_t floating_point_number)
+  {
+    return ((floating_point_number >> ExponentShift) & ExponentMask);
+  }
+  static uint32_t clear_exponent(uint32_t floating_point_number)
+  {
+    return floating_point_number & ~(ExponentMask << ExponentShift);
+  }
+  static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
+  {
+    uint32_t result = clear_exponent(floating_point_number);
+    result |= (exponent) << ExponentShift;
+    return result;
+  }
+
+  static bool is_signed(uint32_t floating_point_number)
+  {
+    if constexpr (HasSignBit) {
+      return (floating_point_number >> SignShift) & SignMask;
+    }
+    return false;
+  }
+  static uint32_t clear_sign(uint32_t floating_point_number)
+  {
+    return floating_point_number & ~(1 << SignShift);
+  }
+
+  static uint32_t set_sign(bool sign, uint32_t floating_point_number)
+  {
+    if constexpr (!HasSignBit) {
+      return floating_point_number;
+    }
+    uint32_t result = clear_sign(floating_point_number);
+    result |= uint32_t(sign) << SignShift;
+    return result;
+  }
+};
+
+using FormatF32 = FloatingPointFormat<true, 23, 8>;
+using FormatF16 = FloatingPointFormat<true, 10, 5>;
+using FormatF11 = FloatingPointFormat<false, 6, 5>;
+using FormatF10 = FloatingPointFormat<false, 5, 5>;
+
+/**
+ * Convert between low precision floating point formats (including 32 bit floats).
+ *
+ * The input and output values are bits (uint32_t) as this function does bit-wise operations to
+ * convert between the formats. Additional conversion rules can be applied to the conversion
+ * function. Due to the implementation the compiler would make an optimized version depending on
+ * the actual possibilities.
+ */
+template<
+    /**
+     * FloatingPointFormat of the value that is converted to.
+     */
+    typename DestinationFormat,
+
+    /**
+     * FloatingPointFormat of the value that is converted from.
+     */
+    typename SourceFormat,
+
+    /**
+     * Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
+     * bit. Also -Inf will be clamped to zero.
+     *
+     * When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
+     * made absolute.
+     */
+    bool ClampNegativeToZero = true>
+uint32_t convert_float_formats(uint32_t value)
+{
+  bool is_signed = SourceFormat::is_signed(value);
+  uint32_t mantissa = SourceFormat::get_mantissa(value);
+  int32_t exponent = SourceFormat::get_exponent(value);
+
+  const bool is_nan = (exponent == SourceFormat::ExponentSpecialMask) && mantissa;
+  const bool is_inf = (exponent == SourceFormat::ExponentSpecialMask) && (mantissa == 0);
+  const bool is_zero = (exponent == 0 && mantissa == 0);
+
+  /* Sign conversion */
+  if constexpr (!DestinationFormat::HasSign && ClampNegativeToZero) {
+    if (is_signed && !is_nan) {
+      return 0;
+    }
+  }
+  if (is_zero) {
+    return 0;
+  }
+
+  if (is_inf) {
+    exponent = DestinationFormat::ExponentSpecialMask;
+  }
+  else if (is_nan) {
+    exponent = DestinationFormat::ExponentSpecialMask;
+    mantissa = DestinationFormat::MantissaNanMask;
+  }
+  else {
+    /* Exponent conversion */
+    exponent -= SourceFormat::ExponentBias;
+    /* Clamping when destination has lower precision. */
+    if constexpr (SourceFormat::ExponentLen > DestinationFormat::ExponentLen) {
+      if (exponent > DestinationFormat::ExponentBias) {
+        exponent = 0;
+        mantissa = SourceFormat::MantissaMask;
+      }
+      else if (exponent < -DestinationFormat::ExponentBias) {
+        return 0;
+      }
+    }
+    exponent += DestinationFormat::ExponentBias;
+
+    /* Mantissa conversion */
+    if constexpr (SourceFormat::MantissaLen > DestinationFormat::MantissaLen) {
+      mantissa = mantissa >> (SourceFormat::MantissaLen - DestinationFormat::MantissaLen);
+    }
+    else if constexpr (SourceFormat::MantissaLen < DestinationFormat::MantissaLen) {
+      mantissa = mantissa << (DestinationFormat::MantissaLen - SourceFormat::MantissaLen);
+    }
+  }
+
+  uint32_t result = 0;
+  result = DestinationFormat::set_sign(is_signed, result);
+  result = DestinationFormat::set_exponent(exponent, result);
+  result = DestinationFormat::set_mantissa(mantissa, result);
+  return result;
+}
+
+/* \} */
 };  // namespace blender::gpu
diff --git a/source/blender/gpu/vulkan/vk_data_conversion_test.cc b/source/blender/gpu/vulkan/vk_data_conversion_test.cc
new file mode 100644
index 00000000000..33ebcad3874
--- /dev/null
+++ b/source/blender/gpu/vulkan/vk_data_conversion_test.cc
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+
+#include "testing/testing.h"
+
+#include "vk_data_conversion.hh"
+
+namespace blender::gpu::tests {
+static void test_f32_f16(uint32_t f32_in, uint32_t f16_expected)
+{
+  const uint32_t f16 = convert_float_formats<FormatF16, FormatF32>(f32_in);
+  EXPECT_EQ(f16, f16_expected);
+  const uint32_t f32_reverse = convert_float_formats<FormatF32, FormatF16>(f16);
+  EXPECT_EQ(f32_reverse, f32_in);
+}
+
+TEST(VulkanDataConversion, ConvertF32F16)
+{
+  /* 0.0 */
+  test_f32_f16(0b00000000000000000000000000000000, 0b0000000000000000);
+  /* 0.125 */
+  test_f32_f16(0b00111110000000000000000000000000, 0b0011000000000000);
+  /* 2.0 */
+  test_f32_f16(0b01000000000000000000000000000000, 0b0100000000000000);
+  /* 3.0 */
+  test_f32_f16(0b01000000010000000000000000000000, 0b0100001000000000);
+  /* 4.0 */
+  test_f32_f16(0b01000000100000000000000000000000, 0b0100010000000000);
+}
+
+TEST(VulkanDataConversion, clamp_negative_to_zero)
+{
+  const uint32_t f32_2 = 0b11000000000000000000000000000000;
+  const uint32_t f32_inf_min = 0b11111111100000000000000000000000;
+  const uint32_t f32_inf_max = 0b01111111100000000000000000000000;
+  const uint32_t f32_nan = 0b11111111111111111111111111111111;
+
+  /* F32(-2) fits in F16. */
+  const uint32_t f16_2_expected = 0b1100000000000000;
+  const uint32_t f16_2a = convert_float_formats<FormatF16, FormatF32, true>(f32_2);
+  EXPECT_EQ(f16_2a, f16_2_expected);
+
+  const uint32_t f16_2b = convert_float_formats<FormatF16, FormatF32, false>(f32_2);
+  EXPECT_EQ(f16_2b, f16_2_expected);
+
+  /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Clamp to zero. */
+  const uint32_t f11_0_expected = 0b00000000000;
+  const uint32_t f11_2_expected = 0b10000000000;
+  const uint32_t f11_inf_expected = 0b11111000000;
+  const uint32_t f11_nan_expected = 0b11111111111;
+  {
+    const uint32_t f11_0 = convert_float_formats<FormatF11, FormatF32, true>(f32_2);
+    EXPECT_EQ(f11_0, f11_0_expected);
+    const uint32_t f11_0b = convert_float_formats<FormatF11, FormatF32, true>(f32_inf_min);
+    EXPECT_EQ(f11_0b, f11_0_expected);
+    const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, true>(f32_inf_max);
+    EXPECT_EQ(f11_inf, f11_inf_expected);
+    const uint32_t f11_nan = convert_float_formats<FormatF11, FormatF32, true>(f32_nan);
+    EXPECT_EQ(f11_nan, f11_nan_expected);
+  }
+
+  /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Make absolute. */
+  {
+    const uint32_t f11_2 = convert_float_formats<FormatF11, FormatF32, false>(f32_2);
+    EXPECT_EQ(f11_2, f11_2_expected);
+    const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, false>(f32_inf_min);
+    EXPECT_EQ(f11_inf, f11_inf_expected);
+    const uint32_t f11_infb = convert_float_formats<FormatF11, FormatF32, false>(f32_inf_max);
+    EXPECT_EQ(f11_infb, f11_inf_expected);
+    const uint32_t f11_nan = convert_float_formats<FormatF11, FormatF32, false>(f32_nan);
+    EXPECT_EQ(f11_nan, f11_nan_expected);
+  }
+}
+
+TEST(VulkanDataConversion, infinity_upper)
+{
+  const uint32_t f32_inf = 0b01111111100000000000000000000000;
+
+  const uint32_t f16_inf_expected = 0b0111110000000000;
+  const uint32_t f16_inf = convert_float_formats<FormatF16, FormatF32, true>(f32_inf);
+  EXPECT_EQ(f16_inf, f16_inf_expected);
+
+  const uint32_t f11_inf_expected = 0b11111000000;
+  const uint32_t f11_inf = convert_float_formats<FormatF11, FormatF32, true>(f32_inf);
+  EXPECT_EQ(f11_inf, f11_inf_expected);
+
+  const uint32_t f10_inf_expected = 0b1111100000;
+  const uint32_t f10_inf = convert_float_formats<FormatF10, FormatF32, true>(f32_inf);
+  EXPECT_EQ(f10_inf, f10_inf_expected);
+}
+
+TEST(VulkanDataConversion, infinity_lower)
+{
+  const uint32_t f32_inf = 0b11111111100000000000000000000000;
+
+  const uint32_t f16_inf_expected = 0b1111110000000000;
+  const uint32_t f16_inf = convert_float_formats<FormatF16, FormatF32, true>(f32_inf);
+  EXPECT_EQ(f16_inf, f16_inf_expected);
+}
+
+}  // namespace blender::gpu::tests
\ No newline at end of file
-- 
2.30.2


From 7959794a23dbd83904804d6df68df3ad8bcbfb67 Mon Sep 17 00:00:00 2001
From: Jeroen Bakker <jeroen@blender.org>
Date: Tue, 23 May 2023 14:36:29 +0200
Subject: [PATCH 2/3] Fix code style.

---
 .../blender/gpu/vulkan/vk_data_conversion.hh  | 76 +++++++++----------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh
index 971148d216f..b63c74bd001 100644
--- a/source/blender/gpu/vulkan/vk_data_conversion.hh
+++ b/source/blender/gpu/vulkan/vk_data_conversion.hh
@@ -101,64 +101,64 @@ void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint
  * \{ */
 
 /**
- * Description of a IEEE 754-1985 standard floating point data type.
+ * Description of a IEEE 754-1985 floating point data type.
  */
 template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
 class FloatingPointFormat {
  public:
-  static constexpr bool HasSign = HasSignBit;
-  static constexpr uint8_t SignShift = MantissaBitLen + ExponentBitLen;
-  static constexpr uint32_t SignMask = HasSignBit ? 1 : 0;
-  static constexpr uint8_t MantissaLen = MantissaBitLen;
-  static constexpr uint8_t MantissaShift = 0;
-  static constexpr uint32_t MantissaMask = (1 << MantissaBitLen) - 1;
-  static constexpr uint32_t MantissaNanMask = MantissaMask;
-  static constexpr uint8_t ExponentShift = MantissaBitLen;
-  static constexpr uint8_t ExponentLen = ExponentBitLen;
-  static constexpr uint32_t ExponentMask = (1 << ExponentBitLen) - 1;
-  static constexpr int32_t ExponentBias = (1 << (ExponentBitLen - 1)) - 1;
-  static constexpr int32_t ExponentSpecialMask = ExponentMask;
+  static constexpr bool HAS_SIGN = HasSignBit;
+  static constexpr uint8_t SIGN_SHIFT = MantissaBitLen + ExponentBitLen;
+  static constexpr uint32_t SIGN_MASK = HasSignBit ? 1 : 0;
+  static constexpr uint8_t MANTISSA_LEN = MantissaBitLen;
+  static constexpr uint8_t MANTISSA_SHIFT = 0;
+  static constexpr uint32_t MANTISSA_MASK = (1 << MantissaBitLen) - 1;
+  static constexpr uint32_t MANTISSA_NAN_MASK = MANTISSA_MASK;
+  static constexpr uint8_t EXPONENT_SHIFT = MantissaBitLen;
+  static constexpr uint8_t EXPONENT_LEN = ExponentBitLen;
+  static constexpr uint32_t EXPONENT_MASK = (1 << ExponentBitLen) - 1;
+  static constexpr int32_t EXPONENT_BIAS = (1 << (ExponentBitLen - 1)) - 1;
+  static constexpr int32_t EXPONENT_SPECIAL_MASK = EXPONENT_MASK;
 
   static uint32_t get_mantissa(uint32_t floating_point_number)
   {
-    return (floating_point_number >> MantissaShift) & MantissaMask;
+    return (floating_point_number >> MANTISSA_SHIFT) & MANTISSA_MASK;
   }
   static uint32_t clear_mantissa(uint32_t floating_point_number)
   {
-    return floating_point_number & ~(MantissaMask << MantissaShift);
+    return floating_point_number & ~(MANTISSA_MASK << MANTISSA_SHIFT);
   }
   static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
   {
     uint32_t result = clear_mantissa(floating_point_number);
-    result |= mantissa << MantissaShift;
+    result |= mantissa << MANTISSA_SHIFT;
     return result;
   }
 
   static uint32_t get_exponent(uint32_t floating_point_number)
   {
-    return ((floating_point_number >> ExponentShift) & ExponentMask);
+    return ((floating_point_number >> EXPONENT_SHIFT) & EXPONENT_MASK);
   }
   static uint32_t clear_exponent(uint32_t floating_point_number)
   {
-    return floating_point_number & ~(ExponentMask << ExponentShift);
+    return floating_point_number & ~(EXPONENT_MASK << EXPONENT_SHIFT);
   }
   static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
   {
     uint32_t result = clear_exponent(floating_point_number);
-    result |= (exponent) << ExponentShift;
+    result |= (exponent) << EXPONENT_SHIFT;
     return result;
   }
 
   static bool is_signed(uint32_t floating_point_number)
   {
     if constexpr (HasSignBit) {
-      return (floating_point_number >> SignShift) & SignMask;
+      return (floating_point_number >> SIGN_SHIFT) & SIGN_MASK;
     }
     return false;
   }
   static uint32_t clear_sign(uint32_t floating_point_number)
   {
-    return floating_point_number & ~(1 << SignShift);
+    return floating_point_number & ~(1 << SIGN_SHIFT);
   }
 
   static uint32_t set_sign(bool sign, uint32_t floating_point_number)
@@ -167,7 +167,7 @@ class FloatingPointFormat {
       return floating_point_number;
     }
     uint32_t result = clear_sign(floating_point_number);
-    result |= uint32_t(sign) << SignShift;
+    result |= uint32_t(sign) << SIGN_SHIFT;
     return result;
   }
 };
@@ -210,12 +210,12 @@ uint32_t convert_float_formats(uint32_t value)
   uint32_t mantissa = SourceFormat::get_mantissa(value);
   int32_t exponent = SourceFormat::get_exponent(value);
 
-  const bool is_nan = (exponent == SourceFormat::ExponentSpecialMask) && mantissa;
-  const bool is_inf = (exponent == SourceFormat::ExponentSpecialMask) && (mantissa == 0);
+  const bool is_nan = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && mantissa;
+  const bool is_inf = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && (mantissa == 0);
   const bool is_zero = (exponent == 0 && mantissa == 0);
 
   /* Sign conversion */
-  if constexpr (!DestinationFormat::HasSign && ClampNegativeToZero) {
+  if constexpr (!DestinationFormat::HAS_SIGN && ClampNegativeToZero) {
     if (is_signed && !is_nan) {
       return 0;
     }
@@ -225,33 +225,33 @@ uint32_t convert_float_formats(uint32_t value)
   }
 
   if (is_inf) {
-    exponent = DestinationFormat::ExponentSpecialMask;
+    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
   }
   else if (is_nan) {
-    exponent = DestinationFormat::ExponentSpecialMask;
-    mantissa = DestinationFormat::MantissaNanMask;
+    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
+    mantissa = DestinationFormat::MANTISSA_NAN_MASK;
   }
   else {
     /* Exponent conversion */
-    exponent -= SourceFormat::ExponentBias;
+    exponent -= SourceFormat::EXPONENT_BIAS;
     /* Clamping when destination has lower precision. */
-    if constexpr (SourceFormat::ExponentLen > DestinationFormat::ExponentLen) {
-      if (exponent > DestinationFormat::ExponentBias) {
+    if constexpr (SourceFormat::EXPONENT_LEN > DestinationFormat::EXPONENT_LEN) {
+      if (exponent > DestinationFormat::EXPONENT_BIAS) {
         exponent = 0;
-        mantissa = SourceFormat::MantissaMask;
+        mantissa = SourceFormat::MANTISSA_MASK;
       }
-      else if (exponent < -DestinationFormat::ExponentBias) {
+      else if (exponent < -DestinationFormat::EXPONENT_BIAS) {
         return 0;
       }
     }
-    exponent += DestinationFormat::ExponentBias;
+    exponent += DestinationFormat::EXPONENT_BIAS;
 
     /* Mantissa conversion */
-    if constexpr (SourceFormat::MantissaLen > DestinationFormat::MantissaLen) {
-      mantissa = mantissa >> (SourceFormat::MantissaLen - DestinationFormat::MantissaLen);
+    if constexpr (SourceFormat::MANTISSA_LEN > DestinationFormat::MANTISSA_LEN) {
+      mantissa = mantissa >> (SourceFormat::MANTISSA_LEN - DestinationFormat::MANTISSA_LEN);
     }
-    else if constexpr (SourceFormat::MantissaLen < DestinationFormat::MantissaLen) {
-      mantissa = mantissa << (DestinationFormat::MantissaLen - SourceFormat::MantissaLen);
+    else if constexpr (SourceFormat::MANTISSA_LEN < DestinationFormat::MANTISSA_LEN) {
+      mantissa = mantissa << (DestinationFormat::MANTISSA_LEN - SourceFormat::MANTISSA_LEN);
     }
   }
 
-- 
2.30.2


From 5843a4b703a4e7baef532648a37d1390b0f1d03f Mon Sep 17 00:00:00 2001
From: Jeroen Bakker <jeroen@blender.org>
Date: Thu, 1 Jun 2023 13:39:52 +0200
Subject: [PATCH 3/3] Code formatting.

---
 source/blender/gpu/vulkan/vk_data_conversion.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc
index e0f9a56e178..df377389d55 100644
--- a/source/blender/gpu/vulkan/vk_data_conversion.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion.cc
@@ -586,7 +586,8 @@ using SRGBA8 = PixelValue<ColorSceneLinearByteEncoded4b<eAlpha::Premultiplied>>;
 using FLOAT3 = PixelValue<float3>;
 using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
 /* NOTE: Vulkan stores R11_G11_B10 in reverse component order. */
-class B10F_G11G_R11F : public PixelValue<uint32_t> {};
+class B10F_G11G_R11F : public PixelValue<uint32_t> {
+};
 
 class DepthComponent24 : public ComponentValue<uint32_t> {
  public:
-- 
2.30.2