diff --git a/CMakeLists.txt b/CMakeLists.txt index 312a073288f..3fb863606f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -900,10 +900,9 @@ set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF) # NanoVDB requires OpenVDB to convert the data structure set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF) -# OpenVDB, Alembic and Vulkan, OSL uses 'half' or 'imath' from OpenEXR +# OpenVDB, Alembic and OSL uses 'half' or 'imath' from OpenEXR set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF) set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_ALEMBIC OFF) -set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_VULKAN_BACKEND OFF) set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_CYCLES_OSL OFF) # auto enable openimageio for cycles diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index ebbb267f63f..df7d8ff389c 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -52,7 +52,6 @@ endif() set(INC_SYS ${Epoxy_INCLUDE_DIRS} - ${IMATH_INCLUDE_DIR} ) set(SRC @@ -354,11 +353,6 @@ if(WITH_VULKAN_BACKEND) ) add_definitions(-DWITH_VULKAN_BACKEND) - if(WIN32) - if(EXISTS ${LIBDIR}/imath/bin/imath.dll) - add_definitions(-DIMATH_DLL) - endif() - endif() endif() if(WITH_VULKAN_GUARDEDALLOC) @@ -816,7 +810,6 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER) bf_blenlib bf_intern_ghost ${PLATFORM_LINKLIBS} - ${IMATH_LIBRARIES} ) target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR}) @@ -862,6 +855,7 @@ if(WITH_GTESTS) if(WITH_VULKAN_BACKEND) list(APPEND TEST_SRC tests/memory_layout_test.cc + vulkan/vk_data_conversion_test.cc ) endif() diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 785f37fbcac..a7f4364ba52 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -254,13 +254,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI() texture_create_upload_read_with_bias(0.0f); } 
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.0009f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F); -#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8() { @@ -403,13 +403,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F() GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F); #endif +#if RUN_COMPONENT_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24() { texture_create_upload_read_with_bias(0.0000001f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24); -#if RUN_COMPONENT_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16() { texture_create_upload_read_with_bias(0.0f); diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index e31fd3845ff..df377389d55 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -9,8 +9,6 @@ #include "BLI_color.hh" -#include "Imath/half.h" - namespace blender::gpu { /* -------------------------------------------------------------------- */ @@ -55,6 +53,9 @@ enum class ConversionType { FLOAT_TO_DEPTH_COMPONENT24, DEPTH_COMPONENT24_TO_FLOAT, + FLOAT_TO_B10F_G11F_R11F, + B10F_G11F_R11F_TO_FLOAT, + /** * The requested conversion isn't supported. */ @@ -104,6 +105,9 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_DEPTH_COMPONENT24: return ConversionType::FLOAT_TO_DEPTH_COMPONENT24; + case GPU_R11F_G11F_B10F: + return ConversionType::FLOAT_TO_B10F_G11F_R11F; + case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. 
/**
 * Return the raw 32 bit pattern that encodes `value`.
 *
 * No numeric conversion takes place: the bits of the float are handed back unchanged so they
 * can be processed with integer operations.
 *
 * NOTE(review): this reads a union member other than the one that was written, which relies on
 * compiler support for union type punning - confirm this is acceptable on all target compilers.
 */
static uint32_t float_to_uint32_t(float value)
{
  const union {
    float as_float;
    uint32_t as_bits;
  } pun = {value};
  return pun.as_bits;
}

/**
 * Return the float whose raw 32 bit pattern is `value`.
 *
 * Inverse of #float_to_uint32_t: the bits are reinterpreted, not converted numerically.
 */
static float uint32_t_to_float(uint32_t value)
{
  const union {
    uint32_t as_bits;
    float as_float;
  } pun = {value};
  return pun.as_float;
}
*/ +class B10F_G11G_R11F : public PixelValue { +}; class DepthComponent24 : public ComponentValue { public: @@ -672,12 +700,12 @@ void convert(DestinationType &dst, const SourceType &src) static void convert(F16 &dst, const F32 &src) { - dst.value = imath_float_to_half(src.value); + dst.value = convert_float_formats(float_to_uint32_t(src.value)); } static void convert(F32 &dst, const F16 &src) { - dst.value = imath_half_to_float(src.value); + dst.value = uint32_t_to_float(convert_float_formats(src.value)); } static void convert(SRGBA8 &dst, const FLOAT4 &src) @@ -690,6 +718,30 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src) dst.value = src.value.decode(); } +constexpr uint32_t MASK_10_BITS = 0b1111111111; +constexpr uint32_t MASK_11_BITS = 0b11111111111; +constexpr uint8_t SHIFT_B = 22; +constexpr uint8_t SHIFT_G = 11; +constexpr uint8_t SHIFT_R = 0; + +static void convert(FLOAT3 &dst, const B10F_G11G_R11F &src) +{ + dst.value.x = uint32_t_to_float( + convert_float_formats((src.value >> SHIFT_R) & MASK_11_BITS)); + dst.value.y = uint32_t_to_float( + convert_float_formats((src.value >> SHIFT_G) & MASK_11_BITS)); + dst.value.z = uint32_t_to_float( + convert_float_formats((src.value >> SHIFT_B) & MASK_10_BITS)); +} + +static void convert(B10F_G11G_R11F &dst, const FLOAT3 &src) +{ + uint32_t r = convert_float_formats(float_to_uint32_t(src.value.x)); + uint32_t g = convert_float_formats(float_to_uint32_t(src.value.y)); + uint32_t b = convert_float_formats(float_to_uint32_t(src.value.z)); + dst.value = r << SHIFT_R | g << SHIFT_G | b << SHIFT_B; +} + /* \} */ template @@ -829,6 +881,14 @@ static void convert_buffer(void *dst_memory, convert_per_component>( dst_memory, src_memory, buffer_size, device_format); break; + + case ConversionType::FLOAT_TO_B10F_G11F_R11F: + convert_per_pixel(dst_memory, src_memory, buffer_size); + break; + + case ConversionType::B10F_G11F_R11F_TO_FLOAT: + convert_per_pixel(dst_memory, src_memory, buffer_size); + break; } } @@ -876,7 
/* -------------------------------------------------------------------- */
/** \name Floating point conversions
 * \{ */

/**
 * Description of an IEEE 754 style binary floating point data type.
 *
 * The bit layout, from most to least significant bit, is: optional sign bit, exponent,
 * mantissa. All helpers operate on the raw bits stored in the low bits of a `uint32_t`.
 *
 * \tparam HasSignBit: Whether the format stores a sign bit above the exponent.
 * \tparam MantissaBitLen: Number of bits used for the mantissa.
 * \tparam ExponentBitLen: Number of bits used for the (biased) exponent.
 */
template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
class FloatingPointFormat {
 public:
  static constexpr bool HAS_SIGN = HasSignBit;
  static constexpr uint8_t SIGN_SHIFT = MantissaBitLen + ExponentBitLen;
  static constexpr uint32_t SIGN_MASK = HasSignBit ? 1 : 0;
  static constexpr uint8_t MANTISSA_LEN = MantissaBitLen;
  static constexpr uint8_t MANTISSA_SHIFT = 0;
  /* Masks are built from an unsigned one so no shift ever touches the sign bit of an `int`. */
  static constexpr uint32_t MANTISSA_MASK = (uint32_t(1) << MantissaBitLen) - 1;
  /** Mantissa written for NaN values: all mantissa bits set. */
  static constexpr uint32_t MANTISSA_NAN_MASK = MANTISSA_MASK;
  static constexpr uint8_t EXPONENT_SHIFT = MantissaBitLen;
  static constexpr uint8_t EXPONENT_LEN = ExponentBitLen;
  static constexpr uint32_t EXPONENT_MASK = (uint32_t(1) << ExponentBitLen) - 1;
  static constexpr int32_t EXPONENT_BIAS = (1 << (ExponentBitLen - 1)) - 1;
  /** Biased exponent (all exponent bits set) that marks infinity and NaN. */
  static constexpr int32_t EXPONENT_SPECIAL_MASK = int32_t(EXPONENT_MASK);

  /** Extract the mantissa bits. */
  static uint32_t get_mantissa(uint32_t floating_point_number)
  {
    return (floating_point_number >> MANTISSA_SHIFT) & MANTISSA_MASK;
  }
  /** Return the number with all mantissa bits set to zero. */
  static uint32_t clear_mantissa(uint32_t floating_point_number)
  {
    return floating_point_number & ~(MANTISSA_MASK << MANTISSA_SHIFT);
  }
  /** Return the number with its mantissa bits replaced by `mantissa`. */
  static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
  {
    uint32_t result = clear_mantissa(floating_point_number);
    result |= mantissa << MANTISSA_SHIFT;
    return result;
  }

  /** Extract the biased exponent bits. */
  static uint32_t get_exponent(uint32_t floating_point_number)
  {
    return ((floating_point_number >> EXPONENT_SHIFT) & EXPONENT_MASK);
  }
  /** Return the number with all exponent bits set to zero. */
  static uint32_t clear_exponent(uint32_t floating_point_number)
  {
    return floating_point_number & ~(EXPONENT_MASK << EXPONENT_SHIFT);
  }
  /** Return the number with its exponent bits replaced by the biased `exponent`. */
  static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
  {
    uint32_t result = clear_exponent(floating_point_number);
    result |= (exponent) << EXPONENT_SHIFT;
    return result;
  }

  /** True when the format has a sign bit and it is set. Always false for unsigned formats. */
  static bool is_signed(uint32_t floating_point_number)
  {
    if constexpr (HasSignBit) {
      return (floating_point_number >> SIGN_SHIFT) & SIGN_MASK;
    }
    return false;
  }
  /** Return the number with the bit at the sign position set to zero. */
  static uint32_t clear_sign(uint32_t floating_point_number)
  {
    /* Unsigned one: for 32 bit formats SIGN_SHIFT is 31, which a signed `1` cannot hold. */
    return floating_point_number & ~(uint32_t(1) << SIGN_SHIFT);
  }

  /** Return the number with its sign bit set to `sign`. No-op for formats without sign bit. */
  static uint32_t set_sign(bool sign, uint32_t floating_point_number)
  {
    if constexpr (!HasSignBit) {
      return floating_point_number;
    }
    uint32_t result = clear_sign(floating_point_number);
    result |= uint32_t(sign) << SIGN_SHIFT;
    return result;
  }
};

using FormatF32 = FloatingPointFormat<true, 23, 8>;
using FormatF16 = FloatingPointFormat<true, 10, 5>;
using FormatF11 = FloatingPointFormat<false, 6, 5>;
using FormatF10 = FloatingPointFormat<false, 5, 5>;

/**
 * Convert between low precision floating point formats (including 32 bit floats).
 *
 * The input and output values are bits (uint32_t) as this function does bit-wise operations to
 * convert between the formats. Additional conversion rules can be applied to the conversion
 * function. Everything that depends on the formats is resolved at compile time, so each
 * instantiation only contains the branches it needs.
 *
 * Conversion rules:
 * - Zero converts to (positive) zero.
 * - Infinity and NaN convert to infinity and NaN of the destination format.
 * - Finite values too large for the destination are clamped to its largest finite value.
 * - Finite values whose exponent lies below the destination exponent range become zero.
 * - Mantissa bits that don't fit in the destination are truncated (no rounding).
 *
 * NOTE: Subnormal values are not renormalized; their exponent field is treated like the one of a
 * normal number.
 *
 * \return The bits of `value` expressed in `DestinationFormat`.
 */
template<
    /**
     * FloatingPointFormat of the value that is converted to.
     */
    typename DestinationFormat,

    /**
     * FloatingPointFormat of the value that is converted from.
     */
    typename SourceFormat,

    /**
     * Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
     * bit. Also -Inf will be clamped to zero.
     *
     * When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
     * made absolute.
     */
    bool ClampNegativeToZero = true>
uint32_t convert_float_formats(uint32_t value)
{
  bool is_signed = SourceFormat::is_signed(value);
  uint32_t mantissa = SourceFormat::get_mantissa(value);
  int32_t exponent = SourceFormat::get_exponent(value);

  const bool is_nan = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && mantissa;
  const bool is_inf = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && (mantissa == 0);
  const bool is_zero = (exponent == 0 && mantissa == 0);

  /* Sign conversion */
  if constexpr (!DestinationFormat::HAS_SIGN && ClampNegativeToZero) {
    /* NaN is kept as NaN, every other negative value has no representation. */
    if (is_signed && !is_nan) {
      return 0;
    }
  }
  if (is_zero) {
    return 0;
  }

  if (is_inf) {
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
  }
  else if (is_nan) {
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
    mantissa = DestinationFormat::MANTISSA_NAN_MASK;
  }
  else {
    /* Exponent conversion: remove the source bias, clamp, apply the destination bias. */
    exponent -= SourceFormat::EXPONENT_BIAS;
    /* Clamping when destination has lower precision. */
    if constexpr (SourceFormat::EXPONENT_LEN > DestinationFormat::EXPONENT_LEN) {
      if (exponent > DestinationFormat::EXPONENT_BIAS) {
        /* Largest finite value: highest non-special exponent with all mantissa bits set.
         * Using EXPONENT_BIAS (not 0) here matters; 0 would encode a value just below 2.0. */
        exponent = DestinationFormat::EXPONENT_BIAS;
        mantissa = SourceFormat::MANTISSA_MASK;
      }
      else if (exponent < -DestinationFormat::EXPONENT_BIAS) {
        return 0;
      }
    }
    exponent += DestinationFormat::EXPONENT_BIAS;

    /* Mantissa conversion: keep the most significant bits aligned. */
    if constexpr (SourceFormat::MANTISSA_LEN > DestinationFormat::MANTISSA_LEN) {
      mantissa = mantissa >> (SourceFormat::MANTISSA_LEN - DestinationFormat::MANTISSA_LEN);
    }
    else if constexpr (SourceFormat::MANTISSA_LEN < DestinationFormat::MANTISSA_LEN) {
      mantissa = mantissa << (DestinationFormat::MANTISSA_LEN - SourceFormat::MANTISSA_LEN);
    }
  }

  uint32_t result = 0;
  result = DestinationFormat::set_sign(is_signed, result);
  result = DestinationFormat::set_exponent(exponent, result);
  result = DestinationFormat::set_mantissa(mantissa, result);
  return result;
}

/* \} */
test_f32_f16(0b01000000010000000000000000000000, 0b0100001000000000); + /* 4.0 */ + test_f32_f16(0b01000000100000000000000000000000, 0b0100010000000000); +} + +TEST(VulkanDataConversion, clamp_negative_to_zero) +{ + const uint32_t f32_2 = 0b11000000000000000000000000000000; + const uint32_t f32_inf_min = 0b11111111100000000000000000000000; + const uint32_t f32_inf_max = 0b01111111100000000000000000000000; + const uint32_t f32_nan = 0b11111111111111111111111111111111; + + /* F32(-2) fits in F16. */ + const uint32_t f16_2_expected = 0b1100000000000000; + const uint32_t f16_2a = convert_float_formats(f32_2); + EXPECT_EQ(f16_2a, f16_2_expected); + + const uint32_t f16_2b = convert_float_formats(f32_2); + EXPECT_EQ(f16_2b, f16_2_expected); + + /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Clamp to zero. */ + const uint32_t f11_0_expected = 0b00000000000; + const uint32_t f11_2_expected = 0b10000000000; + const uint32_t f11_inf_expected = 0b11111000000; + const uint32_t f11_nan_expected = 0b11111111111; + { + const uint32_t f11_0 = convert_float_formats(f32_2); + EXPECT_EQ(f11_0, f11_0_expected); + const uint32_t f11_0b = convert_float_formats(f32_inf_min); + EXPECT_EQ(f11_0b, f11_0_expected); + const uint32_t f11_inf = convert_float_formats(f32_inf_max); + EXPECT_EQ(f11_inf, f11_inf_expected); + const uint32_t f11_nan = convert_float_formats(f32_nan); + EXPECT_EQ(f11_nan, f11_nan_expected); + } + + /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Make absolute. 
*/ + { + const uint32_t f11_2 = convert_float_formats(f32_2); + EXPECT_EQ(f11_2, f11_2_expected); + const uint32_t f11_inf = convert_float_formats(f32_inf_min); + EXPECT_EQ(f11_inf, f11_inf_expected); + const uint32_t f11_infb = convert_float_formats(f32_inf_max); + EXPECT_EQ(f11_infb, f11_inf_expected); + const uint32_t f11_nan = convert_float_formats(f32_nan); + EXPECT_EQ(f11_nan, f11_nan_expected); + } +} + +TEST(VulkanDataConversion, infinity_upper) +{ + const uint32_t f32_inf = 0b01111111100000000000000000000000; + + const uint32_t f16_inf_expected = 0b0111110000000000; + const uint32_t f16_inf = convert_float_formats(f32_inf); + EXPECT_EQ(f16_inf, f16_inf_expected); + + const uint32_t f11_inf_expected = 0b11111000000; + const uint32_t f11_inf = convert_float_formats(f32_inf); + EXPECT_EQ(f11_inf, f11_inf_expected); + + const uint32_t f10_inf_expected = 0b1111100000; + const uint32_t f10_inf = convert_float_formats(f32_inf); + EXPECT_EQ(f10_inf, f10_inf_expected); +} + +TEST(VulkanDataConversion, infinity_lower) +{ + const uint32_t f32_inf = 0b11111111100000000000000000000000; + + const uint32_t f16_inf_expected = 0b1111110000000000; + const uint32_t f16_inf = convert_float_formats(f32_inf); + EXPECT_EQ(f16_inf, f16_inf_expected); +} + +} // namespace blender::gpu::tests \ No newline at end of file