Vulkan: Low Precision Float Conversion #108168
|
@ -900,10 +900,9 @@ set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
|
||||||
# NanoVDB requires OpenVDB to convert the data structure
|
# NanoVDB requires OpenVDB to convert the data structure
|
||||||
set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
|
set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
|
||||||
|
|
||||||
# OpenVDB, Alembic and Vulkan, OSL uses 'half' or 'imath' from OpenEXR
|
# OpenVDB, Alembic and OSL use 'half' or 'imath' from OpenEXR
|
||||||
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
|
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
|
||||||
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_ALEMBIC OFF)
|
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_ALEMBIC OFF)
|
||||||
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_VULKAN_BACKEND OFF)
|
|
||||||
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_CYCLES_OSL OFF)
|
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_CYCLES_OSL OFF)
|
||||||
|
|
||||||
# auto enable openimageio for cycles
|
# auto enable openimageio for cycles
|
||||||
|
|
|
@ -52,7 +52,6 @@ endif()
|
||||||
|
|
||||||
set(INC_SYS
|
set(INC_SYS
|
||||||
${Epoxy_INCLUDE_DIRS}
|
${Epoxy_INCLUDE_DIRS}
|
||||||
${IMATH_INCLUDE_DIR}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SRC
|
set(SRC
|
||||||
|
@ -354,11 +353,6 @@ if(WITH_VULKAN_BACKEND)
|
||||||
)
|
)
|
||||||
|
|
||||||
add_definitions(-DWITH_VULKAN_BACKEND)
|
add_definitions(-DWITH_VULKAN_BACKEND)
|
||||||
if(WIN32)
|
|
||||||
if(EXISTS ${LIBDIR}/imath/bin/imath.dll)
|
|
||||||
add_definitions(-DIMATH_DLL)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_VULKAN_GUARDEDALLOC)
|
if(WITH_VULKAN_GUARDEDALLOC)
|
||||||
|
@ -816,7 +810,6 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER)
|
||||||
bf_blenlib
|
bf_blenlib
|
||||||
bf_intern_ghost
|
bf_intern_ghost
|
||||||
${PLATFORM_LINKLIBS}
|
${PLATFORM_LINKLIBS}
|
||||||
${IMATH_LIBRARIES}
|
|
||||||
)
|
)
|
||||||
target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR})
|
target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
|
||||||
|
@ -862,6 +855,7 @@ if(WITH_GTESTS)
|
||||||
if(WITH_VULKAN_BACKEND)
|
if(WITH_VULKAN_BACKEND)
|
||||||
list(APPEND TEST_SRC
|
list(APPEND TEST_SRC
|
||||||
tests/memory_layout_test.cc
|
tests/memory_layout_test.cc
|
||||||
|
vulkan/vk_data_conversion_test.cc
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -254,13 +254,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI()
|
||||||
texture_create_upload_read_with_bias<GPU_RGB10_A2UI, GPU_DATA_FLOAT>(0.0f);
|
texture_create_upload_read_with_bias<GPU_RGB10_A2UI, GPU_DATA_FLOAT>(0.0f);
|
||||||
}
|
}
|
||||||
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI);
|
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI);
|
||||||
|
#endif
|
||||||
|
|
||||||
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F()
|
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F()
|
||||||
{
|
{
|
||||||
texture_create_upload_read_with_bias<GPU_R11F_G11F_B10F, GPU_DATA_FLOAT>(0.0f);
|
texture_create_upload_read_with_bias<GPU_R11F_G11F_B10F, GPU_DATA_FLOAT>(0.0009f);
|
||||||
}
|
}
|
||||||
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F);
|
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F);
|
||||||
#endif
|
|
||||||
|
|
||||||
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8()
|
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8()
|
||||||
{
|
{
|
||||||
|
@ -403,13 +403,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F()
|
||||||
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F);
|
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if RUN_COMPONENT_UNIMPLEMENTED
|
||||||
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24()
|
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24()
|
||||||
{
|
{
|
||||||
texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT24, GPU_DATA_FLOAT>(0.0000001f);
|
texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT24, GPU_DATA_FLOAT>(0.0000001f);
|
||||||
}
|
}
|
||||||
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24);
|
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24);
|
||||||
|
|
||||||
#if RUN_COMPONENT_UNIMPLEMENTED
|
|
||||||
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16()
|
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16()
|
||||||
{
|
{
|
||||||
texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT16, GPU_DATA_FLOAT>(0.0f);
|
texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT16, GPU_DATA_FLOAT>(0.0f);
|
||||||
|
|
|
@ -9,8 +9,6 @@
|
||||||
|
|
||||||
#include "BLI_color.hh"
|
#include <cstring>

#include "BLI_color.hh"
|
||||||
|
|
||||||
#include "Imath/half.h"
|
|
||||||
|
|
||||||
namespace blender::gpu {
|
namespace blender::gpu {
|
||||||
|
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
|
@ -55,6 +53,9 @@ enum class ConversionType {
|
||||||
FLOAT_TO_DEPTH_COMPONENT24,
|
FLOAT_TO_DEPTH_COMPONENT24,
|
||||||
DEPTH_COMPONENT24_TO_FLOAT,
|
DEPTH_COMPONENT24_TO_FLOAT,
|
||||||
|
|
||||||
|
FLOAT_TO_B10F_G11F_R11F,
|
||||||
|
B10F_G11F_R11F_TO_FLOAT,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The requested conversion isn't supported.
|
* The requested conversion isn't supported.
|
||||||
*/
|
*/
|
||||||
|
@ -104,6 +105,9 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
|
||||||
case GPU_DEPTH_COMPONENT24:
|
case GPU_DEPTH_COMPONENT24:
|
||||||
return ConversionType::FLOAT_TO_DEPTH_COMPONENT24;
|
return ConversionType::FLOAT_TO_DEPTH_COMPONENT24;
|
||||||
|
|
||||||
|
case GPU_R11F_G11F_B10F:
|
||||||
|
return ConversionType::FLOAT_TO_B10F_G11F_R11F;
|
||||||
|
|
||||||
case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */
|
case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */
|
||||||
case GPU_RGBA8UI:
|
case GPU_RGBA8UI:
|
||||||
case GPU_RGBA8I:
|
case GPU_RGBA8I:
|
||||||
|
@ -125,7 +129,6 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format)
|
||||||
case GPU_R32I:
|
case GPU_R32I:
|
||||||
case GPU_RGB10_A2:
|
case GPU_RGB10_A2:
|
||||||
case GPU_RGB10_A2UI:
|
case GPU_RGB10_A2UI:
|
||||||
case GPU_R11F_G11F_B10F:
|
|
||||||
case GPU_DEPTH32F_STENCIL8:
|
case GPU_DEPTH32F_STENCIL8:
|
||||||
case GPU_DEPTH24_STENCIL8:
|
case GPU_DEPTH24_STENCIL8:
|
||||||
case GPU_RGB8UI:
|
case GPU_RGB8UI:
|
||||||
|
@ -526,6 +529,7 @@ static ConversionType reversed(ConversionType type)
|
||||||
CASE_PAIR(FLOAT, HALF)
|
CASE_PAIR(FLOAT, HALF)
|
||||||
CASE_PAIR(FLOAT, SRGBA8)
|
CASE_PAIR(FLOAT, SRGBA8)
|
||||||
CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
|
CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
|
||||||
|
CASE_PAIR(FLOAT, B10F_G11F_R11F)
|
||||||
|
|
||||||
case ConversionType::UNSUPPORTED:
|
case ConversionType::UNSUPPORTED:
|
||||||
return ConversionType::UNSUPPORTED;
|
return ConversionType::UNSUPPORTED;
|
||||||
|
@ -543,6 +547,26 @@ static ConversionType reversed(ConversionType type)
|
||||||
/** \name Data Conversion
|
/** \name Data Conversion
|
||||||
* \{ */
|
* \{ */
|
||||||
|
|
||||||
|
/**
 * Return the raw bit pattern of a 32 bit float.
 *
 * The bits are copied with `memcpy` instead of being read back through a union: reading a union
 * member other than the one last written is undefined behavior in C++, whereas copying the
 * object representation is well defined and compiles to the same single move.
 *
 * \param value: The float whose bits are requested.
 * \return The object representation of `value` as an unsigned 32 bit integer.
 */
static uint32_t float_to_uint32_t(float value)
{
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide.");
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}
|
||||||
|
|
||||||
|
/**
 * Reinterpret a raw 32 bit pattern as a float.
 *
 * The bits are copied with `memcpy` instead of being read back through a union: reading a union
 * member other than the one last written is undefined behavior in C++, whereas copying the
 * object representation is well defined and compiles to the same single move.
 *
 * \param value: The bit pattern to reinterpret.
 * \return The float whose object representation equals `value`.
 */
static float uint32_t_to_float(uint32_t value)
{
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide.");
  float result;
  std::memcpy(&result, &value, sizeof(result));
  return result;
}
|
||||||
|
|
||||||
template<typename InnerType> struct ComponentValue {
|
template<typename InnerType> struct ComponentValue {
|
||||||
InnerType value;
|
InnerType value;
|
||||||
};
|
};
|
||||||
|
@ -559,7 +583,11 @@ using I32 = ComponentValue<int32_t>;
|
||||||
using F32 = ComponentValue<float>;
|
using F32 = ComponentValue<float>;
|
||||||
using F16 = ComponentValue<uint16_t>;
|
using F16 = ComponentValue<uint16_t>;
|
||||||
using SRGBA8 = PixelValue<ColorSceneLinearByteEncoded4b<eAlpha::Premultiplied>>;
|
using SRGBA8 = PixelValue<ColorSceneLinearByteEncoded4b<eAlpha::Premultiplied>>;
|
||||||
|
using FLOAT3 = PixelValue<float3>;
|
||||||
using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
|
using FLOAT4 = PixelValue<ColorSceneLinear4f<eAlpha::Premultiplied>>;
|
||||||
|
/* NOTE: Vulkan stores R11_G11_B10 in reverse component order. */
|
||||||
|
class B10F_G11G_R11F : public PixelValue<uint32_t> {
|
||||||
|
};
|
||||||
|
|
||||||
class DepthComponent24 : public ComponentValue<uint32_t> {
|
class DepthComponent24 : public ComponentValue<uint32_t> {
|
||||||
public:
|
public:
|
||||||
|
@ -672,12 +700,12 @@ void convert(DestinationType &dst, const SourceType &src)
|
||||||
|
|
||||||
static void convert(F16 &dst, const F32 &src)
|
static void convert(F16 &dst, const F32 &src)
|
||||||
{
|
{
|
||||||
dst.value = imath_float_to_half(src.value);
|
dst.value = convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void convert(F32 &dst, const F16 &src)
|
static void convert(F32 &dst, const F16 &src)
|
||||||
{
|
{
|
||||||
dst.value = imath_half_to_float(src.value);
|
dst.value = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.value));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void convert(SRGBA8 &dst, const FLOAT4 &src)
|
static void convert(SRGBA8 &dst, const FLOAT4 &src)
|
||||||
|
@ -690,6 +718,30 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)
|
||||||
dst.value = src.value.decode();
|
dst.value = src.value.decode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr uint32_t MASK_10_BITS = 0b1111111111;
|
||||||
|
constexpr uint32_t MASK_11_BITS = 0b11111111111;
|
||||||
|
constexpr uint8_t SHIFT_B = 22;
|
||||||
|
constexpr uint8_t SHIFT_G = 11;
|
||||||
|
constexpr uint8_t SHIFT_R = 0;
|
||||||
|
|
||||||
|
/**
 * Unpack a packed B10F_G11F_R11F pixel into three 32 bit floats.
 *
 * Each channel is isolated with its shift and mask, widened to the 32 bit float layout and then
 * reinterpreted as a float. Red and green are 11 bit values, blue is a 10 bit value.
 */
static void convert(FLOAT3 &dst, const B10F_G11G_R11F &src)
{
  const uint32_t red_bits = (src.value >> SHIFT_R) & MASK_11_BITS;
  const uint32_t green_bits = (src.value >> SHIFT_G) & MASK_11_BITS;
  const uint32_t blue_bits = (src.value >> SHIFT_B) & MASK_10_BITS;

  dst.value.x = uint32_t_to_float(convert_float_formats<FormatF32, FormatF11>(red_bits));
  dst.value.y = uint32_t_to_float(convert_float_formats<FormatF32, FormatF11>(green_bits));
  dst.value.z = uint32_t_to_float(convert_float_formats<FormatF32, FormatF10>(blue_bits));
}
|
||||||
|
|
||||||
|
/**
 * Pack three 32 bit floats into a single B10F_G11F_R11F pixel.
 *
 * Every channel is first narrowed to its low precision layout (11 bits for red and green,
 * 10 bits for blue) and then moved to its position inside the packed value.
 */
static void convert(B10F_G11G_R11F &dst, const FLOAT3 &src)
{
  const uint32_t red = convert_float_formats<FormatF11, FormatF32>(
      float_to_uint32_t(src.value.x));
  const uint32_t green = convert_float_formats<FormatF11, FormatF32>(
      float_to_uint32_t(src.value.y));
  const uint32_t blue = convert_float_formats<FormatF10, FormatF32>(
      float_to_uint32_t(src.value.z));

  uint32_t packed = red << SHIFT_R;
  packed |= green << SHIFT_G;
  packed |= blue << SHIFT_B;
  dst.value = packed;
}
|
||||||
|
|
||||||
/* \} */
|
/* \} */
|
||||||
|
|
||||||
template<typename DestinationType, typename SourceType>
|
template<typename DestinationType, typename SourceType>
|
||||||
|
@ -829,6 +881,14 @@ static void convert_buffer(void *dst_memory,
|
||||||
convert_per_component<F32, UnsignedNormalized<DepthComponent24>>(
|
convert_per_component<F32, UnsignedNormalized<DepthComponent24>>(
|
||||||
dst_memory, src_memory, buffer_size, device_format);
|
dst_memory, src_memory, buffer_size, device_format);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ConversionType::FLOAT_TO_B10F_G11F_R11F:
|
||||||
|
convert_per_pixel<B10F_G11G_R11F, FLOAT3>(dst_memory, src_memory, buffer_size);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ConversionType::B10F_G11F_R11F_TO_FLOAT:
|
||||||
|
convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -876,7 +936,8 @@ void convert_device_to_host(void *dst_buffer,
|
||||||
eGPUTextureFormat device_format)
|
eGPUTextureFormat device_format)
|
||||||
{
|
{
|
||||||
ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
|
ConversionType conversion_type = reversed(host_to_device(host_format, device_format));
|
||||||
BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
|
BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED,
|
||||||
|
"Data conversion between host_format and device_format isn't supported (yet).");
|
||||||
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
|
convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,4 +96,171 @@ bool conversion_needed(const GPUVertFormat &vertex_format);
|
||||||
*/
|
*/
|
||||||
void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len);
|
void convert_in_place(void *data, const GPUVertFormat &vertex_format, const uint vertex_len);
|
||||||
|
|
||||||
|
/* -------------------------------------------------------------------- */
|
||||||
|
/** \name Floating point conversions
|
||||||
|
* \{ */
|
||||||
|
|
||||||
|
/**
 * Description of an IEEE 754 style binary floating point data type.
 *
 * From the most to the least significant bit the layout is: an optional sign bit,
 * `ExponentBitLen` exponent bits and `MantissaBitLen` mantissa bits. All accessors work on the
 * raw bits of a number stored in the low bits of an `uint32_t`.
 */
template<bool HasSignBit, uint8_t MantissaBitLen, uint8_t ExponentBitLen>
class FloatingPointFormat {
 public:
  static constexpr bool HAS_SIGN = HasSignBit;
  static constexpr uint8_t SIGN_SHIFT = MantissaBitLen + ExponentBitLen;
  static constexpr uint32_t SIGN_MASK = HasSignBit ? 1 : 0;
  static constexpr uint8_t MANTISSA_LEN = MantissaBitLen;
  static constexpr uint8_t MANTISSA_SHIFT = 0;
  /* Masks are built from an unsigned one so no shift ever touches the sign bit of an `int`. */
  static constexpr uint32_t MANTISSA_MASK = (uint32_t(1) << MantissaBitLen) - 1;
  /* Mantissa written for NaN values: all mantissa bits set. */
  static constexpr uint32_t MANTISSA_NAN_MASK = MANTISSA_MASK;
  static constexpr uint8_t EXPONENT_SHIFT = MantissaBitLen;
  static constexpr uint8_t EXPONENT_LEN = ExponentBitLen;
  static constexpr uint32_t EXPONENT_MASK = (uint32_t(1) << ExponentBitLen) - 1;
  static constexpr int32_t EXPONENT_BIAS = (1 << (ExponentBitLen - 1)) - 1;
  /* Exponent field with all bits set; marks infinity (mantissa == 0) and NaN (mantissa != 0). */
  static constexpr int32_t EXPONENT_SPECIAL_MASK = EXPONENT_MASK;

  /** Extract the mantissa bits of the given number. */
  static uint32_t get_mantissa(uint32_t floating_point_number)
  {
    return (floating_point_number >> MANTISSA_SHIFT) & MANTISSA_MASK;
  }

  /** Return the given number with all mantissa bits cleared. */
  static uint32_t clear_mantissa(uint32_t floating_point_number)
  {
    return floating_point_number & ~(MANTISSA_MASK << MANTISSA_SHIFT);
  }

  /** Return the given number with its mantissa replaced by `mantissa`. */
  static uint32_t set_mantissa(uint32_t mantissa, uint32_t floating_point_number)
  {
    uint32_t result = clear_mantissa(floating_point_number);
    result |= mantissa << MANTISSA_SHIFT;
    return result;
  }

  /** Extract the (biased) exponent field of the given number. */
  static uint32_t get_exponent(uint32_t floating_point_number)
  {
    return ((floating_point_number >> EXPONENT_SHIFT) & EXPONENT_MASK);
  }

  /** Return the given number with all exponent bits cleared. */
  static uint32_t clear_exponent(uint32_t floating_point_number)
  {
    return floating_point_number & ~(EXPONENT_MASK << EXPONENT_SHIFT);
  }

  /** Return the given number with its (biased) exponent field replaced by `exponent`. */
  static uint32_t set_exponent(uint32_t exponent, uint32_t floating_point_number)
  {
    uint32_t result = clear_exponent(floating_point_number);
    result |= (exponent) << EXPONENT_SHIFT;
    return result;
  }

  /** True when the sign bit is set. Always false for formats without a sign bit. */
  static bool is_signed(uint32_t floating_point_number)
  {
    if constexpr (HasSignBit) {
      return (floating_point_number >> SIGN_SHIFT) & SIGN_MASK;
    }
    else {
      return false;
    }
  }

  /** Return the given number with the bit at `SIGN_SHIFT` cleared. */
  static uint32_t clear_sign(uint32_t floating_point_number)
  {
    /* Shift an unsigned one: for 32 bit floats SIGN_SHIFT is 31, which would move a signed
     * `1` into the sign bit of an `int`. */
    return floating_point_number & ~(uint32_t(1) << SIGN_SHIFT);
  }

  /** Return the given number with its sign set to `sign`. No-op for unsigned formats. */
  static uint32_t set_sign(bool sign, uint32_t floating_point_number)
  {
    if constexpr (!HasSignBit) {
      return floating_point_number;
    }
    else {
      uint32_t result = clear_sign(floating_point_number);
      result |= uint32_t(sign) << SIGN_SHIFT;
      return result;
    }
  }
};

using FormatF32 = FloatingPointFormat<true, 23, 8>;
using FormatF16 = FloatingPointFormat<true, 10, 5>;
using FormatF11 = FloatingPointFormat<false, 6, 5>;
using FormatF10 = FloatingPointFormat<false, 5, 5>;

/**
 * Convert between low precision floating point formats (including 32 bit floats).
 *
 * The input and output values are bits (uint32_t) as this function performs bit-wise operations
 * to convert between the formats. Additional conversion rules can be applied to the conversion
 * function. Most decisions are taken with `if constexpr`, so every instantiation only contains
 * the code paths that are relevant for its pair of formats.
 *
 * Conversion rules:
 * - Zero converts to zero (all bits cleared).
 * - Infinity converts to infinity, NaN converts to a NaN with all mantissa bits set.
 * - When the destination has fewer exponent bits, values that are too large are clamped to the
 *   largest finite value of the destination and values whose exponent is below the smallest
 *   destination exponent become zero.
 * - When the destination has fewer mantissa bits the mantissa is truncated (no rounding).
 * - Subnormal numbers do not get any special treatment.
 */
template<
    /**
     * FloatingPointFormat of the value that is converted to.
     */
    typename DestinationFormat,

    /**
     * FloatingPointFormat of the value that is converted from.
     */
    typename SourceFormat,

    /**
     * Should negative values be clamped to zero when DestinationFormat doesn't contain a sign
     * bit. Also -Inf will be clamped to zero.
     *
     * When set to `false` and DestinationFormat doesn't contain a sign bit the value will be
     * made absolute.
     */
    bool ClampNegativeToZero = true>
uint32_t convert_float_formats(uint32_t value)
{
  bool is_signed = SourceFormat::is_signed(value);
  uint32_t mantissa = SourceFormat::get_mantissa(value);
  int32_t exponent = SourceFormat::get_exponent(value);

  const bool is_nan = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && mantissa;
  const bool is_inf = (exponent == SourceFormat::EXPONENT_SPECIAL_MASK) && (mantissa == 0);
  const bool is_zero = (exponent == 0 && mantissa == 0);

  /* Sign conversion */
  if constexpr (!DestinationFormat::HAS_SIGN && ClampNegativeToZero) {
    /* NaN is never clamped, it has to stay NaN. */
    if (is_signed && !is_nan) {
      return 0;
    }
  }
  if (is_zero) {
    return 0;
  }

  if (is_inf) {
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
  }
  else if (is_nan) {
    exponent = DestinationFormat::EXPONENT_SPECIAL_MASK;
    mantissa = DestinationFormat::MANTISSA_NAN_MASK;
  }
  else {
    /* Exponent conversion: remove the source bias, the destination bias is added below. */
    exponent -= SourceFormat::EXPONENT_BIAS;
    /* Clamping when destination has lower precision. */
    if constexpr (SourceFormat::EXPONENT_LEN > DestinationFormat::EXPONENT_LEN) {
      if (exponent > DestinationFormat::EXPONENT_BIAS) {
        /* Too large for the destination: clamp to its largest finite value. That value has the
         * largest regular exponent (equal to the bias once unbiased) and all mantissa bits set.
         * Using an exponent of zero here would encode a value just below 2.0 instead. */
        exponent = DestinationFormat::EXPONENT_BIAS;
        mantissa = SourceFormat::MANTISSA_MASK;
      }
      else if (exponent < -DestinationFormat::EXPONENT_BIAS) {
        return 0;
      }
    }
    exponent += DestinationFormat::EXPONENT_BIAS;

    /* Mantissa conversion */
    if constexpr (SourceFormat::MANTISSA_LEN > DestinationFormat::MANTISSA_LEN) {
      mantissa = mantissa >> (SourceFormat::MANTISSA_LEN - DestinationFormat::MANTISSA_LEN);
    }
    else if constexpr (SourceFormat::MANTISSA_LEN < DestinationFormat::MANTISSA_LEN) {
      mantissa = mantissa << (DestinationFormat::MANTISSA_LEN - SourceFormat::MANTISSA_LEN);
    }
  }

  uint32_t result = 0;
  result = DestinationFormat::set_sign(is_signed, result);
  result = DestinationFormat::set_exponent(exponent, result);
  result = DestinationFormat::set_mantissa(mantissa, result);
  return result;
}
|
||||||
|
|
||||||
|
/* \} */
|
||||||
}; // namespace blender::gpu
|
}; // namespace blender::gpu
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
/* SPDX-License-Identifier: Apache-2.0 */
|
||||||
|
|
||||||
|
#include "testing/testing.h"
|
||||||
|
|
||||||
|
#include "vk_data_conversion.hh"
|
||||||
|
|
||||||
|
namespace blender::gpu::tests {
|
||||||
|
static void test_f32_f16(uint32_t f32_in, uint32_t f16_expected)
|
||||||
|
{
|
||||||
|
const uint32_t f16 = convert_float_formats<FormatF16, FormatF32>(f32_in);
|
||||||
|
EXPECT_EQ(f16, f16_expected);
|
||||||
|
const uint32_t f32_reverse = convert_float_formats<FormatF32, FormatF16>(f16);
|
||||||
|
EXPECT_EQ(f32_reverse, f32_in);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Round trip a handful of values that are exactly representable in both formats. */
TEST(VulkanDataConversion, ConvertF32F16)
{
  struct Sample {
    uint32_t f32_bits;
    uint32_t f16_bits;
  };
  const Sample samples[] = {
      /* 0.0 */
      {0b00000000000000000000000000000000, 0b0000000000000000},
      /* 0.125 */
      {0b00111110000000000000000000000000, 0b0011000000000000},
      /* 2.0 */
      {0b01000000000000000000000000000000, 0b0100000000000000},
      /* 3.0 */
      {0b01000000010000000000000000000000, 0b0100001000000000},
      /* 4.0 */
      {0b01000000100000000000000000000000, 0b0100010000000000},
  };
  for (const Sample &sample : samples) {
    test_f32_f16(sample.f32_bits, sample.f16_bits);
  }
}
|
||||||
|
|
||||||
|
/* Check how negative inputs are handled for signed (F16) and unsigned (F11) destinations, with
 * the `ClampNegativeToZero` template argument both enabled and disabled. */
TEST(VulkanDataConversion, clamp_negative_to_zero)
{
  /* Source bit patterns (32 bit floats). */
  const uint32_t minus_two = 0b11000000000000000000000000000000;
  const uint32_t minus_inf = 0b11111111100000000000000000000000;
  const uint32_t plus_inf = 0b01111111100000000000000000000000;
  const uint32_t nan = 0b11111111111111111111111111111111;

  /* F32(-2) fits in F16, the clamp flag has no influence. */
  const uint32_t f16_minus_two = 0b1100000000000000;
  EXPECT_EQ((convert_float_formats<FormatF16, FormatF32, true>(minus_two)), f16_minus_two);
  EXPECT_EQ((convert_float_formats<FormatF16, FormatF32, false>(minus_two)), f16_minus_two);

  /* Expected bit patterns for the unsigned 11 bit float. */
  const uint32_t f11_zero = 0b00000000000;
  const uint32_t f11_two = 0b10000000000;
  const uint32_t f11_inf = 0b11111000000;
  const uint32_t f11_nan = 0b11111111111;

  /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Clamp to zero. */
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, true>(minus_two)), f11_zero);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, true>(minus_inf)), f11_zero);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, true>(plus_inf)), f11_inf);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, true>(nan)), f11_nan);

  /* F32(-2) doesn't fit in F11 as F11 only supports unsigned values. Make absolute. */
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, false>(minus_two)), f11_two);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, false>(minus_inf)), f11_inf);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, false>(plus_inf)), f11_inf);
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, false>(nan)), f11_nan);
}
|
||||||
|
|
||||||
|
/* Positive infinity has to stay positive infinity in every low precision format. */
TEST(VulkanDataConversion, infinity_upper)
{
  const uint32_t plus_inf = 0b01111111100000000000000000000000;

  const uint32_t f16_expected = 0b0111110000000000;
  EXPECT_EQ((convert_float_formats<FormatF16, FormatF32, true>(plus_inf)), f16_expected);

  const uint32_t f11_expected = 0b11111000000;
  EXPECT_EQ((convert_float_formats<FormatF11, FormatF32, true>(plus_inf)), f11_expected);

  const uint32_t f10_expected = 0b1111100000;
  EXPECT_EQ((convert_float_formats<FormatF10, FormatF32, true>(plus_inf)), f10_expected);
}
|
||||||
|
|
||||||
|
/* Negative infinity keeps its sign when the destination format has a sign bit. */
TEST(VulkanDataConversion, infinity_lower)
{
  const uint32_t minus_inf = 0b11111111100000000000000000000000;
  const uint32_t f16_expected = 0b1111110000000000;

  EXPECT_EQ((convert_float_formats<FormatF16, FormatF32, true>(minus_inf)), f16_expected);
}
|
||||||
|
|
||||||
|
} // namespace blender::gpu::tests
|
Loading…
Reference in New Issue
Codestyle