From fea1967037744125fb26e4f093cc36fd4691cd8a Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 14 Mar 2023 13:57:33 +0100 Subject: [PATCH 01/33] Add initial data conversion. --- source/blender/gpu/CMakeLists.txt | 2 + .../blender/gpu/vulkan/vk_command_buffer.cc | 13 + .../blender/gpu/vulkan/vk_command_buffer.hh | 7 + .../blender/gpu/vulkan/vk_data_conversion.cc | 416 ++++++++++++++++++ .../blender/gpu/vulkan/vk_data_conversion.hh | 56 +++ source/blender/gpu/vulkan/vk_texture.cc | 73 ++- 6 files changed, 560 insertions(+), 7 deletions(-) create mode 100644 source/blender/gpu/vulkan/vk_data_conversion.cc create mode 100644 source/blender/gpu/vulkan/vk_data_conversion.hh diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 102b1b1a57b..31a85e61ec5 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -195,6 +195,7 @@ set(VULKAN_SRC vulkan/vk_command_buffer.cc vulkan/vk_common.cc vulkan/vk_context.cc + vulkan/vk_data_conversion.cc vulkan/vk_descriptor_pools.cc vulkan/vk_descriptor_set.cc vulkan/vk_drawlist.cc @@ -222,6 +223,7 @@ set(VULKAN_SRC vulkan/vk_command_buffer.hh vulkan/vk_common.hh vulkan/vk_context.hh + vulkan/vk_data_conversion.hh vulkan/vk_descriptor_pools.hh vulkan/vk_descriptor_set.hh vulkan/vk_drawlist.hh diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index b4526df6aba..9a5463bf890 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -98,6 +98,19 @@ void VKCommandBuffer::copy(VKBuffer &dst_buffer, regions.data()); } +void VKCommandBuffer::clear(VkImage vk_image, + VkImageLayout vk_image_layout, + const VkClearColorValue &vk_clear_color, + Span ranges) +{ + vkCmdClearColorImage(vk_command_buffer_, + vk_image, + vk_image_layout, + &vk_clear_color, + ranges.size(), + ranges.data()); +} + void VKCommandBuffer::pipeline_barrier(VkPipelineStageFlags 
source_stages, VkPipelineStageFlags destination_stages) { diff --git a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index 0f5f47a423a..0a52c66af52 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -51,6 +51,13 @@ class VKCommandBuffer : NonCopyable, NonMovable { void pipeline_barrier(VkPipelineStageFlags source_stages, VkPipelineStageFlags destination_stages); void pipeline_barrier(Span image_memory_barriers); + /** + * Clear color image resource. + */ + void clear(VkImage vk_image, + VkImageLayout vk_image_layout, + const VkClearColorValue &vk_clear_color, + Span ranges); /** * Stop recording commands, encode + send the recordings to Vulkan, wait for the until the diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc new file mode 100644 index 00000000000..51edfe71628 --- /dev/null +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2023 Blender Foundation. All rights reserved. 
*/ + +/** \file + * \ingroup gpu + */ + +#include "vk_data_conversion.hh" + +namespace blender::gpu { +static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) +{ + switch (device_format) { + case GPU_RGBA32F: + case GPU_RG32F: + case GPU_R32F: + case GPU_RGB32F: + case GPU_DEPTH_COMPONENT32F: + return ConversionType::UNMODIFIED; + + case GPU_RGBA16F: + case GPU_RG16F: + case GPU_R16F: + case GPU_RGB16F: + return ConversionType::FLOAT_TO_HALF; + + case GPU_RGBA8UI: + case GPU_RGBA8I: + case GPU_RGBA8: + case GPU_RGBA16UI: + case GPU_RGBA16I: + case GPU_RGBA16: + case GPU_RGBA32UI: + case GPU_RGBA32I: + case GPU_RG8UI: + case GPU_RG8I: + case GPU_RG8: + case GPU_RG16UI: + case GPU_RG16I: + case GPU_RG16: + case GPU_RG32UI: + case GPU_RG32I: + case GPU_R8UI: + case GPU_R8I: + case GPU_R8: + case GPU_R16UI: + case GPU_R16I: + case GPU_R16: + case GPU_R32UI: + case GPU_R32I: + case GPU_RGB10_A2: + case GPU_RGB10_A2UI: + case GPU_R11F_G11F_B10F: + case GPU_DEPTH32F_STENCIL8: + case GPU_DEPTH24_STENCIL8: + case GPU_SRGB8_A8: + case GPU_RGBA8_SNORM: + case GPU_RGBA16_SNORM: + case GPU_RGB8UI: + case GPU_RGB8I: + case GPU_RGB8: + case GPU_RGB8_SNORM: + case GPU_RGB16UI: + case GPU_RGB16I: + case GPU_RGB16: + case GPU_RGB16_SNORM: + case GPU_RGB32UI: + case GPU_RGB32I: + case GPU_RG8_SNORM: + case GPU_RG16_SNORM: + case GPU_R8_SNORM: + case GPU_R16_SNORM: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_SRGB8: + case GPU_RGB9_E5: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + return ConversionType::UNSUPPORTED; + } + return ConversionType::UNSUPPORTED; +} + +static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) +{ + switch (device_format) { + case GPU_RGBA8UI: + case GPU_RGBA8I: + case GPU_RGBA8: + case GPU_RGBA16UI: + case GPU_RGBA16I: + case GPU_RGBA16F: + case GPU_RGBA16: + case GPU_RGBA32UI: + case 
GPU_RGBA32I: + case GPU_RGBA32F: + case GPU_RG8UI: + case GPU_RG8I: + case GPU_RG8: + case GPU_RG16UI: + case GPU_RG16I: + case GPU_RG16F: + case GPU_RG16: + case GPU_RG32UI: + case GPU_RG32I: + case GPU_RG32F: + case GPU_R8UI: + case GPU_R8I: + case GPU_R8: + case GPU_R16UI: + case GPU_R16I: + case GPU_R16F: + case GPU_R16: + case GPU_R32UI: + case GPU_R32I: + case GPU_R32F: + case GPU_RGB10_A2: + case GPU_RGB10_A2UI: + case GPU_R11F_G11F_B10F: + case GPU_DEPTH32F_STENCIL8: + case GPU_DEPTH24_STENCIL8: + case GPU_SRGB8_A8: + case GPU_RGBA8_SNORM: + case GPU_RGBA16_SNORM: + case GPU_RGB8UI: + case GPU_RGB8I: + case GPU_RGB8: + case GPU_RGB8_SNORM: + case GPU_RGB16UI: + case GPU_RGB16I: + case GPU_RGB16F: + case GPU_RGB16: + case GPU_RGB16_SNORM: + case GPU_RGB32UI: + case GPU_RGB32I: + case GPU_RGB32F: + case GPU_RG8_SNORM: + case GPU_RG16_SNORM: + case GPU_R8_SNORM: + case GPU_R16_SNORM: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_SRGB8: + case GPU_RGB9_E5: + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + return ConversionType::UNSUPPORTED; + } + return ConversionType::UNSUPPORTED; +} + +static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) +{ + switch (device_format) { + case GPU_RGBA32UI: + case GPU_RG32UI: + case GPU_R32UI: + case GPU_RGB32UI: + return ConversionType::UNMODIFIED; + + case GPU_RGBA16UI: + case GPU_RG16UI: + case GPU_R16UI: + case GPU_RGB16UI: + return ConversionType::UI32_TO_UI16; + + case GPU_RGBA8UI: + case GPU_RGBA8I: + case GPU_RGBA8: + case GPU_RGBA16I: + case GPU_RGBA16F: + case GPU_RGBA16: + case GPU_RGBA32I: + case GPU_RGBA32F: + case GPU_RG8UI: + case GPU_RG8I: + case GPU_RG8: + case GPU_RG16I: + case GPU_RG16F: + case GPU_RG16: + case GPU_RG32I: + case GPU_RG32F: + case GPU_R8UI: + case GPU_R8I: + case GPU_R8: + case GPU_R16I: + case GPU_R16F: + case 
GPU_R16: + case GPU_R32I: + case GPU_R32F: + case GPU_RGB10_A2: + case GPU_RGB10_A2UI: + case GPU_R11F_G11F_B10F: + case GPU_DEPTH32F_STENCIL8: + case GPU_DEPTH24_STENCIL8: + case GPU_SRGB8_A8: + case GPU_RGBA8_SNORM: + case GPU_RGBA16_SNORM: + case GPU_RGB8UI: + case GPU_RGB8I: + case GPU_RGB8: + case GPU_RGB8_SNORM: + case GPU_RGB16I: + case GPU_RGB16F: + case GPU_RGB16: + case GPU_RGB16_SNORM: + case GPU_RGB32I: + case GPU_RGB32F: + case GPU_RG8_SNORM: + case GPU_RG16_SNORM: + case GPU_R8_SNORM: + case GPU_R16_SNORM: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_SRGB8: + case GPU_RGB9_E5: + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + return ConversionType::UNSUPPORTED; + } + return ConversionType::UNSUPPORTED; +} + +static ConversionType type_of_conversion_half(eGPUTextureFormat device_format) +{ + switch (device_format) { + case GPU_RGBA8UI: + case GPU_RGBA8I: + case GPU_RGBA8: + case GPU_RGBA16UI: + case GPU_RGBA16I: + case GPU_RGBA16F: + case GPU_RGBA16: + case GPU_RGBA32UI: + case GPU_RGBA32I: + case GPU_RGBA32F: + case GPU_RG8UI: + case GPU_RG8I: + case GPU_RG8: + case GPU_RG16UI: + case GPU_RG16I: + case GPU_RG16F: + case GPU_RG16: + case GPU_RG32UI: + case GPU_RG32I: + case GPU_RG32F: + case GPU_R8UI: + case GPU_R8I: + case GPU_R8: + case GPU_R16UI: + case GPU_R16I: + case GPU_R16F: + case GPU_R16: + case GPU_R32UI: + case GPU_R32I: + case GPU_R32F: + case GPU_RGB10_A2: + case GPU_RGB10_A2UI: + case GPU_R11F_G11F_B10F: + case GPU_DEPTH32F_STENCIL8: + case GPU_DEPTH24_STENCIL8: + case GPU_SRGB8_A8: + case GPU_RGBA8_SNORM: + case GPU_RGBA16_SNORM: + case GPU_RGB8UI: + case GPU_RGB8I: + case GPU_RGB8: + case GPU_RGB8_SNORM: + case GPU_RGB16UI: + case GPU_RGB16I: + case GPU_RGB16F: + case GPU_RGB16: + case GPU_RGB16_SNORM: + case GPU_RGB32UI: + case GPU_RGB32I: + case GPU_RGB32F: + case GPU_RG8_SNORM: + 
case GPU_RG16_SNORM: + case GPU_R8_SNORM: + case GPU_R16_SNORM: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_SRGB8: + case GPU_RGB9_E5: + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + return ConversionType::UNSUPPORTED; + } + return ConversionType::UNSUPPORTED; +} + +ConversionType conversion_type_for_update(eGPUDataFormat host_format, + eGPUTextureFormat device_format) +{ + BLI_assert(validate_data_format(device_format, host_format)); + + switch (host_format) { + case GPU_DATA_FLOAT: + return type_of_conversion_float(device_format); + case GPU_DATA_UINT: + return type_of_conversion_uint(device_format); + case GPU_DATA_INT: + return type_of_conversion_int(device_format); + case GPU_DATA_HALF_FLOAT: + return type_of_conversion_half(device_format); + + case GPU_DATA_UBYTE: + case GPU_DATA_UINT_24_8: + case GPU_DATA_10_11_11_REV: + case GPU_DATA_2_10_10_10_REV: + return ConversionType::UNSUPPORTED; + } + + return ConversionType::UNSUPPORTED; +} + +static ConversionType invert(ConversionType type) +{ + switch (type) { + case ConversionType::UNMODIFIED: + return ConversionType::UNMODIFIED; + + case ConversionType::UI16_TO_UI32: + return ConversionType::UI32_TO_UI16; + case ConversionType::UI32_TO_UI16: + return ConversionType::UI16_TO_UI32; + + case ConversionType::FLOAT_TO_HALF: + return ConversionType::HALF_TO_FLOAT; + case ConversionType::HALF_TO_FLOAT: + return ConversionType::FLOAT_TO_HALF; + + case ConversionType::UNSUPPORTED: + return ConversionType::UNSUPPORTED; + } + + return ConversionType::UNSUPPORTED; +} + +ConversionType conversion_type_for_read(eGPUDataFormat host_format, + eGPUTextureFormat device_format) +{ + return invert(conversion_type_for_update(host_format, device_format)); +} + +/* Copy the contents of src to dst with out performing any actual conversion. 
*/ +template +void copy_unchecked(MutableSpan dst, Span src) +{ + BLI_assert(src.size() == dst.size()); + for (SourceType index : IndexRange(src.size())) { + dst[index] = src[index]; + } +} + +void convert(ConversionType type, + eGPUTextureFormat device_format, + size_t sample_len, + void *dst_memory, + const void *src_memory) +{ + switch (type) { + case ConversionType::UNSUPPORTED: + return; + + case ConversionType::UNMODIFIED: + memcpy(dst_memory, src_memory, sample_len * to_bytesize(device_format)); + return; + + case ConversionType::UI16_TO_UI32: { + size_t component_len = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + component_len); + MutableSpan dst = MutableSpan(static_cast(dst_memory), + component_len); + copy_unchecked(dst, src); + break; + } + + case ConversionType::UI32_TO_UI16: { + size_t component_len = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + component_len); + MutableSpan dst = MutableSpan(static_cast(dst_memory), + component_len); + copy_unchecked(dst, src); + break; + } + + case ConversionType::FLOAT_TO_HALF: + case ConversionType::HALF_TO_FLOAT: + BLI_assert_unreachable(); + return; + } +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh new file mode 100644 index 00000000000..32ad6dff55d --- /dev/null +++ b/source/blender/gpu/vulkan/vk_data_conversion.hh @@ -0,0 +1,56 @@ + +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2023 Blender Foundation. All rights reserved. */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "gpu_texture_private.hh" + +namespace blender::gpu { + +enum class ConversionType { + /** No conversion needed, result can be directly read back to host memory. 
*/ + UNMODIFIED, + + UI16_TO_UI32, + UI32_TO_UI16, + + /* + UI8_TO_UI32, + I16_TO_I32, + I8_TO_I32, + UI8_TO_I32, + UI8_TO_FLOAT, + UI8_TO_UBYTE, + */ + + /** Convert device 16F to floats. */ + HALF_TO_FLOAT, + FLOAT_TO_HALF, + + /** + * The requested conversion isn't supported. + */ + UNSUPPORTED, +}; + +/** + * Determine the type of conversion that is needed to read back data from GPU device to host + * memory. + */ +ConversionType conversion_type_for_read(eGPUDataFormat host_format, + eGPUTextureFormat device_format); +ConversionType conversion_type_for_update(eGPUDataFormat host_format, + eGPUTextureFormat device_format); + +void convert(ConversionType type, + eGPUTextureFormat device_format, + size_t sample_len, + void *dst_memory, + const void *src_memory); + +}; // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 1732d54a949..37ff2f72fa1 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -9,10 +9,13 @@ #include "vk_buffer.hh" #include "vk_context.hh" +#include "vk_data_conversion.hh" #include "vk_memory.hh" #include "vk_shader.hh" #include "vk_shader_interface.hh" +#include "BLI_math_vector.hh" + #include "BKE_global.h" namespace blender::gpu { @@ -34,8 +37,64 @@ void VKTexture::copy_to(Texture * /*tex*/) { } -void VKTexture::clear(eGPUDataFormat /*format*/, const void * /*data*/) +template void copy_color(T dst[4], const T *src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +static VkClearColorValue to_vk_clear_color_value(eGPUDataFormat format, const void *data) +{ + VkClearColorValue result = {0.0f}; + switch (format) { + case GPU_DATA_FLOAT: { + const float *float_data = static_cast(data); + copy_color(result.float32, float_data); + break; + } + + case GPU_DATA_INT: { + const int32_t *int_data = static_cast(data); + copy_color(result.int32, int_data); + break; + } + + case GPU_DATA_UINT: { + 
const uint32_t *uint_data = static_cast(data); + copy_color(result.uint32, uint_data); + break; + } + + case GPU_DATA_HALF_FLOAT: + case GPU_DATA_UBYTE: + case GPU_DATA_UINT_24_8: + case GPU_DATA_10_11_11_REV: + case GPU_DATA_2_10_10_10_REV: { + BLI_assert_unreachable(); + break; + } + } + return result; +} + +void VKTexture::clear(eGPUDataFormat format, const void *data) +{ + if (!is_allocated()) { + allocate(); + } + + VKContext &context = *VKContext::get(); + VKCommandBuffer &command_buffer = context.command_buffer_get(); + VkClearColorValue clear_color = to_vk_clear_color_value(format, data); + VkImageSubresourceRange range = {0}; + range.aspectMask = to_vk_image_aspect_flag_bits(format_); + range.levelCount = VK_REMAINING_MIP_LEVELS; + range.layerCount = VK_REMAINING_ARRAY_LAYERS; + + command_buffer.clear( + vk_image_, VK_IMAGE_LAYOUT_GENERAL, clear_color, Span(&range, 1)); } void VKTexture::swizzle_set(const char /*swizzle_mask*/[4]) @@ -80,11 +139,11 @@ void *VKTexture::read(int mip, eGPUDataFormat format) void *data = MEM_mallocN(host_memory_size, __func__); - /* TODO: add conversion when data format is different. */ - BLI_assert_msg(device_memory_size == host_memory_size, + /* Convert data from device to host memory. 
*/ + ConversionType conversion_type = conversion_type_for_read(format, format_); + BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED, "Memory data conversions not implemented yet"); - - staging_buffer.read(data); + convert(conversion_type, format_, sample_len, data, staging_buffer.mapped_memory_get()); return data; } @@ -152,8 +211,8 @@ bool VKTexture::allocate() image_info.format = to_vk_format(format_); image_info.tiling = VK_IMAGE_TILING_LINEAR; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; -- 2.30.2 From 0f30f7591b83c6b1031c2ccf15bef3b91c0c29ce Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 14 Mar 2023 15:55:48 +0100 Subject: [PATCH 02/33] Added roundtrip test cases. 
--- source/blender/gpu/intern/gpu_texture.cc | 3 +- source/blender/gpu/tests/texture_test.cc | 192 ++++++++++++++++++ .../blender/gpu/vulkan/vk_command_buffer.cc | 11 + .../blender/gpu/vulkan/vk_command_buffer.hh | 1 + .../blender/gpu/vulkan/vk_data_conversion.cc | 12 +- source/blender/gpu/vulkan/vk_texture.cc | 38 +++- 6 files changed, 246 insertions(+), 11 deletions(-) diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc index bfe495c7378..a4db4350c54 100644 --- a/source/blender/gpu/intern/gpu_texture.cc +++ b/source/blender/gpu/intern/gpu_texture.cc @@ -213,7 +213,8 @@ void Texture::detach_from(FrameBuffer *fb) void Texture::update(eGPUDataFormat format, const void *data) { int mip = 0; - int extent[3], offset[3] = {0, 0, 0}; + int extent[3] = {1, 1, 1}; + int offset[3] = {0, 0, 0}; this->mip_size_get(mip, extent); this->update_sub(mip, offset, extent, format, data); } diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index c453e9eb2d2..57e8d7c2160 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -46,4 +46,196 @@ static void test_texture_read() } GPU_TEST(texture_read) +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing 32F + * \{ */ + +static float *generate_test_data_float(size_t data_len) +{ + float *data = static_cast(MEM_mallocN(data_len * sizeof(float), __func__)); + for (int i : IndexRange(data_len)) { + data[i] = 8.0 / max_ff(i % 8, 0.5f); + } + return data; +} + +template +static void texture_create_upload_read_float() +{ + size_t data_len = Size * Size * ComponentLen; + float *data = generate_test_data_float(data_len); + + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d("texture", Size, Size, 1, DeviceFormat, usage, data); + EXPECT_NE(texture, nullptr); + + float *read_data = 
(float *)GPU_texture_read(texture, GPU_DATA_FLOAT, 0); + for (int i : IndexRange(data_len)) { + EXPECT_EQ(read_data[i], data[i]); + } + MEM_freeN(read_data); + + GPU_texture_free(texture); + MEM_freeN(data); +} + +static void test_texture_roundtrip_FLOAT_RGBA32F() +{ + texture_create_upload_read_float(); +} +GPU_TEST(texture_roundtrip_FLOAT_RGBA32F) + +#if 0 +/* Isn't supported natively on NVidia/Vulkan. */ +static void test_texture_roundtrip_FLOAT_RGBA32F() +{ + texture_create_upload_read_float(); +} +GPU_TEST(texture_roundtrip_FLOAT_RGBA32F) +#endif + +static void test_texture_roundtrip_FLOAT_RG32F() +{ + texture_create_upload_read_float(); +} +GPU_TEST(texture_roundtrip_FLOAT_RG32F) + +static void test_texture_roundtrip_FLOAT_R32F() +{ + texture_create_upload_read_float(); +} +GPU_TEST(texture_roundtrip_FLOAT_R32F) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing 32UI + * \{ */ + +static uint32_t *generate_test_data_uint(size_t data_len) +{ + uint32_t *data = static_cast(MEM_mallocN(data_len * sizeof(uint32_t), __func__)); + for (int i : IndexRange(data_len)) { + data[i] = 8 / max_ii(i % 8, 1); + } + return data; +} + +template +static void texture_create_upload_read_uint() +{ + + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + "texture", Size, Size, 1, DeviceFormat, usage, nullptr); + EXPECT_NE(texture, nullptr); + + size_t data_len = Size * Size * ComponentLen; + uint32_t *data = generate_test_data_uint(data_len); + GPU_texture_update(texture, GPU_DATA_UINT, data); + + uint32_t *read_data = (uint32_t *)GPU_texture_read(texture, GPU_DATA_UINT, 0); + for (int i : IndexRange(data_len)) { + EXPECT_EQ(read_data[i], data[i]); + } + MEM_freeN(read_data); + + GPU_texture_free(texture); + MEM_freeN(data); +} + +static void test_texture_roundtrip_UINT_RGBA32UI() +{ + texture_create_upload_read_uint(); +} 
+GPU_TEST(texture_roundtrip_UINT_RGBA32UI) + +#if 0 +/* Isn't supported natively on NVidia/Vulkan. */ +static void test_texture_roundtrip_UINT_RGB32UI() +{ + texture_create_upload_read_uint(); +} +GPU_TEST(texture_roundtrip_UINT_RGB32UI) +#endif + +static void test_texture_roundtrip_UINT_RG32UI() +{ + texture_create_upload_read_uint(); +} +GPU_TEST(texture_roundtrip_UINT_RG32UI) + +static void test_texture_roundtrip_UINT_R32UI() +{ + texture_create_upload_read_uint(); +} +GPU_TEST(texture_roundtrip_UINT_R32UI) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing 32I + * \{ */ + +static int32_t *generate_test_data_int(size_t data_len) +{ + int32_t *data = static_cast(MEM_mallocN(data_len * sizeof(int32_t), __func__)); + for (int i : IndexRange(data_len)) { + data[i] = 8 / max_ii(i % 8, 1); + } + return data; +} + +template +static void texture_create_upload_read_int() +{ + + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + "texture", Size, Size, 1, DeviceFormat, usage, nullptr); + EXPECT_NE(texture, nullptr); + + size_t data_len = Size * Size * ComponentLen; + int32_t *data = generate_test_data_int(data_len); + GPU_texture_update(texture, GPU_DATA_INT, data); + + uint32_t *read_data = (uint32_t *)GPU_texture_read(texture, GPU_DATA_INT, 0); + for (int i : IndexRange(data_len)) { + EXPECT_EQ(read_data[i], data[i]); + } + MEM_freeN(read_data); + + GPU_texture_free(texture); + MEM_freeN(data); +} + +static void test_texture_roundtrip_INT_RGBA32I() +{ + texture_create_upload_read_int(); +} +GPU_TEST(texture_roundtrip_INT_RGBA32I) + +#if 0 +/* Isn't supported natively on NVidia/Vulkan. 
*/ +static void test_texture_roundtrip_INT_RGB32I() +{ + texture_create_upload_read_int(); +} +GPU_TEST(texture_roundtrip_INT_RGB32I) +#endif + +static void test_texture_roundtrip_INT_RG32I() +{ + texture_create_upload_read_int(); +} +GPU_TEST(texture_roundtrip_INT_RG32I) + +static void test_texture_roundtrip_INT_R32I() +{ + texture_create_upload_read_int(); +} +GPU_TEST(texture_roundtrip_INT_R32I) + +/** \} */ + } // namespace blender::gpu::tests \ No newline at end of file diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index 9a5463bf890..b59976a23cd 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -97,6 +97,17 @@ void VKCommandBuffer::copy(VKBuffer &dst_buffer, regions.size(), regions.data()); } +void VKCommandBuffer::copy(VKTexture &dst_texture, + VKBuffer &src_buffer, + Span regions) +{ + vkCmdCopyBufferToImage(vk_command_buffer_, + src_buffer.vk_handle(), + dst_texture.vk_image_handle(), + VK_IMAGE_LAYOUT_GENERAL, + regions.size(), + regions.data()); +} void VKCommandBuffer::clear(VkImage vk_image, VkImageLayout vk_image_layout, diff --git a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index 0a52c66af52..d8f8543eb70 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -48,6 +48,7 @@ class VKCommandBuffer : NonCopyable, NonMovable { void dispatch(int groups_x_len, int groups_y_len, int groups_z_len); /** Copy the contents of a texture MIP level to the dst buffer. 
*/ void copy(VKBuffer &dst_buffer, VKTexture &src_texture, Span regions); + void copy(VKTexture &dst_texture, VKBuffer &src_buffer, Span regions); void pipeline_barrier(VkPipelineStageFlags source_stages, VkPipelineStageFlags destination_stages); void pipeline_barrier(Span image_memory_barriers); diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index 51edfe71628..bb573649a0a 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -14,7 +14,6 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RGBA32F: case GPU_RG32F: case GPU_R32F: - case GPU_RGB32F: case GPU_DEPTH_COMPONENT32F: return ConversionType::UNMODIFIED; @@ -24,6 +23,7 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RGB16F: return ConversionType::FLOAT_TO_HALF; + case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */ case GPU_RGBA8UI: case GPU_RGBA8I: case GPU_RGBA8: @@ -88,6 +88,11 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) { switch (device_format) { + case GPU_RGBA32I: + case GPU_RG32I: + case GPU_R32I: + return ConversionType::UNMODIFIED; + case GPU_RGBA8UI: case GPU_RGBA8I: case GPU_RGBA8: @@ -96,7 +101,6 @@ static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) case GPU_RGBA16F: case GPU_RGBA16: case GPU_RGBA32UI: - case GPU_RGBA32I: case GPU_RGBA32F: case GPU_RG8UI: case GPU_RG8I: @@ -106,7 +110,6 @@ static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) case GPU_RG16F: case GPU_RG16: case GPU_RG32UI: - case GPU_RG32I: case GPU_RG32F: case GPU_R8UI: case GPU_R8I: @@ -116,7 +119,6 @@ static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) case GPU_R16F: case GPU_R16: case GPU_R32UI: - case GPU_R32I: case 
GPU_R32F: case GPU_RGB10_A2: case GPU_RGB10_A2UI: @@ -164,7 +166,6 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) case GPU_RGBA32UI: case GPU_RG32UI: case GPU_R32UI: - case GPU_RGB32UI: return ConversionType::UNMODIFIED; case GPU_RGBA16UI: @@ -213,6 +214,7 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) case GPU_RGB16F: case GPU_RGB16: case GPU_RGB16_SNORM: + case GPU_RGB32UI: case GPU_RGB32I: case GPU_RGB32F: case GPU_RG8_SNORM: diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 37ff2f72fa1..ba403a49426 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -148,12 +148,40 @@ void *VKTexture::read(int mip, eGPUDataFormat format) return data; } -void VKTexture::update_sub(int /*mip*/, - int /*offset*/[3], - int /*extent*/[3], - eGPUDataFormat /*format*/, - const void * /*data*/) +void VKTexture::update_sub( + int mip, int /*offset*/[3], int extent[3], eGPUDataFormat format, const void *data) { + if (!is_allocated()) { + allocate(); + } + + /* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. 
*/ + VKContext &context = *VKContext::get(); + VKBuffer staging_buffer; + size_t sample_len = extent[0] * extent[1] * extent[2]; + size_t device_memory_size = sample_len * to_bytesize(format_); + + staging_buffer.create( + context, device_memory_size, GPU_USAGE_DEVICE_ONLY, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + + ConversionType conversion_type = conversion_type_for_update(format, format_); + BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED, + "Memory data conversions not implemented yet"); + convert(conversion_type, format_, sample_len, staging_buffer.mapped_memory_get(), data); + + VkBufferImageCopy region = {}; + region.imageExtent.width = extent[0]; + region.imageExtent.height = extent[1]; + region.imageExtent.depth = extent[2]; + region.imageSubresource.aspectMask = to_vk_image_aspect_flag_bits(format_); + region.imageSubresource.mipLevel = mip; + region.imageSubresource.layerCount = 1; + + VKCommandBuffer &command_buffer = context.command_buffer_get(); + command_buffer.copy(*this, staging_buffer, Span(®ion, 1)); + command_buffer.submit(); + + /* TODO: add support for offset. */ } void VKTexture::update_sub(int /*offset*/[3], -- 2.30.2 From b0864d4a167ab80f729d21375c078596f677b2f9 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 14 Mar 2023 15:57:29 +0100 Subject: [PATCH 03/33] Renamed unmodified to pass_through. 
--- source/blender/gpu/vulkan/vk_data_conversion.cc | 12 ++++++------ source/blender/gpu/vulkan/vk_data_conversion.hh | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index bb573649a0a..a1cba618c6f 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -15,7 +15,7 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RG32F: case GPU_R32F: case GPU_DEPTH_COMPONENT32F: - return ConversionType::UNMODIFIED; + return ConversionType::PASS_THROUGH; case GPU_RGBA16F: case GPU_RG16F: @@ -91,7 +91,7 @@ static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) case GPU_RGBA32I: case GPU_RG32I: case GPU_R32I: - return ConversionType::UNMODIFIED; + return ConversionType::PASS_THROUGH; case GPU_RGBA8UI: case GPU_RGBA8I: @@ -166,7 +166,7 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) case GPU_RGBA32UI: case GPU_RG32UI: case GPU_R32UI: - return ConversionType::UNMODIFIED; + return ConversionType::PASS_THROUGH; case GPU_RGBA16UI: case GPU_RG16UI: @@ -338,8 +338,8 @@ ConversionType conversion_type_for_update(eGPUDataFormat host_format, static ConversionType invert(ConversionType type) { switch (type) { - case ConversionType::UNMODIFIED: - return ConversionType::UNMODIFIED; + case ConversionType::PASS_THROUGH: + return ConversionType::PASS_THROUGH; case ConversionType::UI16_TO_UI32: return ConversionType::UI32_TO_UI16; @@ -384,7 +384,7 @@ void convert(ConversionType type, case ConversionType::UNSUPPORTED: return; - case ConversionType::UNMODIFIED: + case ConversionType::PASS_THROUGH: memcpy(dst_memory, src_memory, sample_len * to_bytesize(device_format)); return; diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh index 32ad6dff55d..9406823f7b0 100644 --- 
a/source/blender/gpu/vulkan/vk_data_conversion.hh +++ b/source/blender/gpu/vulkan/vk_data_conversion.hh @@ -14,7 +14,7 @@ namespace blender::gpu { enum class ConversionType { /** No conversion needed, result can be directly read back to host memory. */ - UNMODIFIED, + PASS_THROUGH, UI16_TO_UI32, UI32_TO_UI16, -- 2.30.2 From bc1aa48ae9e2670bc65837a4fec6f75a305ef9e1 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Thu, 16 Mar 2023 15:59:05 +0100 Subject: [PATCH 04/33] Added all test cases for data conversion. --- .../blender/gpu/intern/gpu_texture_private.hh | 3 +- source/blender/gpu/tests/texture_test.cc | 875 +++++++++++++++--- .../blender/gpu/vulkan/vk_data_conversion.cc | 363 +++++++- .../blender/gpu/vulkan/vk_data_conversion.hh | 20 +- source/blender/gpu/vulkan/vk_texture.cc | 9 +- 5 files changed, 1066 insertions(+), 204 deletions(-) diff --git a/source/blender/gpu/intern/gpu_texture_private.hh b/source/blender/gpu/intern/gpu_texture_private.hh index 798a410eaae..2b4244221dc 100644 --- a/source/blender/gpu/intern/gpu_texture_private.hh +++ b/source/blender/gpu/intern/gpu_texture_private.hh @@ -759,7 +759,8 @@ inline size_t to_bytesize(eGPUTextureFormat tex_format, eGPUDataFormat data_form } /* Definitely not complete, edit according to the gl specification. 
*/ -inline bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat data_format) +constexpr inline bool validate_data_format(eGPUTextureFormat tex_format, + eGPUDataFormat data_format) { switch (tex_format) { /* Formats texture & render-buffer */ diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 57e8d7c2160..90460ef2eca 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -3,10 +3,13 @@ #include "MEM_guardedalloc.h" #include "BLI_math_vector.hh" +#include "BLI_vector.hh" #include "GPU_context.h" #include "GPU_texture.h" +#include "gpu_texture_private.hh" + namespace blender::gpu::tests { static void test_texture_read() @@ -46,196 +49,772 @@ static void test_texture_read() } GPU_TEST(texture_read) -/* -------------------------------------------------------------------- */ -/** \name Roundtrip testing 32F - * \{ */ - -static float *generate_test_data_float(size_t data_len) +template static DataType *generate_test_data(size_t data_len) { - float *data = static_cast(MEM_mallocN(data_len * sizeof(float), __func__)); + DataType *data = static_cast(MEM_mallocN(data_len * sizeof(DataType), __func__)); for (int i : IndexRange(data_len)) { - data[i] = 8.0 / max_ff(i % 8, 0.5f); + data[i] = (DataType)(i % 8); } return data; } -template -static void texture_create_upload_read_float() +template +static void texture_create_upload_read() { - size_t data_len = Size * Size * ComponentLen; - float *data = generate_test_data_float(data_len); - - eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; - GPUTexture *texture = GPU_texture_create_2d("texture", Size, Size, 1, DeviceFormat, usage, data); - EXPECT_NE(texture, nullptr); - - float *read_data = (float *)GPU_texture_read(texture, GPU_DATA_FLOAT, 0); - for (int i : IndexRange(data_len)) { - EXPECT_EQ(read_data[i], data[i]); - } - MEM_freeN(read_data); - - GPU_texture_free(texture); - 
MEM_freeN(data); -} - -static void test_texture_roundtrip_FLOAT_RGBA32F() -{ - texture_create_upload_read_float(); -} -GPU_TEST(texture_roundtrip_FLOAT_RGBA32F) - -#if 0 -/* Isn't supported natively on NVidia/Vulkan. */ -static void test_texture_roundtrip_FLOAT_RGBA32F() -{ - texture_create_upload_read_float(); -} -GPU_TEST(texture_roundtrip_FLOAT_RGBA32F) -#endif - -static void test_texture_roundtrip_FLOAT_RG32F() -{ - texture_create_upload_read_float(); -} -GPU_TEST(texture_roundtrip_FLOAT_RG32F) - -static void test_texture_roundtrip_FLOAT_R32F() -{ - texture_create_upload_read_float(); -} -GPU_TEST(texture_roundtrip_FLOAT_R32F) - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Roundtrip testing 32UI - * \{ */ - -static uint32_t *generate_test_data_uint(size_t data_len) -{ - uint32_t *data = static_cast(MEM_mallocN(data_len * sizeof(uint32_t), __func__)); - for (int i : IndexRange(data_len)) { - data[i] = 8 / max_ii(i % 8, 1); - } - return data; -} - -template -static void texture_create_upload_read_uint() -{ - + static_assert(validate_data_format(DeviceFormat, HostFormat)); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( "texture", Size, Size, 1, DeviceFormat, usage, nullptr); EXPECT_NE(texture, nullptr); - size_t data_len = Size * Size * ComponentLen; - uint32_t *data = generate_test_data_uint(data_len); - GPU_texture_update(texture, GPU_DATA_UINT, data); + size_t data_len = Size * Size * to_component_len(DeviceFormat); + DataType *data = static_cast(generate_test_data(data_len)); + GPU_texture_update(texture, HostFormat, data); - uint32_t *read_data = (uint32_t *)GPU_texture_read(texture, GPU_DATA_UINT, 0); + DataType *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); for (int i : IndexRange(data_len)) { EXPECT_EQ(read_data[i], data[i]); } MEM_freeN(read_data); + MEM_freeN(data); GPU_texture_free(texture); - 
MEM_freeN(data); } -static void test_texture_roundtrip_UINT_RGBA32UI() -{ - texture_create_upload_read_uint(); -} -GPU_TEST(texture_roundtrip_UINT_RGBA32UI) - -#if 0 -/* Isn't supported natively on NVidia/Vulkan. */ -static void test_texture_roundtrip_UINT_RGB32UI() -{ - texture_create_upload_read_uint(); -} -GPU_TEST(texture_roundtrip_UINT_RGB32UI) -#endif - -static void test_texture_roundtrip_UINT_RG32UI() -{ - texture_create_upload_read_uint(); -} -GPU_TEST(texture_roundtrip_UINT_RG32UI) - -static void test_texture_roundtrip_UINT_R32UI() -{ - texture_create_upload_read_uint(); -} -GPU_TEST(texture_roundtrip_UINT_R32UI) - -/** \} */ - /* -------------------------------------------------------------------- */ -/** \name Roundtrip testing 32I +/** \name Roundtrip testing GPU_DATA_FLOAT * \{ */ - -static int32_t *generate_test_data_int(size_t data_len) +#if 1 +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8() { - int32_t *data = static_cast(MEM_mallocN(data_len * sizeof(int32_t), __func__)); - for (int i : IndexRange(data_len)) { - data[i] = 8 / max_ii(i % 8, 1); - } - return data; + texture_create_upload_read(); } +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8); -template -static void texture_create_upload_read_int() +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F() { - - eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; - GPUTexture *texture = GPU_texture_create_2d( - "texture", Size, Size, 1, DeviceFormat, usage, nullptr); - EXPECT_NE(texture, nullptr); - - size_t data_len = Size * Size * ComponentLen; - int32_t *data = generate_test_data_int(data_len); - GPU_texture_update(texture, GPU_DATA_INT, data); - - uint32_t *read_data = (uint32_t *)GPU_texture_read(texture, GPU_DATA_INT, 0); - for (int i : IndexRange(data_len)) { - EXPECT_EQ(read_data[i], data[i]); - } - MEM_freeN(read_data); - - GPU_texture_free(texture); - MEM_freeN(data); + texture_create_upload_read(); } 
+GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F); -static void test_texture_roundtrip_INT_RGBA32I() +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16() { - texture_create_upload_read_int(); + texture_create_upload_read(); } -GPU_TEST(texture_roundtrip_INT_RGBA32I) +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16); -#if 0 -/* Isn't supported natively on NVidia/Vulkan. */ -static void test_texture_roundtrip_INT_RGB32I() +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA32F() { - texture_create_upload_read_int(); + texture_create_upload_read(); } -GPU_TEST(texture_roundtrip_INT_RGB32I) +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA32F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG32F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG32F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F); + +static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F() +{ + texture_create_upload_read(); +} 
+GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT1() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT1); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT3() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT3); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT5() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT5); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT1() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT1); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT3() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT3); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8); + +static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24); + +static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16); #endif +/* \} */ -static void test_texture_roundtrip_INT_RG32I() +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_HALF_FLOAT + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGBA16F() { - texture_create_upload_read_int(); + texture_create_upload_read(); } -GPU_TEST(texture_roundtrip_INT_RG32I) +GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGBA16F); -static void test_texture_roundtrip_INT_R32I() +static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RG16F() { - texture_create_upload_read_int(); + texture_create_upload_read(); } -GPU_TEST(texture_roundtrip_INT_R32I) +GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RG16F); +static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_R16F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_R16F); + +static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGB16F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGB16F); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_INT + * \{ */ +#if 0 +static void 
test_texture_roundtrip__GPU_DATA_INT__GPU_RGBA8I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGBA8I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGBA16I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGBA16I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGBA32I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGBA32I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RG8I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG8I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RG16I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG16I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RG32I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG32I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_R8I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_R8I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_R16I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_R16I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_R32I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_R32I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGB8I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGB8I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGB16I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGB16I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGB32I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGB32I); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name 
Roundtrip testing GPU_DATA_UINT + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGBA8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGBA8UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGBA16UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGBA16UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGBA32UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGBA32UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RG8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RG8UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RG16UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RG16UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RG32UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RG32UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_R8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_R8UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_R16UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_R16UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_R32UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_R32UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH32F_STENCIL8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH32F_STENCIL8); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB8UI() +{ + texture_create_upload_read(); +} 
+GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGB8UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB16UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGB16UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT32F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT32F); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT24() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT24); + +static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT16() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT16); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_UBYTE + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGBA8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RGBA8UI); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGBA8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RGBA8); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RG8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RG8UI); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RG8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RG8); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_R8UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_R8UI); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_R8() +{ + 
texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_R8); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGB8I() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RGB8I); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGB8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_RGB8); + +static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_UINT_24_8 + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH32F_STENCIL8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH32F_STENCIL8); + +static void test_texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH24_STENCIL8() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH24_STENCIL8); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_10_11_11_REV + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Roundtrip testing GPU_DATA_2_10_10_10_REV + * \{ */ +#if 0 +static void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2); +static 
void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI() +{ + texture_create_upload_read(); +} +GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); +#endif +/* \} */ + +/* -------------------------------------------------------------------- */ +/** \name Generate test cases. + * + * Next section is kept for convenience to regenerate test cases. + * + * \{ */ +#if 0 + static std::string to_prim_type_string(eGPUDataFormat host_format) + { + switch (host_format) { + case GPU_DATA_FLOAT: + return std::string("float"); + + case GPU_DATA_HALF_FLOAT: + return std::string("half"); + case GPU_DATA_INT: + return std::string("int32_t"); + case GPU_DATA_UINT: + return std::string("uint32_t"); + case GPU_DATA_UBYTE: + return std::string("uint8_t"); + case GPU_DATA_UINT_24_8: + case GPU_DATA_10_11_11_REV: + case GPU_DATA_2_10_10_10_REV: + return std::string("void"); + } + return std::string("UNKNOWN"); + } + static std::string to_string(eGPUDataFormat host_format) + { + switch (host_format) { + case GPU_DATA_FLOAT: + return std::string("GPU_DATA_FLOAT"); + + case GPU_DATA_HALF_FLOAT: + return std::string("GPU_DATA_HALF_FLOAT"); + case GPU_DATA_INT: + return std::string("GPU_DATA_INT"); + case GPU_DATA_UINT: + return std::string("GPU_DATA_UINT"); + case GPU_DATA_UBYTE: + return std::string("GPU_DATA_UBYTE"); + case GPU_DATA_UINT_24_8: + return std::string("GPU_DATA_UINT_24_8"); + case GPU_DATA_10_11_11_REV: + return std::string("GPU_DATA_10_11_11_REV"); + case GPU_DATA_2_10_10_10_REV: + return std::string("GPU_DATA_2_10_10_10_REV"); + } + return std::string("UNKNOWN"); + } + + static std::string to_string(eGPUTextureFormat texture_format) + { + return std::string("GPU_") + std::string(GPU_texture_format_name(texture_format)); + } + + TEST(gpu_util, generate_test_cases) + { + Vector host_formats; + host_formats.append(GPU_DATA_FLOAT); + host_formats.append(GPU_DATA_HALF_FLOAT); + host_formats.append(GPU_DATA_INT); + 
host_formats.append(GPU_DATA_UINT); + host_formats.append(GPU_DATA_UBYTE); + host_formats.append(GPU_DATA_UINT_24_8); + host_formats.append(GPU_DATA_10_11_11_REV); + host_formats.append(GPU_DATA_2_10_10_10_REV); + + Vector texture_formats; + texture_formats.append(GPU_RGBA8UI); + texture_formats.append(GPU_RGBA8I); + texture_formats.append(GPU_RGBA8); + texture_formats.append(GPU_RGBA16UI); + texture_formats.append(GPU_RGBA16I); + texture_formats.append(GPU_RGBA16F); + texture_formats.append(GPU_RGBA16); + texture_formats.append(GPU_RGBA32UI); + texture_formats.append(GPU_RGBA32I); + texture_formats.append(GPU_RGBA32F); + texture_formats.append(GPU_RG8UI); + texture_formats.append(GPU_RG8I); + texture_formats.append(GPU_RG8); + texture_formats.append(GPU_RG16UI); + texture_formats.append(GPU_RG16I); + texture_formats.append(GPU_RG16F); + texture_formats.append(GPU_RG16); + texture_formats.append(GPU_RG32UI); + texture_formats.append(GPU_RG32I); + texture_formats.append(GPU_RG32F); + texture_formats.append(GPU_R8UI); + texture_formats.append(GPU_R8I); + texture_formats.append(GPU_R8); + texture_formats.append(GPU_R16UI); + texture_formats.append(GPU_R16I); + texture_formats.append(GPU_R16F); + texture_formats.append(GPU_R16); + texture_formats.append(GPU_R32UI); + texture_formats.append(GPU_R32I); + texture_formats.append(GPU_R32F); + texture_formats.append(GPU_RGB10_A2); + texture_formats.append(GPU_RGB10_A2UI); + texture_formats.append(GPU_R11F_G11F_B10F); + texture_formats.append(GPU_DEPTH32F_STENCIL8); + texture_formats.append(GPU_DEPTH24_STENCIL8); + texture_formats.append(GPU_SRGB8_A8); + texture_formats.append(GPU_RGBA8_SNORM); + texture_formats.append(GPU_RGBA16_SNORM); + texture_formats.append(GPU_RGB8UI); + texture_formats.append(GPU_RGB8I); + texture_formats.append(GPU_RGB8); + texture_formats.append(GPU_RGB8_SNORM); + texture_formats.append(GPU_RGB16UI); + texture_formats.append(GPU_RGB16I); + texture_formats.append(GPU_RGB16F); + 
texture_formats.append(GPU_RGB16); + texture_formats.append(GPU_RGB16_SNORM); + texture_formats.append(GPU_RGB32UI); + texture_formats.append(GPU_RGB32I); + texture_formats.append(GPU_RGB32F); + texture_formats.append(GPU_RG8_SNORM); + texture_formats.append(GPU_RG16_SNORM); + texture_formats.append(GPU_R8_SNORM); + texture_formats.append(GPU_R16_SNORM); + texture_formats.append(GPU_SRGB8_A8_DXT1); + texture_formats.append(GPU_SRGB8_A8_DXT3); + texture_formats.append(GPU_SRGB8_A8_DXT5); + texture_formats.append(GPU_RGBA8_DXT1); + texture_formats.append(GPU_RGBA8_DXT3); + texture_formats.append(GPU_RGBA8_DXT5); + texture_formats.append(GPU_SRGB8); + texture_formats.append(GPU_RGB9_E5); + texture_formats.append(GPU_DEPTH_COMPONENT32F); + texture_formats.append(GPU_DEPTH_COMPONENT24); + texture_formats.append(GPU_DEPTH_COMPONENT16); + + for (eGPUDataFormat host_format : host_formats) { + std::cout << "/* -------------------------------------------------------------------- */\n"; + std::cout << "/** \\name Roundtrip testing " << to_string(host_format) << "\n"; + std::cout << " * \\{ */\n\n"; + + for (eGPUTextureFormat texture_format : texture_formats) { + if (!validate_data_format(texture_format, host_format)) { + continue; + } + + std::cout << "static void test_texture_roundtrip__" << to_string(host_format) << "__" + << to_string(texture_format) << "()\n"; + std::cout << "{\n"; + + std::cout << " texture_create_upload_read<" << to_string(texture_format) << ", " + << to_string(host_format) << ", " << to_prim_type_string(host_format) + << ">();\n"; + + std::cout << "}\n"; + std::cout << "GPU_TEST(texture_roundtrip__" << to_string(host_format) << "__" + << to_string(texture_format) << ");\n\n"; + } + std::cout << "/* \\} */\n\n"; + } + } +#endif /** \} */ } // namespace blender::gpu::tests \ No newline at end of file diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index a1cba618c6f..21348a0e5a5 100644 --- 
a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -23,10 +23,20 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RGB16F: return ConversionType::FLOAT_TO_HALF; + case GPU_RGBA8: + case GPU_RG8: + case GPU_R8: + return ConversionType::FLOAT_TO_UNORM8; + + case GPU_RGBA8_SNORM: + case GPU_RGB8_SNORM: + case GPU_RG8_SNORM: + case GPU_R8_SNORM: + return ConversionType::FLOAT_TO_SNORM8; + case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */ case GPU_RGBA8UI: case GPU_RGBA8I: - case GPU_RGBA8: case GPU_RGBA16UI: case GPU_RGBA16I: case GPU_RGBA16: @@ -34,7 +44,6 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RGBA32I: case GPU_RG8UI: case GPU_RG8I: - case GPU_RG8: case GPU_RG16UI: case GPU_RG16I: case GPU_RG16: @@ -42,7 +51,6 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_RG32I: case GPU_R8UI: case GPU_R8I: - case GPU_R8: case GPU_R16UI: case GPU_R16I: case GPU_R16: @@ -54,21 +62,17 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_DEPTH32F_STENCIL8: case GPU_DEPTH24_STENCIL8: case GPU_SRGB8_A8: - case GPU_RGBA8_SNORM: case GPU_RGBA16_SNORM: case GPU_RGB8UI: case GPU_RGB8I: case GPU_RGB8: - case GPU_RGB8_SNORM: case GPU_RGB16UI: case GPU_RGB16I: case GPU_RGB16: case GPU_RGB16_SNORM: case GPU_RGB32UI: case GPU_RGB32I: - case GPU_RG8_SNORM: case GPU_RG16_SNORM: - case GPU_R8_SNORM: case GPU_R16_SNORM: case GPU_SRGB8_A8_DXT1: case GPU_SRGB8_A8_DXT3: @@ -93,29 +97,33 @@ static ConversionType type_of_conversion_int(eGPUTextureFormat device_format) case GPU_R32I: return ConversionType::PASS_THROUGH; - case GPU_RGBA8UI: + case GPU_RGBA16I: + case GPU_RG16I: + case GPU_R16I: + return ConversionType::I32_TO_I16; + case GPU_RGBA8I: + case GPU_RG8I: + case GPU_R8I: + return ConversionType::I32_TO_I8; + + case GPU_RGBA8UI: case GPU_RGBA8: case 
GPU_RGBA16UI: - case GPU_RGBA16I: case GPU_RGBA16F: case GPU_RGBA16: case GPU_RGBA32UI: case GPU_RGBA32F: case GPU_RG8UI: - case GPU_RG8I: case GPU_RG8: case GPU_RG16UI: - case GPU_RG16I: case GPU_RG16F: - case GPU_RG16: case GPU_RG32UI: case GPU_RG32F: + case GPU_RG16: case GPU_R8UI: - case GPU_R8I: case GPU_R8: case GPU_R16UI: - case GPU_R16I: case GPU_R16F: case GPU_R16: case GPU_R32UI: @@ -175,6 +183,10 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) return ConversionType::UI32_TO_UI16; case GPU_RGBA8UI: + case GPU_RG8UI: + case GPU_R8UI: + return ConversionType::UI32_TO_UI8; + case GPU_RGBA8I: case GPU_RGBA8: case GPU_RGBA16I: @@ -182,7 +194,6 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) case GPU_RGBA16: case GPU_RGBA32I: case GPU_RGBA32F: - case GPU_RG8UI: case GPU_RG8I: case GPU_RG8: case GPU_RG16I: @@ -190,7 +201,6 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) case GPU_RG16: case GPU_RG32I: case GPU_RG32F: - case GPU_R8UI: case GPU_R8I: case GPU_R8: case GPU_R16I: @@ -310,6 +320,81 @@ static ConversionType type_of_conversion_half(eGPUTextureFormat device_format) return ConversionType::UNSUPPORTED; } +static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) +{ + switch (device_format) { + case GPU_RGBA8UI: + case GPU_RG8UI: + case GPU_R8UI: + return ConversionType::PASS_THROUGH; + + case GPU_RGBA8I: + case GPU_RGBA8: + case GPU_RGBA16UI: + case GPU_RGBA16I: + case GPU_RGBA16F: + case GPU_RGBA16: + case GPU_RGBA32UI: + case GPU_RGBA32I: + case GPU_RGBA32F: + case GPU_RG8I: + case GPU_RG8: + case GPU_RG16UI: + case GPU_RG16I: + case GPU_RG16F: + case GPU_RG16: + case GPU_RG32UI: + case GPU_RG32I: + case GPU_RG32F: + case GPU_R8I: + case GPU_R8: + case GPU_R16UI: + case GPU_R16I: + case GPU_R16F: + case GPU_R16: + case GPU_R32UI: + case GPU_R32I: + case GPU_R32F: + case GPU_RGB10_A2: + case GPU_RGB10_A2UI: + case GPU_R11F_G11F_B10F: 
+ case GPU_DEPTH32F_STENCIL8: + case GPU_DEPTH24_STENCIL8: + case GPU_SRGB8_A8: + case GPU_RGBA8_SNORM: + case GPU_RGBA16_SNORM: + case GPU_RGB8UI: + case GPU_RGB8I: + case GPU_RGB8: + case GPU_RGB8_SNORM: + case GPU_RGB16UI: + case GPU_RGB16I: + case GPU_RGB16F: + case GPU_RGB16: + case GPU_RGB16_SNORM: + case GPU_RGB32UI: + case GPU_RGB32I: + case GPU_RGB32F: + case GPU_RG8_SNORM: + case GPU_RG16_SNORM: + case GPU_R8_SNORM: + case GPU_R16_SNORM: + case GPU_SRGB8_A8_DXT1: + case GPU_SRGB8_A8_DXT3: + case GPU_SRGB8_A8_DXT5: + case GPU_RGBA8_DXT1: + case GPU_RGBA8_DXT3: + case GPU_RGBA8_DXT5: + case GPU_SRGB8: + case GPU_RGB9_E5: + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + return ConversionType::UNSUPPORTED; + } + return ConversionType::UNSUPPORTED; +} + ConversionType conversion_type_for_update(eGPUDataFormat host_format, eGPUTextureFormat device_format) { @@ -324,8 +409,9 @@ ConversionType conversion_type_for_update(eGPUDataFormat host_format, return type_of_conversion_int(device_format); case GPU_DATA_HALF_FLOAT: return type_of_conversion_half(device_format); - case GPU_DATA_UBYTE: + return type_of_conversion_ubyte(device_format); + case GPU_DATA_UINT_24_8: case GPU_DATA_10_11_11_REV: case GPU_DATA_2_10_10_10_REV: @@ -337,24 +423,33 @@ ConversionType conversion_type_for_update(eGPUDataFormat host_format, static ConversionType invert(ConversionType type) { +#define CASE_SINGLE(a, b) \ + case ConversionType::a##_TO_##b: \ + return ConversionType::b##_TO_##a; + +#define CASE_PAIR(a, b) \ + CASE_SINGLE(a, b) \ + CASE_SINGLE(b, a) + switch (type) { case ConversionType::PASS_THROUGH: return ConversionType::PASS_THROUGH; - case ConversionType::UI16_TO_UI32: - return ConversionType::UI32_TO_UI16; - case ConversionType::UI32_TO_UI16: - return ConversionType::UI16_TO_UI32; - - case ConversionType::FLOAT_TO_HALF: - return ConversionType::HALF_TO_FLOAT; - case ConversionType::HALF_TO_FLOAT: - return 
ConversionType::FLOAT_TO_HALF; + CASE_PAIR(FLOAT, UNORM8) + CASE_PAIR(FLOAT, SNORM8) + CASE_PAIR(UI32, UI16) + CASE_PAIR(I32, I16) + CASE_PAIR(UI32, UI8) + CASE_PAIR(I32, I8) + CASE_PAIR(FLOAT, HALF) case ConversionType::UNSUPPORTED: return ConversionType::UNSUPPORTED; } +#undef CASE_PAIR +#undef CASE_SINGLE + return ConversionType::UNSUPPORTED; } @@ -365,15 +460,169 @@ ConversionType conversion_type_for_read(eGPUDataFormat host_format, } /* Copy the contents of src to dst with out performing any actual conversion. */ -template +template void copy_unchecked(MutableSpan dst, Span src) { BLI_assert(src.size() == dst.size()); - for (SourceType index : IndexRange(src.size())) { + for (int64_t index : IndexRange(src.size())) { dst[index] = src[index]; } } +template +void copy_unchecked(void *dst_memory, + const void *src_memory, + eGPUTextureFormat device_format, + size_t sample_len) +{ + size_t total_components = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + total_components); + MutableSpan dst = MutableSpan( + static_cast(dst_memory), total_components); + copy_unchecked(dst, src); +} + +/* Float <=> unsigned normalized */ +static uint8_t clamp_unorm(int32_t unclamped) +{ + if (unclamped < 0.0f) { + return 0; + } + if (unclamped > 255.0f) { + return 255; + } + return uint8_t(unclamped); +} + +template +static DestinationType to_unorm(SourceType value) +{ + return (clamp_unorm((value * 255.0f))); +} + +template +static DestinationType from_unorm(SourceType value) +{ + return DestinationType(value / 255.0f); +} + +template +void float_to_unorm(MutableSpan dst, Span src) +{ + BLI_assert(src.size() == dst.size()); + for (int64_t index : IndexRange(src.size())) { + dst[index] = to_unorm(src[index]); + } +} + +template +void float_to_unorm(void *dst_memory, + const void *src_memory, + eGPUTextureFormat device_format, + size_t sample_len) +{ + size_t total_components = to_component_len(device_format) * sample_len; + Span src = 
Span(static_cast(src_memory), + total_components); + MutableSpan dst = MutableSpan( + static_cast(dst_memory), total_components); + float_to_unorm(dst, src); +} + +template +void unorm_to_float(MutableSpan dst, Span src) +{ + BLI_assert(src.size() == dst.size()); + for (int64_t index : IndexRange(src.size())) { + dst[index] = from_unorm(src[index]); + } +} + +template +void unorm_to_float(void *dst_memory, + const void *src_memory, + eGPUTextureFormat device_format, + size_t sample_len) +{ + size_t total_components = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + total_components); + MutableSpan dst = MutableSpan( + static_cast(dst_memory), total_components); + unorm_to_float(dst, src); +} + +/* Float <=> signed normalized */ +static int8_t clamp_snorm(int32_t unclamped) +{ + if (unclamped < -127) { + return 0; + } + if (unclamped > 128) { + return 128; + } + return int8_t(unclamped); +} + +template +static DestinationType to_snorm(SourceType value) +{ + return (clamp_snorm((value * 128.0f))); +} + +template +static DestinationType from_snorm(SourceType value) +{ + return DestinationType(value / 128.0f); +} + +template +void float_to_snorm(MutableSpan dst, Span src) +{ + BLI_assert(src.size() == dst.size()); + for (int64_t index : IndexRange(src.size())) { + dst[index] = to_snorm(src[index]); + } +} + +template +void float_to_snorm(void *dst_memory, + const void *src_memory, + eGPUTextureFormat device_format, + size_t sample_len) +{ + size_t total_components = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + total_components); + MutableSpan dst = MutableSpan( + static_cast(dst_memory), total_components); + float_to_snorm(dst, src); +} + +template +void snorm_to_float(MutableSpan dst, Span src) +{ + BLI_assert(src.size() == dst.size()); + for (int64_t index : IndexRange(src.size())) { + dst[index] = from_snorm(src[index]); + } +} + +template +void snorm_to_float(void *dst_memory, + 
const void *src_memory, + eGPUTextureFormat device_format, + size_t sample_len) +{ + size_t total_components = to_component_len(device_format) * sample_len; + Span src = Span(static_cast(src_memory), + total_components); + MutableSpan dst = MutableSpan( + static_cast(dst_memory), total_components); + snorm_to_float(dst, src); +} + void convert(ConversionType type, eGPUTextureFormat device_format, size_t sample_len, @@ -388,25 +637,51 @@ void convert(ConversionType type, memcpy(dst_memory, src_memory, sample_len * to_bytesize(device_format)); return; - case ConversionType::UI16_TO_UI32: { - size_t component_len = to_component_len(device_format) * sample_len; - Span src = Span(static_cast(src_memory), - component_len); - MutableSpan dst = MutableSpan(static_cast(dst_memory), - component_len); - copy_unchecked(dst, src); + case ConversionType::UI32_TO_UI16: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); break; - } - case ConversionType::UI32_TO_UI16: { - size_t component_len = to_component_len(device_format) * sample_len; - Span src = Span(static_cast(src_memory), - component_len); - MutableSpan dst = MutableSpan(static_cast(dst_memory), - component_len); - copy_unchecked(dst, src); + case ConversionType::UI16_TO_UI32: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::UI32_TO_UI8: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::UI8_TO_UI32: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::I32_TO_I16: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::I16_TO_I32: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::I32_TO_I8: + copy_unchecked(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::I8_TO_I32: + copy_unchecked(dst_memory, src_memory, 
device_format, sample_len); + break; + + case ConversionType::FLOAT_TO_UNORM8: + float_to_unorm(dst_memory, src_memory, device_format, sample_len); + break; + case ConversionType::UNORM8_TO_FLOAT: + unorm_to_float(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::FLOAT_TO_SNORM8: + float_to_snorm(dst_memory, src_memory, device_format, sample_len); + break; + case ConversionType::SNORM8_TO_FLOAT: + snorm_to_float(dst_memory, src_memory, device_format, sample_len); break; - } case ConversionType::FLOAT_TO_HALF: case ConversionType::HALF_TO_FLOAT: diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh index 9406823f7b0..879572c2b68 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.hh +++ b/source/blender/gpu/vulkan/vk_data_conversion.hh @@ -16,17 +16,23 @@ enum class ConversionType { /** No conversion needed, result can be directly read back to host memory. */ PASS_THROUGH, - UI16_TO_UI32, - UI32_TO_UI16, + FLOAT_TO_UNORM8, + UNORM8_TO_FLOAT, - /* + FLOAT_TO_SNORM8, + SNORM8_TO_FLOAT, + + UI32_TO_UI16, + UI16_TO_UI32, + + UI32_TO_UI8, UI8_TO_UI32, + + I32_TO_I16, I16_TO_I32, + + I32_TO_I8, I8_TO_I32, - UI8_TO_I32, - UI8_TO_FLOAT, - UI8_TO_UBYTE, - */ /** Convert device 16F to floats. 
*/ HALF_TO_FLOAT, diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index ba403a49426..ef8c903f839 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -149,7 +149,7 @@ void *VKTexture::read(int mip, eGPUDataFormat format) } void VKTexture::update_sub( - int mip, int /*offset*/[3], int extent[3], eGPUDataFormat format, const void *data) + int mip, int offset[3], int extent[3], eGPUDataFormat format, const void *data) { if (!is_allocated()) { allocate(); @@ -173,6 +173,9 @@ void VKTexture::update_sub( region.imageExtent.width = extent[0]; region.imageExtent.height = extent[1]; region.imageExtent.depth = extent[2]; + region.imageOffset.x = offset[0]; + region.imageOffset.y = offset[1]; + region.imageOffset.z = offset[2]; region.imageSubresource.aspectMask = to_vk_image_aspect_flag_bits(format_); region.imageSubresource.mipLevel = mip; region.imageSubresource.layerCount = 1; @@ -180,8 +183,6 @@ void VKTexture::update_sub( VKCommandBuffer &command_buffer = context.command_buffer_get(); command_buffer.copy(*this, staging_buffer, Span(&region, 1)); command_buffer.submit(); - - /* TODO: add support for offset.
*/ } void VKTexture::update_sub(int /*offset*/[3], @@ -244,7 +245,7 @@ bool VKTexture::allocate() image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; - if (G.debug &= G_DEBUG_GPU) { + if (G.debug & G_DEBUG_GPU) { VkImageFormatProperties image_format = {}; result = vkGetPhysicalDeviceImageFormatProperties(context.physical_device_get(), image_info.format, -- 2.30.2 From 755a67776543b76bbe6e39540783df9fc252de4b Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Thu, 16 Mar 2023 18:01:00 +0100 Subject: [PATCH 05/33] Fixed SNORM/UNORM (precision and conversion) --- source/blender/gpu/tests/gpu_testing.cc | 2 +- source/blender/gpu/tests/texture_test.cc | 51 +++++++++- .../blender/gpu/vulkan/vk_data_conversion.cc | 97 ++++++++++++------- .../blender/gpu/vulkan/vk_data_conversion.hh | 6 ++ source/blender/gpu/vulkan/vk_texture.cc | 2 + 5 files changed, 119 insertions(+), 39 deletions(-) diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc index fd28a18a24b..82c0154c148 100644 --- a/source/blender/gpu/tests/gpu_testing.cc +++ b/source/blender/gpu/tests/gpu_testing.cc @@ -17,7 +17,7 @@ void GPUTest::SetUp() GPU_backend_type_selection_set(gpu_backend_type); GHOST_GLSettings glSettings = {}; glSettings.context_type = draw_context_type; - glSettings.flags = GHOST_glDebugContext; + //glSettings.flags = GHOST_glDebugContext; CLG_init(); ghost_system = GHOST_CreateSystem(); ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings); diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 90460ef2eca..ad16801b320 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -10,6 +10,15 @@ #include "gpu_texture_private.hh" +/* Not all texture types are supported by all platforms. This define safe guards them until we have + * a working workaround or decided to remove support for those texture types. 
*/ +#define RUN_UNSUPPORTED false +/* Skip tests that haven't been developed yet due to non standard data types. */ +#define RUN_16F_UNIMPLEMENTED false +#define RUN_SRGB_UNIMPLEMENTED false +#define RUN_NON_STANDARD_UNIMPLEMENTED false +#define RUN_COMPONENT_UNIMPLEMENTED false + namespace blender::gpu::tests { static void test_texture_read() @@ -53,7 +62,12 @@ template static DataType *generate_test_data(size_t data_len) { DataType *data = static_cast(MEM_mallocN(data_len * sizeof(DataType), __func__)); for (int i : IndexRange(data_len)) { - data[i] = (DataType)(i % 8); + if (std::is_same()) { + data[i] = (DataType)(i % 8) / 8.0f; + } + else { + data[i] = (DataType)(i % 8); + } } return data; } @@ -75,9 +89,14 @@ static void texture_create_upload_read() GPU_texture_update(texture, HostFormat, data); DataType *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); + bool failed = false; for (int i : IndexRange(data_len)) { - EXPECT_EQ(read_data[i], data[i]); + bool ok = abs(read_data[i] - data[i]) < 0.01; + failed |= !ok; + //EXPECT_EQ(read_data[i], data[i]); } + EXPECT_FALSE(failed); + MEM_freeN(read_data); MEM_freeN(data); @@ -87,18 +106,19 @@ static void texture_create_upload_read() /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_FLOAT * \{ */ -#if 1 static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8); +#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16() { @@ -118,11 +138,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8); +#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F() { 
texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16() { @@ -142,11 +164,13 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8); +#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16() { @@ -160,6 +184,7 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F); +#if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2() { texture_create_upload_read(); @@ -177,12 +202,15 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F); +#endif +#if RUN_SRGB_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM() { @@ -196,6 +224,7 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM); +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8() { texture_create_upload_read(); @@ -207,19 +236,21 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM); +#endif +#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F); - +#endif +#if RUN_UNSUPPORTED static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16); - static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16_SNORM() { texture_create_upload_read(); @@ -231,6 +262,7 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F); +#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM() { @@ -256,6 +288,7 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM); +#if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT1() { texture_create_upload_read(); @@ -291,25 +324,33 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5); +#endif +#if RUN_SRGB_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8); +#endif +#if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5); +#endif +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F); +#endif +#if RUN_COMPONENT_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24() { texture_create_upload_read(); diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index 21348a0e5a5..cda01b2c80e 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -34,26 +34,34 @@ 
static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_R8_SNORM: return ConversionType::FLOAT_TO_SNORM8; + case GPU_RGBA16: + case GPU_RG16: + case GPU_R16: + return ConversionType::FLOAT_TO_UNORM16; + + case GPU_RGBA16_SNORM: + case GPU_RGB16_SNORM: + case GPU_RG16_SNORM: + case GPU_R16_SNORM: + return ConversionType::FLOAT_TO_SNORM16; + case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */ case GPU_RGBA8UI: case GPU_RGBA8I: case GPU_RGBA16UI: case GPU_RGBA16I: - case GPU_RGBA16: case GPU_RGBA32UI: case GPU_RGBA32I: case GPU_RG8UI: case GPU_RG8I: case GPU_RG16UI: case GPU_RG16I: - case GPU_RG16: case GPU_RG32UI: case GPU_RG32I: case GPU_R8UI: case GPU_R8I: case GPU_R16UI: case GPU_R16I: - case GPU_R16: case GPU_R32UI: case GPU_R32I: case GPU_RGB10_A2: @@ -62,18 +70,14 @@ static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) case GPU_DEPTH32F_STENCIL8: case GPU_DEPTH24_STENCIL8: case GPU_SRGB8_A8: - case GPU_RGBA16_SNORM: case GPU_RGB8UI: case GPU_RGB8I: case GPU_RGB8: case GPU_RGB16UI: case GPU_RGB16I: case GPU_RGB16: - case GPU_RGB16_SNORM: case GPU_RGB32UI: case GPU_RGB32I: - case GPU_RG16_SNORM: - case GPU_R16_SNORM: case GPU_SRGB8_A8_DXT1: case GPU_SRGB8_A8_DXT3: case GPU_SRGB8_A8_DXT5: @@ -437,6 +441,8 @@ static ConversionType invert(ConversionType type) CASE_PAIR(FLOAT, UNORM8) CASE_PAIR(FLOAT, SNORM8) + CASE_PAIR(FLOAT, UNORM16) + CASE_PAIR(FLOAT, SNORM16) CASE_PAIR(UI32, UI16) CASE_PAIR(I32, I16) CASE_PAIR(UI32, UI8) @@ -484,27 +490,38 @@ void copy_unchecked(void *dst_memory, } /* Float <=> unsigned normalized */ -static uint8_t clamp_unorm(int32_t unclamped) +template constexpr int32_t unorm_scalar() { - if (unclamped < 0.0f) { - return 0; - } - if (unclamped > 255.0f) { - return 255; - } - return uint8_t(unclamped); + return ((1 << (sizeof(Type) * 8)) - 1); +} +template constexpr int32_t snorm_scalar() +{ + return (1 << (sizeof(Type) * 8 - 1)); +} +template constexpr int32_t snorm_max() +{ 
+ return ((1 << (sizeof(Type) * 8)) - 1); +} +template constexpr int32_t snorm_delta() +{ + return (1 << (sizeof(Type) * 8 - 1)) - 1; } template static DestinationType to_unorm(SourceType value) { - return (clamp_unorm((value * 255.0f))); + static constexpr int32_t Multiplier = unorm_scalar(); + static constexpr int32_t Max = Multiplier; + + int32_t before_clamping = value * Multiplier; + return clamp_i(before_clamping, 0, Max); } template static DestinationType from_unorm(SourceType value) { - return DestinationType(value / 255.0f); + static constexpr int32_t Multiplier = unorm_scalar(); + return DestinationType(value) / Multiplier; } template @@ -554,27 +571,23 @@ void unorm_to_float(void *dst_memory, } /* Float <=> signed normalized */ -static int8_t clamp_snorm(int32_t unclamped) -{ - if (unclamped < -127) { - return 0; - } - if (unclamped > 128) { - return 128; - } - return int8_t(unclamped); -} +/* TODO: SNORM needs to be shifted...*/ template static DestinationType to_snorm(SourceType value) { - return (clamp_snorm((value * 128.0f))); + static constexpr int32_t Multiplier = snorm_scalar(); + static constexpr int32_t Max = snorm_max(); + static constexpr int32_t Delta = snorm_delta(); + return (clamp_i((value * Multiplier + Delta), 0, Max)); } template static DestinationType from_snorm(SourceType value) { - return DestinationType(value / 128.0f); + static constexpr int32_t Multiplier = snorm_scalar(); + static constexpr int32_t Delta = snorm_delta(); + return DestinationType(int32_t(value) - Delta) / Multiplier; } template @@ -582,7 +595,9 @@ void float_to_snorm(MutableSpan dst, Span src) { BLI_assert(src.size() == dst.size()); for (int64_t index : IndexRange(src.size())) { - dst[index] = to_snorm(src[index]); + const SourceType src_value = src[index]; + const DestinationType dst_value = to_snorm(src_value); + dst[index] = dst_value; } } @@ -605,7 +620,9 @@ void snorm_to_float(MutableSpan dst, Span src) { BLI_assert(src.size() == dst.size()); for (int64_t 
index : IndexRange(src.size())) { - dst[index] = from_snorm(src[index]); + const SourceType src_value = src[index]; + const DestinationType dst_value = from_snorm(src_value); + dst[index] = dst_value; } } @@ -677,10 +694,24 @@ void convert(ConversionType type, break; case ConversionType::FLOAT_TO_SNORM8: - float_to_snorm(dst_memory, src_memory, device_format, sample_len); + float_to_snorm(dst_memory, src_memory, device_format, sample_len); break; case ConversionType::SNORM8_TO_FLOAT: - snorm_to_float(dst_memory, src_memory, device_format, sample_len); + snorm_to_float(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::FLOAT_TO_UNORM16: + float_to_unorm(dst_memory, src_memory, device_format, sample_len); + break; + case ConversionType::UNORM16_TO_FLOAT: + unorm_to_float(dst_memory, src_memory, device_format, sample_len); + break; + + case ConversionType::FLOAT_TO_SNORM16: + float_to_snorm(dst_memory, src_memory, device_format, sample_len); + break; + case ConversionType::SNORM16_TO_FLOAT: + snorm_to_float(dst_memory, src_memory, device_format, sample_len); break; case ConversionType::FLOAT_TO_HALF: diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh index 879572c2b68..f02a5a3468b 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.hh +++ b/source/blender/gpu/vulkan/vk_data_conversion.hh @@ -22,6 +22,12 @@ enum class ConversionType { FLOAT_TO_SNORM8, SNORM8_TO_FLOAT, + FLOAT_TO_UNORM16, + UNORM16_TO_FLOAT, + + FLOAT_TO_SNORM16, + SNORM16_TO_FLOAT, + UI32_TO_UI16, UI16_TO_UI32, diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index ef8c903f839..69ec9acb917 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -203,6 +203,8 @@ bool VKTexture::init_internal() /* Initialization can only happen after the usage is known. 
By the current API this isn't set * at this moment, so we cannot initialize here. The initialization is postponed until the * allocation of the texture on the device. */ + + /* TODO: return false when texture format isn't supported. */ return true; } -- 2.30.2 From 42420b3d8c4694be6042f181704aa1c7181e8c91 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 08:17:34 +0100 Subject: [PATCH 06/33] Enabled half_float tests. --- source/blender/gpu/tests/texture_test.cc | 22 ++++++++++--------- .../blender/gpu/vulkan/vk_data_conversion.cc | 8 ++++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index ad16801b320..8eb11182c97 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -93,7 +93,7 @@ static void texture_create_upload_read() for (int i : IndexRange(data_len)) { bool ok = abs(read_data[i] - data[i]) < 0.01; failed |= !ok; - //EXPECT_EQ(read_data[i], data[i]); + // EXPECT_EQ(read_data[i], data[i]); } EXPECT_FALSE(failed); @@ -245,6 +245,7 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F); #endif + #if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16() { @@ -363,36 +364,39 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16() } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16); #endif + /* \} */ /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_HALF_FLOAT * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGBA16F() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGBA16F); static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RG16F() { - texture_create_upload_read(); + 
texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RG16F); static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_R16F() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_R16F); +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGB16F() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGB16F); #endif + /* \} */ /* -------------------------------------------------------------------- */ @@ -433,9 +437,7 @@ static void test_texture_roundtrip__GPU_DATA_INT__GPU_RG32I() { texture_create_upload_read(); } -GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG32I); - -static void test_texture_roundtrip__GPU_DATA_INT__GPU_R8I() +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG32I);test_texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8 { texture_create_upload_read(); } @@ -708,7 +710,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); return std::string("float"); case GPU_DATA_HALF_FLOAT: - return std::string("half"); + return std::string("uint16_t"); case GPU_DATA_INT: return std::string("int32_t"); case GPU_DATA_UINT: diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index cda01b2c80e..575e7693df4 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -254,12 +254,16 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format) static ConversionType type_of_conversion_half(eGPUTextureFormat device_format) { switch (device_format) { + case GPU_RGBA16F: + case GPU_RG16F: + case GPU_R16F: + return ConversionType::PASS_THROUGH; + case GPU_RGBA8UI: case GPU_RGBA8I: case GPU_RGBA8: case GPU_RGBA16UI: case GPU_RGBA16I: - case GPU_RGBA16F: case GPU_RGBA16: case GPU_RGBA32UI: case GPU_RGBA32I: @@ -269,7 +273,6 @@ 
static ConversionType type_of_conversion_half(eGPUTextureFormat device_format) case GPU_RG8: case GPU_RG16UI: case GPU_RG16I: - case GPU_RG16F: case GPU_RG16: case GPU_RG32UI: case GPU_RG32I: @@ -279,7 +282,6 @@ static ConversionType type_of_conversion_half(eGPUTextureFormat device_format) case GPU_R8: case GPU_R16UI: case GPU_R16I: - case GPU_R16F: case GPU_R16: case GPU_R32UI: case GPU_R32I: -- 2.30.2 From 12a6b1914d944fb35bb27fd90a621d0426ad45cb Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 08:49:41 +0100 Subject: [PATCH 07/33] Hide internals of data conversion for quicker development. --- .../blender/gpu/vulkan/vk_data_conversion.cc | 145 +++++++++++++----- .../blender/gpu/vulkan/vk_data_conversion.hh | 85 +++++----- source/blender/gpu/vulkan/vk_texture.cc | 14 +- 3 files changed, 142 insertions(+), 102 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index 575e7693df4..a86783e50fb 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -8,6 +8,48 @@ #include "vk_data_conversion.hh" namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Conversion types + * \{ */ +enum class ConversionType { + /** No conversion needed, result can be directly read back to host memory. */ + PASS_THROUGH, + + FLOAT_TO_UNORM8, + UNORM8_TO_FLOAT, + + FLOAT_TO_SNORM8, + SNORM8_TO_FLOAT, + + FLOAT_TO_UNORM16, + UNORM16_TO_FLOAT, + + FLOAT_TO_SNORM16, + SNORM16_TO_FLOAT, + + UI32_TO_UI16, + UI16_TO_UI32, + + UI32_TO_UI8, + UI8_TO_UI32, + + I32_TO_I16, + I16_TO_I32, + + I32_TO_I8, + I8_TO_I32, + + /** Convert device 16F to floats. */ + HALF_TO_FLOAT, + FLOAT_TO_HALF, + + /** + * The requested conversion isn't supported. 
+ */ + UNSUPPORTED, +}; + static ConversionType type_of_conversion_float(eGPUTextureFormat device_format) { switch (device_format) { @@ -401,8 +443,7 @@ static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) return ConversionType::UNSUPPORTED; } -ConversionType conversion_type_for_update(eGPUDataFormat host_format, - eGPUTextureFormat device_format) +static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureFormat device_format) { BLI_assert(validate_data_format(device_format, host_format)); @@ -427,7 +468,7 @@ ConversionType conversion_type_for_update(eGPUDataFormat host_format, return ConversionType::UNSUPPORTED; } -static ConversionType invert(ConversionType type) +static ConversionType reversed(ConversionType type) { #define CASE_SINGLE(a, b) \ case ConversionType::a##_TO_##b: \ @@ -461,11 +502,7 @@ static ConversionType invert(ConversionType type) return ConversionType::UNSUPPORTED; } -ConversionType conversion_type_for_read(eGPUDataFormat host_format, - eGPUTextureFormat device_format) -{ - return invert(conversion_type_for_update(host_format, device_format)); -} +/* \} */ /* Copy the contents of src to dst with out performing any actual conversion. 
*/ template @@ -481,9 +518,9 @@ template void copy_unchecked(void *dst_memory, const void *src_memory, eGPUTextureFormat device_format, - size_t sample_len) + size_t buffer_size) { - size_t total_components = to_component_len(device_format) * sample_len; + size_t total_components = to_component_len(device_format) * buffer_size; Span src = Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( @@ -539,9 +576,9 @@ template void float_to_unorm(void *dst_memory, const void *src_memory, eGPUTextureFormat device_format, - size_t sample_len) + size_t buffer_size) { - size_t total_components = to_component_len(device_format) * sample_len; + size_t total_components = to_component_len(device_format) * buffer_size; Span src = Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( @@ -562,9 +599,9 @@ template void unorm_to_float(void *dst_memory, const void *src_memory, eGPUTextureFormat device_format, - size_t sample_len) + size_t buffer_size) { - size_t total_components = to_component_len(device_format) * sample_len; + size_t total_components = to_component_len(device_format) * buffer_size; Span src = Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( @@ -607,9 +644,9 @@ template void float_to_snorm(void *dst_memory, const void *src_memory, eGPUTextureFormat device_format, - size_t sample_len) + size_t buffer_size) { - size_t total_components = to_component_len(device_format) * sample_len; + size_t total_components = to_component_len(device_format) * buffer_size; Span src = Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( @@ -632,9 +669,9 @@ template void snorm_to_float(void *dst_memory, const void *src_memory, eGPUTextureFormat device_format, - size_t sample_len) + size_t buffer_size) { - size_t total_components = to_component_len(device_format) * sample_len; + size_t total_components = to_component_len(device_format) * buffer_size; Span src = 
Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( @@ -642,78 +679,78 @@ void snorm_to_float(void *dst_memory, snorm_to_float(dst, src); } -void convert(ConversionType type, - eGPUTextureFormat device_format, - size_t sample_len, - void *dst_memory, - const void *src_memory) +static void convert_buffer(void *dst_memory, + const void *src_memory, + size_t buffer_size, + eGPUTextureFormat device_format, + ConversionType type) { switch (type) { case ConversionType::UNSUPPORTED: return; case ConversionType::PASS_THROUGH: - memcpy(dst_memory, src_memory, sample_len * to_bytesize(device_format)); + memcpy(dst_memory, src_memory, buffer_size * to_bytesize(device_format)); return; case ConversionType::UI32_TO_UI16: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::UI16_TO_UI32: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::UI32_TO_UI8: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::UI8_TO_UI32: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::I32_TO_I16: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::I16_TO_I32: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::I32_TO_I8: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case 
ConversionType::I8_TO_I32: - copy_unchecked(dst_memory, src_memory, device_format, sample_len); + copy_unchecked(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::FLOAT_TO_UNORM8: - float_to_unorm(dst_memory, src_memory, device_format, sample_len); + float_to_unorm(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::UNORM8_TO_FLOAT: - unorm_to_float(dst_memory, src_memory, device_format, sample_len); + unorm_to_float(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::FLOAT_TO_SNORM8: - float_to_snorm(dst_memory, src_memory, device_format, sample_len); + float_to_snorm(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::SNORM8_TO_FLOAT: - snorm_to_float(dst_memory, src_memory, device_format, sample_len); + snorm_to_float(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::FLOAT_TO_UNORM16: - float_to_unorm(dst_memory, src_memory, device_format, sample_len); + float_to_unorm(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::UNORM16_TO_FLOAT: - unorm_to_float(dst_memory, src_memory, device_format, sample_len); + unorm_to_float(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::FLOAT_TO_SNORM16: - float_to_snorm(dst_memory, src_memory, device_format, sample_len); + float_to_snorm(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::SNORM16_TO_FLOAT: - snorm_to_float(dst_memory, src_memory, device_format, sample_len); + snorm_to_float(dst_memory, src_memory, device_format, buffer_size); break; case ConversionType::FLOAT_TO_HALF: @@ -723,4 +760,32 @@ void convert(ConversionType type, } } +/* -------------------------------------------------------------------- */ +/** \name API + * \{ */ + +void convert_host_to_device(void *dst_buffer, + const void *src_buffer, + size_t buffer_size, + eGPUDataFormat host_format, + eGPUTextureFormat 
device_format) +{ + ConversionType conversion_type = host_to_device(host_format, device_format); + BLI_assert(conversion_type != ConversionType::UNSUPPORTED); + convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type); +} + +void convert_device_to_host(void *dst_buffer, + const void *src_buffer, + size_t buffer_size, + eGPUDataFormat host_format, + eGPUTextureFormat device_format) +{ + ConversionType conversion_type = reversed(host_to_device(host_format, device_format)); + BLI_assert(conversion_type != ConversionType::UNSUPPORTED); + convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type); +} + +/* \} */ + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_data_conversion.hh b/source/blender/gpu/vulkan/vk_data_conversion.hh index f02a5a3468b..800389ff44e 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.hh +++ b/source/blender/gpu/vulkan/vk_data_conversion.hh @@ -12,57 +12,42 @@ namespace blender::gpu { -enum class ConversionType { - /** No conversion needed, result can be directly read back to host memory. */ - PASS_THROUGH, - - FLOAT_TO_UNORM8, - UNORM8_TO_FLOAT, - - FLOAT_TO_SNORM8, - SNORM8_TO_FLOAT, - - FLOAT_TO_UNORM16, - UNORM16_TO_FLOAT, - - FLOAT_TO_SNORM16, - SNORM16_TO_FLOAT, - - UI32_TO_UI16, - UI16_TO_UI32, - - UI32_TO_UI8, - UI8_TO_UI32, - - I32_TO_I16, - I16_TO_I32, - - I32_TO_I8, - I8_TO_I32, - - /** Convert device 16F to floats. */ - HALF_TO_FLOAT, - FLOAT_TO_HALF, - - /** - * The requested conversion isn't supported. - */ - UNSUPPORTED, -}; +/** + * Convert host buffer to device buffer. + * + * \param dst_buffer: device buffer. + * \param src_buffer: host buffer. + * \param buffer_size: number of pixels to convert from the start of the given buffer. + * \param host_format: format of the host buffer + * \param device_format: format of the device buffer. 
+ * + * \note Will assert when the host_format/device_format combination isn't valid + * (#validate_data_format) or supported. Some combinations aren't supported in Vulkan due to + * platform incompatibility. + */ +void convert_host_to_device(void *dst_buffer, + const void *src_buffer, + size_t buffer_size, + eGPUDataFormat host_format, + eGPUTextureFormat device_format); /** - * Determine the type of conversion that is needed to read back data from GPU device to host - * memory. + * Convert device buffer to host buffer. + * + * \param dst_buffer: host buffer + * \param src_buffer: device buffer. + * \param buffer_size: number of pixels to convert from the start of the given buffer. + * \param host_format: format of the host buffer + * \param device_format: format of the device buffer. + * + * \note Will assert when the host_format/device_format combination isn't valid + * (#validate_data_format) or supported. Some combinations aren't supported in Vulkan due to + * platform incompatibility. 
*/ -ConversionType conversion_type_for_read(eGPUDataFormat host_format, - eGPUTextureFormat device_format); -ConversionType conversion_type_for_update(eGPUDataFormat host_format, - eGPUTextureFormat device_format); +void convert_device_to_host(void *dst_buffer, + const void *src_buffer, + size_t buffer_size, + eGPUDataFormat host_format, + eGPUTextureFormat device_format); -void convert(ConversionType type, - eGPUTextureFormat device_format, - size_t sample_len, - void *dst_memory, - const void *src_memory); - -}; // namespace blender::gpu +}; // namespace blender::gpu \ No newline at end of file diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 69ec9acb917..e7cbf9c7183 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -138,13 +138,7 @@ void *VKTexture::read(int mip, eGPUDataFormat format) command_buffer.submit(); void *data = MEM_mallocN(host_memory_size, __func__); - - /* Convert data from device to host memory. 
*/ - ConversionType conversion_type = conversion_type_for_read(format, format_); - BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED, - "Memory data conversions not implemented yet"); - convert(conversion_type, format_, sample_len, data, staging_buffer.mapped_memory_get()); - + convert_device_to_host(data, staging_buffer.mapped_memory_get(), sample_len, format, format_); return data; } @@ -163,11 +157,7 @@ void VKTexture::update_sub( staging_buffer.create( context, device_memory_size, GPU_USAGE_DEVICE_ONLY, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - - ConversionType conversion_type = conversion_type_for_update(format, format_); - BLI_assert_msg(conversion_type != ConversionType::UNSUPPORTED, - "Memory data conversions not implemented yet"); - convert(conversion_type, format_, sample_len, staging_buffer.mapped_memory_get(), data); + convert_host_to_device(staging_buffer.mapped_memory_get(), data, sample_len, format, format_); VkBufferImageCopy region = {}; region.imageExtent.width = extent[0]; -- 2.30.2 From e7ae0a5dfc07a40380c41988cbaba06f0251ab8a Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 10:27:00 +0100 Subject: [PATCH 08/33] Reduce complexity by introducing data types. --- .../blender/gpu/vulkan/vk_data_conversion.cc | 326 ++++++++---------- 1 file changed, 141 insertions(+), 185 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index a86783e50fb..de4b02cc55a 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -12,6 +12,7 @@ namespace blender::gpu { /* -------------------------------------------------------------------- */ /** \name Conversion types * \{ */ + enum class ConversionType { /** No conversion needed, result can be directly read back to host memory. 
*/ PASS_THROUGH, @@ -504,179 +505,126 @@ static ConversionType reversed(ConversionType type) /* \} */ -/* Copy the contents of src to dst with out performing any actual conversion. */ +/* -------------------------------------------------------------------- */ +/** \name Data Conversion + * \{ */ + +template struct SignedNormalized { + static_assert(std::is_same() || std::is_same()); + InnerType value; + + static constexpr int32_t scalar() + { + return (1 << (sizeof(InnerType) * 8 - 1)); + } + + static constexpr int32_t delta() + { + return (1 << (sizeof(InnerType) * 8 - 1)) - 1; + } + + static constexpr int32_t max() + { + return ((1 << (sizeof(InnerType) * 8)) - 1); + } +}; + +template struct UnsignedNormalized { + static_assert(std::is_same() || std::is_same()); + InnerType value; + + static constexpr int32_t scalar() + { + return (1 << (sizeof(InnerType) * 8)) - 1; + } + + static constexpr int32_t max() + { + return ((1 << (sizeof(InnerType) * 8)) - 1); + } +}; + +template struct ComponentValue { + InnerType value; +}; + +using F32 = ComponentValue; +using UI8 = ComponentValue; +using UI16 = ComponentValue; +using UI32 = ComponentValue; +using I8 = ComponentValue; +using I16 = ComponentValue; +using I32 = ComponentValue; + +template +void convert_component(SignedNormalized &dst, const F32 &src) +{ + static constexpr int32_t scalar = SignedNormalized::scalar(); + static constexpr int32_t delta = SignedNormalized::delta(); + static constexpr int32_t max = SignedNormalized::max(); + dst.value = (clamp_i((src.value * scalar + delta), 0, max)); +} + +template +void convert_component(F32 &dst, const SignedNormalized &src) +{ + static constexpr int32_t scalar = SignedNormalized::scalar(); + static constexpr int32_t delta = SignedNormalized::delta(); + dst.value = float(int32_t(src.value) - delta) / scalar; +} + +template +void convert_component(UnsignedNormalized &dst, const F32 &src) +{ + static constexpr int32_t scalar = UnsignedNormalized::scalar(); + static 
constexpr int32_t max = scalar; + dst.value = (clamp_i((src.value * scalar), 0, max)); +} + +template +void convert_component(F32 &dst, const UnsignedNormalized &src) +{ + static constexpr int32_t scalar = UnsignedNormalized::scalar(); + dst.value = float(src.value) / scalar; +} + +/* Copy the contents of src to dst with out performing any actual conversion.*/ template -void copy_unchecked(MutableSpan dst, Span src) +void convert_component(DestinationType &dst, const SourceType &src) +{ + static_assert(std::is_same() || std::is_same() || + std::is_same() || std::is_same() || + std::is_same() || std::is_same()); + static_assert(std::is_same() || std::is_same() || + std::is_same() || std::is_same() || + std::is_same() || std::is_same()); + static_assert(!std::is_same()); + dst.value = src.value; +} + +/* \} */ + +template +void convert_per_component(MutableSpan dst, Span src) { BLI_assert(src.size() == dst.size()); for (int64_t index : IndexRange(src.size())) { - dst[index] = src[index]; + convert_component(dst[index], src[index]); } } template -void copy_unchecked(void *dst_memory, - const void *src_memory, - eGPUTextureFormat device_format, - size_t buffer_size) +void convert_per_component(void *dst_memory, + const void *src_memory, + size_t buffer_size, + eGPUTextureFormat device_format) { size_t total_components = to_component_len(device_format) * buffer_size; Span src = Span(static_cast(src_memory), total_components); MutableSpan dst = MutableSpan( static_cast(dst_memory), total_components); - copy_unchecked(dst, src); -} - -/* Float <=> unsigned normalized */ -template constexpr int32_t unorm_scalar() -{ - return ((1 << (sizeof(Type) * 8)) - 1); -} -template constexpr int32_t snorm_scalar() -{ - return (1 << (sizeof(Type) * 8 - 1)); -} -template constexpr int32_t snorm_max() -{ - return ((1 << (sizeof(Type) * 8)) - 1); -} -template constexpr int32_t snorm_delta() -{ - return (1 << (sizeof(Type) * 8 - 1)) - 1; -} - -template -static DestinationType 
to_unorm(SourceType value) -{ - static constexpr int32_t Multiplier = unorm_scalar(); - static constexpr int32_t Max = Multiplier; - - int32_t before_clamping = value * Multiplier; - return clamp_i(before_clamping, 0, Max); -} - -template -static DestinationType from_unorm(SourceType value) -{ - static constexpr int32_t Multiplier = unorm_scalar(); - return DestinationType(value) / Multiplier; -} - -template -void float_to_unorm(MutableSpan dst, Span src) -{ - BLI_assert(src.size() == dst.size()); - for (int64_t index : IndexRange(src.size())) { - dst[index] = to_unorm(src[index]); - } -} - -template -void float_to_unorm(void *dst_memory, - const void *src_memory, - eGPUTextureFormat device_format, - size_t buffer_size) -{ - size_t total_components = to_component_len(device_format) * buffer_size; - Span src = Span(static_cast(src_memory), - total_components); - MutableSpan dst = MutableSpan( - static_cast(dst_memory), total_components); - float_to_unorm(dst, src); -} - -template -void unorm_to_float(MutableSpan dst, Span src) -{ - BLI_assert(src.size() == dst.size()); - for (int64_t index : IndexRange(src.size())) { - dst[index] = from_unorm(src[index]); - } -} - -template -void unorm_to_float(void *dst_memory, - const void *src_memory, - eGPUTextureFormat device_format, - size_t buffer_size) -{ - size_t total_components = to_component_len(device_format) * buffer_size; - Span src = Span(static_cast(src_memory), - total_components); - MutableSpan dst = MutableSpan( - static_cast(dst_memory), total_components); - unorm_to_float(dst, src); -} - -/* Float <=> signed normalized */ - -/* TODO: SNORM needs to be shifted...*/ -template -static DestinationType to_snorm(SourceType value) -{ - static constexpr int32_t Multiplier = snorm_scalar(); - static constexpr int32_t Max = snorm_max(); - static constexpr int32_t Delta = snorm_delta(); - return (clamp_i((value * Multiplier + Delta), 0, Max)); -} - -template -static DestinationType from_snorm(SourceType value) -{ - static 
constexpr int32_t Multiplier = snorm_scalar(); - static constexpr int32_t Delta = snorm_delta(); - return DestinationType(int32_t(value) - Delta) / Multiplier; -} - -template -void float_to_snorm(MutableSpan dst, Span src) -{ - BLI_assert(src.size() == dst.size()); - for (int64_t index : IndexRange(src.size())) { - const SourceType src_value = src[index]; - const DestinationType dst_value = to_snorm(src_value); - dst[index] = dst_value; - } -} - -template -void float_to_snorm(void *dst_memory, - const void *src_memory, - eGPUTextureFormat device_format, - size_t buffer_size) -{ - size_t total_components = to_component_len(device_format) * buffer_size; - Span src = Span(static_cast(src_memory), - total_components); - MutableSpan dst = MutableSpan( - static_cast(dst_memory), total_components); - float_to_snorm(dst, src); -} - -template -void snorm_to_float(MutableSpan dst, Span src) -{ - BLI_assert(src.size() == dst.size()); - for (int64_t index : IndexRange(src.size())) { - const SourceType src_value = src[index]; - const DestinationType dst_value = from_snorm(src_value); - dst[index] = dst_value; - } -} - -template -void snorm_to_float(void *dst_memory, - const void *src_memory, - eGPUTextureFormat device_format, - size_t buffer_size) -{ - size_t total_components = to_component_len(device_format) * buffer_size; - Span src = Span(static_cast(src_memory), - total_components); - MutableSpan dst = MutableSpan( - static_cast(dst_memory), total_components); - snorm_to_float(dst, src); + convert_per_component(dst, src); } static void convert_buffer(void *dst_memory, @@ -694,63 +642,71 @@ static void convert_buffer(void *dst_memory, return; case ConversionType::UI32_TO_UI16: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::UI16_TO_UI32: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, 
src_memory, buffer_size, device_format); break; case ConversionType::UI32_TO_UI8: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::UI8_TO_UI32: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::I32_TO_I16: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::I16_TO_I32: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::I32_TO_I8: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::I8_TO_I32: - copy_unchecked(dst_memory, src_memory, device_format, buffer_size); - break; - - case ConversionType::FLOAT_TO_UNORM8: - float_to_unorm(dst_memory, src_memory, device_format, buffer_size); - break; - case ConversionType::UNORM8_TO_FLOAT: - unorm_to_float(dst_memory, src_memory, device_format, buffer_size); + convert_per_component(dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::FLOAT_TO_SNORM8: - float_to_snorm(dst_memory, src_memory, device_format, buffer_size); + convert_per_component, F32>( + dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::SNORM8_TO_FLOAT: - snorm_to_float(dst_memory, src_memory, device_format, buffer_size); - break; - - case ConversionType::FLOAT_TO_UNORM16: - float_to_unorm(dst_memory, src_memory, device_format, buffer_size); - break; - case ConversionType::UNORM16_TO_FLOAT: - unorm_to_float(dst_memory, src_memory, device_format, buffer_size); + convert_per_component>( + 
dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::FLOAT_TO_SNORM16: - float_to_snorm(dst_memory, src_memory, device_format, buffer_size); + convert_per_component, F32>( + dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::SNORM16_TO_FLOAT: - snorm_to_float(dst_memory, src_memory, device_format, buffer_size); + convert_per_component>( + dst_memory, src_memory, buffer_size, device_format); + break; + + case ConversionType::FLOAT_TO_UNORM8: + convert_per_component, F32>( + dst_memory, src_memory, buffer_size, device_format); + break; + case ConversionType::UNORM8_TO_FLOAT: + convert_per_component>( + dst_memory, src_memory, buffer_size, device_format); + break; + + case ConversionType::FLOAT_TO_UNORM16: + convert_per_component, F32>( + dst_memory, src_memory, buffer_size, device_format); + break; + case ConversionType::UNORM16_TO_FLOAT: + convert_per_component>( + dst_memory, src_memory, buffer_size, device_format); break; case ConversionType::FLOAT_TO_HALF: -- 2.30.2 From e75a754065eaf265aa835b31dbd5e17deb0f51ce Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 11:20:56 +0100 Subject: [PATCH 09/33] Enabled most tests. --- source/blender/gpu/tests/texture_test.cc | 336 ++++++++++-------- .../blender/gpu/vulkan/vk_data_conversion.cc | 28 +- 2 files changed, 201 insertions(+), 163 deletions(-) diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 8eb11182c97..368488df106 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -13,7 +13,8 @@ /* Not all texture types are supported by all platforms. This define safe guards them until we have * a working workaround or decided to remove support for those texture types. */ #define RUN_UNSUPPORTED false -/* Skip tests that haven't been developed yet due to non standard data types. 
*/ +/* Skip tests that haven't been developed yet due to non standard data types or it needs an + * framebuffer to create the texture.. */ #define RUN_16F_UNIMPLEMENTED false #define RUN_SRGB_UNIMPLEMENTED false #define RUN_NON_STANDARD_UNIMPLEMENTED false @@ -82,7 +83,7 @@ static void texture_create_upload_read() eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( "texture", Size, Size, 1, DeviceFormat, usage, nullptr); - EXPECT_NE(texture, nullptr); + ASSERT_NE(texture, nullptr); size_t data_len = Size * Size * to_component_len(DeviceFormat); DataType *data = static_cast(generate_test_data(data_len)); @@ -91,8 +92,14 @@ static void texture_create_upload_read() DataType *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); bool failed = false; for (int i : IndexRange(data_len)) { - bool ok = abs(read_data[i] - data[i]) < 0.01; - failed |= !ok; + if constexpr (std::is_same_v) { + bool ok = abs(read_data[i] - data[i]) < 0.01; + failed |= !ok; + } + else { + bool ok = (read_data[i] - data[i]) == 0; + failed |= !ok; + } // EXPECT_EQ(read_data[i], data[i]); } EXPECT_FALSE(failed); @@ -402,7 +409,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_HALF_FLOAT__GPU_RGB16F); /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_INT * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGBA8I() { texture_create_upload_read(); @@ -437,7 +444,9 @@ static void test_texture_roundtrip__GPU_DATA_INT__GPU_RG32I() { texture_create_upload_read(); } -GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG32I);test_texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8 +GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RG32I); + +static void test_texture_roundtrip__GPU_DATA_INT__GPU_R8I() { texture_create_upload_read(); } @@ -455,6 +464,7 @@ static void test_texture_roundtrip__GPU_DATA_INT__GPU_R32I() } 
GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_R32I); +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGB8I() { texture_create_upload_read(); @@ -473,12 +483,13 @@ static void test_texture_roundtrip__GPU_DATA_INT__GPU_RGB32I() } GPU_TEST(texture_roundtrip__GPU_DATA_INT__GPU_RGB32I); #endif + /* \} */ /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_UINT * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGBA8UI() { texture_create_upload_read(); @@ -533,6 +544,7 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_R32UI() } GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_R32UI); +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH32F_STENCIL8() { texture_create_upload_read(); @@ -544,7 +556,9 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8); +#endif +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB8UI() { texture_create_upload_read(); @@ -562,7 +576,9 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI() texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI); +#endif +#if RUN_COMPONENT_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT32F() { texture_create_upload_read(); @@ -581,12 +597,13 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT16() } GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT16); #endif + /* \} */ /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_UBYTE * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGBA8UI() { texture_create_upload_read(); @@ -623,12 +640,15 @@ static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_R8() } 
GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_R8); +#if RUN_SRGB_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8() { texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8_A8); +#endif +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_UBYTE__GPU_RGB8I() { texture_create_upload_read(); @@ -652,7 +672,8 @@ GPU_TEST(texture_roundtrip__GPU_DATA_UBYTE__GPU_SRGB8); /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_UINT_24_8 * \{ */ -#if 0 + +#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH32F_STENCIL8() { texture_create_upload_read(); @@ -665,35 +686,36 @@ static void test_texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH24_STENCIL8() } GPU_TEST(texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH24_STENCIL8); #endif + /* \} */ /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_10_11_11_REV * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F); -#endif + /* \} */ /* -------------------------------------------------------------------- */ /** \name Roundtrip testing GPU_DATA_2_10_10_10_REV * \{ */ -#if 0 + static void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2); static void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); -#endif + /* \} */ /* -------------------------------------------------------------------- */ @@ -703,160 +725,158 @@ 
GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); * * \{ */ #if 0 - static std::string to_prim_type_string(eGPUDataFormat host_format) - { - switch (host_format) { - case GPU_DATA_FLOAT: - return std::string("float"); +static std::string to_prim_type_string(eGPUDataFormat host_format) +{ + switch (host_format) { + case GPU_DATA_FLOAT: + return std::string("float"); - case GPU_DATA_HALF_FLOAT: - return std::string("uint16_t"); - case GPU_DATA_INT: - return std::string("int32_t"); - case GPU_DATA_UINT: - return std::string("uint32_t"); - case GPU_DATA_UBYTE: - return std::string("uint8_t"); - case GPU_DATA_UINT_24_8: - case GPU_DATA_10_11_11_REV: - case GPU_DATA_2_10_10_10_REV: - return std::string("void"); - } - return std::string("UNKNOWN"); + case GPU_DATA_HALF_FLOAT: + return std::string("uint16_t"); + case GPU_DATA_INT: + return std::string("int32_t"); + case GPU_DATA_UBYTE: + return std::string("uint8_t"); + case GPU_DATA_UINT: + case GPU_DATA_UINT_24_8: + case GPU_DATA_10_11_11_REV: + case GPU_DATA_2_10_10_10_REV: + return std::string("uint32_t"); } - static std::string to_string(eGPUDataFormat host_format) - { - switch (host_format) { - case GPU_DATA_FLOAT: - return std::string("GPU_DATA_FLOAT"); + return std::string("UNKNOWN"); +} +static std::string to_string(eGPUDataFormat host_format) +{ + switch (host_format) { + case GPU_DATA_FLOAT: + return std::string("GPU_DATA_FLOAT"); - case GPU_DATA_HALF_FLOAT: - return std::string("GPU_DATA_HALF_FLOAT"); - case GPU_DATA_INT: - return std::string("GPU_DATA_INT"); - case GPU_DATA_UINT: - return std::string("GPU_DATA_UINT"); - case GPU_DATA_UBYTE: - return std::string("GPU_DATA_UBYTE"); - case GPU_DATA_UINT_24_8: - return std::string("GPU_DATA_UINT_24_8"); - case GPU_DATA_10_11_11_REV: - return std::string("GPU_DATA_10_11_11_REV"); - case GPU_DATA_2_10_10_10_REV: - return std::string("GPU_DATA_2_10_10_10_REV"); - } - return std::string("UNKNOWN"); + case GPU_DATA_HALF_FLOAT: + return 
std::string("GPU_DATA_HALF_FLOAT"); + case GPU_DATA_INT: + return std::string("GPU_DATA_INT"); + case GPU_DATA_UINT: + return std::string("GPU_DATA_UINT"); + case GPU_DATA_UBYTE: + return std::string("GPU_DATA_UBYTE"); + case GPU_DATA_UINT_24_8: + return std::string("GPU_DATA_UINT_24_8"); + case GPU_DATA_10_11_11_REV: + return std::string("GPU_DATA_10_11_11_REV"); + case GPU_DATA_2_10_10_10_REV: + return std::string("GPU_DATA_2_10_10_10_REV"); } + return std::string("UNKNOWN"); +} - static std::string to_string(eGPUTextureFormat texture_format) - { - return std::string("GPU_") + std::string(GPU_texture_format_name(texture_format)); - } +static std::string to_string(eGPUTextureFormat texture_format) +{ + return std::string("GPU_") + std::string(GPU_texture_format_name(texture_format)); +} - TEST(gpu_util, generate_test_cases) - { - Vector host_formats; - host_formats.append(GPU_DATA_FLOAT); - host_formats.append(GPU_DATA_HALF_FLOAT); - host_formats.append(GPU_DATA_INT); - host_formats.append(GPU_DATA_UINT); - host_formats.append(GPU_DATA_UBYTE); - host_formats.append(GPU_DATA_UINT_24_8); - host_formats.append(GPU_DATA_10_11_11_REV); - host_formats.append(GPU_DATA_2_10_10_10_REV); +TEST(gpu_util, generate_test_cases) +{ + Vector host_formats; + host_formats.append(GPU_DATA_FLOAT); + host_formats.append(GPU_DATA_HALF_FLOAT); + host_formats.append(GPU_DATA_INT); + host_formats.append(GPU_DATA_UINT); + host_formats.append(GPU_DATA_UBYTE); + host_formats.append(GPU_DATA_UINT_24_8); + host_formats.append(GPU_DATA_10_11_11_REV); + host_formats.append(GPU_DATA_2_10_10_10_REV); - Vector texture_formats; - texture_formats.append(GPU_RGBA8UI); - texture_formats.append(GPU_RGBA8I); - texture_formats.append(GPU_RGBA8); - texture_formats.append(GPU_RGBA16UI); - texture_formats.append(GPU_RGBA16I); - texture_formats.append(GPU_RGBA16F); - texture_formats.append(GPU_RGBA16); - texture_formats.append(GPU_RGBA32UI); - texture_formats.append(GPU_RGBA32I); - 
texture_formats.append(GPU_RGBA32F); - texture_formats.append(GPU_RG8UI); - texture_formats.append(GPU_RG8I); - texture_formats.append(GPU_RG8); - texture_formats.append(GPU_RG16UI); - texture_formats.append(GPU_RG16I); - texture_formats.append(GPU_RG16F); - texture_formats.append(GPU_RG16); - texture_formats.append(GPU_RG32UI); - texture_formats.append(GPU_RG32I); - texture_formats.append(GPU_RG32F); - texture_formats.append(GPU_R8UI); - texture_formats.append(GPU_R8I); - texture_formats.append(GPU_R8); - texture_formats.append(GPU_R16UI); - texture_formats.append(GPU_R16I); - texture_formats.append(GPU_R16F); - texture_formats.append(GPU_R16); - texture_formats.append(GPU_R32UI); - texture_formats.append(GPU_R32I); - texture_formats.append(GPU_R32F); - texture_formats.append(GPU_RGB10_A2); - texture_formats.append(GPU_RGB10_A2UI); - texture_formats.append(GPU_R11F_G11F_B10F); - texture_formats.append(GPU_DEPTH32F_STENCIL8); - texture_formats.append(GPU_DEPTH24_STENCIL8); - texture_formats.append(GPU_SRGB8_A8); - texture_formats.append(GPU_RGBA8_SNORM); - texture_formats.append(GPU_RGBA16_SNORM); - texture_formats.append(GPU_RGB8UI); - texture_formats.append(GPU_RGB8I); - texture_formats.append(GPU_RGB8); - texture_formats.append(GPU_RGB8_SNORM); - texture_formats.append(GPU_RGB16UI); - texture_formats.append(GPU_RGB16I); - texture_formats.append(GPU_RGB16F); - texture_formats.append(GPU_RGB16); - texture_formats.append(GPU_RGB16_SNORM); - texture_formats.append(GPU_RGB32UI); - texture_formats.append(GPU_RGB32I); - texture_formats.append(GPU_RGB32F); - texture_formats.append(GPU_RG8_SNORM); - texture_formats.append(GPU_RG16_SNORM); - texture_formats.append(GPU_R8_SNORM); - texture_formats.append(GPU_R16_SNORM); - texture_formats.append(GPU_SRGB8_A8_DXT1); - texture_formats.append(GPU_SRGB8_A8_DXT3); - texture_formats.append(GPU_SRGB8_A8_DXT5); - texture_formats.append(GPU_RGBA8_DXT1); - texture_formats.append(GPU_RGBA8_DXT3); - 
texture_formats.append(GPU_RGBA8_DXT5); - texture_formats.append(GPU_SRGB8); - texture_formats.append(GPU_RGB9_E5); - texture_formats.append(GPU_DEPTH_COMPONENT32F); - texture_formats.append(GPU_DEPTH_COMPONENT24); - texture_formats.append(GPU_DEPTH_COMPONENT16); + Vector texture_formats; + texture_formats.append(GPU_RGBA8UI); + texture_formats.append(GPU_RGBA8I); + texture_formats.append(GPU_RGBA8); + texture_formats.append(GPU_RGBA16UI); + texture_formats.append(GPU_RGBA16I); + texture_formats.append(GPU_RGBA16F); + texture_formats.append(GPU_RGBA16); + texture_formats.append(GPU_RGBA32UI); + texture_formats.append(GPU_RGBA32I); + texture_formats.append(GPU_RGBA32F); + texture_formats.append(GPU_RG8UI); + texture_formats.append(GPU_RG8I); + texture_formats.append(GPU_RG8); + texture_formats.append(GPU_RG16UI); + texture_formats.append(GPU_RG16I); + texture_formats.append(GPU_RG16F); + texture_formats.append(GPU_RG16); + texture_formats.append(GPU_RG32UI); + texture_formats.append(GPU_RG32I); + texture_formats.append(GPU_RG32F); + texture_formats.append(GPU_R8UI); + texture_formats.append(GPU_R8I); + texture_formats.append(GPU_R8); + texture_formats.append(GPU_R16UI); + texture_formats.append(GPU_R16I); + texture_formats.append(GPU_R16F); + texture_formats.append(GPU_R16); + texture_formats.append(GPU_R32UI); + texture_formats.append(GPU_R32I); + texture_formats.append(GPU_R32F); + texture_formats.append(GPU_RGB10_A2); + texture_formats.append(GPU_RGB10_A2UI); + texture_formats.append(GPU_R11F_G11F_B10F); + texture_formats.append(GPU_DEPTH32F_STENCIL8); + texture_formats.append(GPU_DEPTH24_STENCIL8); + texture_formats.append(GPU_SRGB8_A8); + texture_formats.append(GPU_RGBA8_SNORM); + texture_formats.append(GPU_RGBA16_SNORM); + texture_formats.append(GPU_RGB8UI); + texture_formats.append(GPU_RGB8I); + texture_formats.append(GPU_RGB8); + texture_formats.append(GPU_RGB8_SNORM); + texture_formats.append(GPU_RGB16UI); + texture_formats.append(GPU_RGB16I); + 
texture_formats.append(GPU_RGB16F); + texture_formats.append(GPU_RGB16); + texture_formats.append(GPU_RGB16_SNORM); + texture_formats.append(GPU_RGB32UI); + texture_formats.append(GPU_RGB32I); + texture_formats.append(GPU_RGB32F); + texture_formats.append(GPU_RG8_SNORM); + texture_formats.append(GPU_RG16_SNORM); + texture_formats.append(GPU_R8_SNORM); + texture_formats.append(GPU_R16_SNORM); + texture_formats.append(GPU_SRGB8_A8_DXT1); + texture_formats.append(GPU_SRGB8_A8_DXT3); + texture_formats.append(GPU_SRGB8_A8_DXT5); + texture_formats.append(GPU_RGBA8_DXT1); + texture_formats.append(GPU_RGBA8_DXT3); + texture_formats.append(GPU_RGBA8_DXT5); + texture_formats.append(GPU_SRGB8); + texture_formats.append(GPU_RGB9_E5); + texture_formats.append(GPU_DEPTH_COMPONENT32F); + texture_formats.append(GPU_DEPTH_COMPONENT24); + texture_formats.append(GPU_DEPTH_COMPONENT16); - for (eGPUDataFormat host_format : host_formats) { - std::cout << "/* -------------------------------------------------------------------- */\n"; - std::cout << "/** \\name Roundtrip testing " << to_string(host_format) << "\n"; - std::cout << " * \\{ */\n\n"; + for (eGPUDataFormat host_format : host_formats) { + std::cout << "/* -------------------------------------------------------------------- */\n"; + std::cout << "/** \\name Roundtrip testing " << to_string(host_format) << "\n"; + std::cout << " * \\{ */\n\n"; - for (eGPUTextureFormat texture_format : texture_formats) { - if (!validate_data_format(texture_format, host_format)) { - continue; - } - - std::cout << "static void test_texture_roundtrip__" << to_string(host_format) << "__" - << to_string(texture_format) << "()\n"; - std::cout << "{\n"; - - std::cout << " texture_create_upload_read<" << to_string(texture_format) << ", " - << to_string(host_format) << ", " << to_prim_type_string(host_format) - << ">();\n"; - - std::cout << "}\n"; - std::cout << "GPU_TEST(texture_roundtrip__" << to_string(host_format) << "__" - << to_string(texture_format) 
<< ");\n\n"; + for (eGPUTextureFormat texture_format : texture_formats) { + if (!validate_data_format(texture_format, host_format)) { + continue; } - std::cout << "/* \\} */\n\n"; + + std::cout << "static void test_texture_roundtrip__" << to_string(host_format) << "__" + << to_string(texture_format) << "()\n"; + std::cout << "{\n"; + + std::cout << " texture_create_upload_read<" << to_string(texture_format) << ", " + << to_string(host_format) << ", " << to_prim_type_string(host_format) << ">();\n"; + + std::cout << "}\n"; + std::cout << "GPU_TEST(texture_roundtrip__" << to_string(host_format) << "__" + << to_string(texture_format) << ");\n\n"; } + std::cout << "/* \\} */\n\n"; } +} #endif /** \} */ diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index de4b02cc55a..4e908d9e9ac 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -373,12 +373,14 @@ static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) { switch (device_format) { case GPU_RGBA8UI: + case GPU_RGBA8: case GPU_RG8UI: + case GPU_RG8: case GPU_R8UI: + case GPU_R8: return ConversionType::PASS_THROUGH; case GPU_RGBA8I: - case GPU_RGBA8: case GPU_RGBA16UI: case GPU_RGBA16I: case GPU_RGBA16F: @@ -387,7 +389,6 @@ static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) case GPU_RGBA32I: case GPU_RGBA32F: case GPU_RG8I: - case GPU_RG8: case GPU_RG16UI: case GPU_RG16I: case GPU_RG16F: @@ -396,7 +397,6 @@ static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) case GPU_RG32I: case GPU_RG32F: case GPU_R8I: - case GPU_R8: case GPU_R16UI: case GPU_R16I: case GPU_R16F: @@ -444,6 +444,22 @@ static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format) return ConversionType::UNSUPPORTED; } +static ConversionType type_of_conversion_r11g11b10(eGPUTextureFormat device_format) +{ + if (device_format == 
GPU_R11F_G11F_B10F) { + return ConversionType::PASS_THROUGH; + } + return ConversionType::UNSUPPORTED; +} + +static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_format) +{ + if (ELEM(device_format, GPU_RGB10_A2, GPU_RGB10_A2UI)) { + return ConversionType::PASS_THROUGH; + } + return ConversionType::UNSUPPORTED; +} + static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureFormat device_format) { BLI_assert(validate_data_format(device_format, host_format)); @@ -459,10 +475,12 @@ static ConversionType host_to_device(eGPUDataFormat host_format, eGPUTextureForm return type_of_conversion_half(device_format); case GPU_DATA_UBYTE: return type_of_conversion_ubyte(device_format); + case GPU_DATA_10_11_11_REV: + return type_of_conversion_r11g11b10(device_format); + case GPU_DATA_2_10_10_10_REV: + return type_of_conversion_r10g10b10a2(device_format); case GPU_DATA_UINT_24_8: - case GPU_DATA_10_11_11_REV: - case GPU_DATA_2_10_10_10_REV: return ConversionType::UNSUPPORTED; } -- 2.30.2 From df0e20cae95f039ae6a5b871a8ddbac3d0bb0bd6 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 12:34:49 +0100 Subject: [PATCH 10/33] Added support for F16. --- source/blender/gpu/tests/texture_test.cc | 175 +++++++++++------- .../blender/gpu/vulkan/vk_data_conversion.cc | 100 +++++++++- 2 files changed, 203 insertions(+), 72 deletions(-) diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 368488df106..978b05cc729 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -13,9 +13,9 @@ /* Not all texture types are supported by all platforms. This define safe guards them until we have * a working workaround or decided to remove support for those texture types. */ #define RUN_UNSUPPORTED false + /* Skip tests that haven't been developed yet due to non standard data types or it needs an - * framebuffer to create the texture.. 
*/ -#define RUN_16F_UNIMPLEMENTED false + * framebuffer to create the texture. */ #define RUN_SRGB_UNIMPLEMENTED false #define RUN_NON_STANDARD_UNIMPLEMENTED false #define RUN_COMPONENT_UNIMPLEMENTED false @@ -79,6 +79,7 @@ template static void texture_create_upload_read() { + static_assert(!std::is_same()); static_assert(validate_data_format(DeviceFormat, HostFormat)); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( @@ -92,15 +93,65 @@ static void texture_create_upload_read() DataType *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); bool failed = false; for (int i : IndexRange(data_len)) { - if constexpr (std::is_same_v) { - bool ok = abs(read_data[i] - data[i]) < 0.01; - failed |= !ok; - } - else { - bool ok = (read_data[i] - data[i]) == 0; - failed |= !ok; - } - // EXPECT_EQ(read_data[i], data[i]); + bool ok = (read_data[i] - data[i]) == 0; + failed |= !ok; + } + EXPECT_FALSE(failed); + + MEM_freeN(read_data); + MEM_freeN(data); + + GPU_texture_free(texture); +} + +template +static void texture_create_upload_read_with_bias(float max_allowed_bias) +{ + static_assert(validate_data_format(DeviceFormat, HostFormat)); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + "texture", Size, Size, 1, DeviceFormat, usage, nullptr); + ASSERT_NE(texture, nullptr); + + size_t data_len = Size * Size * to_component_len(DeviceFormat); + float *data = static_cast(generate_test_data(data_len)); + GPU_texture_update(texture, HostFormat, data); + + float *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); + float max_used_bias = 0.0f; + for (int i : IndexRange(data_len)) { + float bias = abs(read_data[i] - data[i]); + max_used_bias = max_ff(max_used_bias, bias); + } + EXPECT_LE(max_used_bias, max_allowed_bias); + + MEM_freeN(read_data); + MEM_freeN(data); + + GPU_texture_free(texture); 
+} + +/* Derivative of texture_create_upload_read_pixels that doesn't test each component, but a pixel at + * a time. This is needed to check the R11G11B10 and similar types. */ +template +static void texture_create_upload_read_pixel() +{ + using DataType = uint32_t; + static_assert(validate_data_format(DeviceFormat, HostFormat)); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + "texture", Size, Size, 1, DeviceFormat, usage, nullptr); + ASSERT_NE(texture, nullptr); + + size_t data_len = Size * Size; + DataType *data = static_cast(generate_test_data(data_len)); + GPU_texture_update(texture, HostFormat, data); + + DataType *read_data = static_cast(GPU_texture_read(texture, HostFormat, 0)); + bool failed = false; + for (int i : IndexRange(data_len)) { + bool ok = (read_data[i] - data[i]) == 0; + failed |= !ok; } EXPECT_FALSE(failed); @@ -115,98 +166,92 @@ static void texture_create_upload_read() * \{ */ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8); -#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.9f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16F); -#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA32F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA32F); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8() { - texture_create_upload_read(); + 
texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8); -#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.9f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16F); -#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG32F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG32F); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8); -#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.9f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16F); -#endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R32F); #if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } 
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB10_A2UI); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F); #endif @@ -214,122 +259,118 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R11F_G11F_B10F); #if RUN_SRGB_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8); #endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM); #if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB8_SNORM); -#endif -#if RUN_16F_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16F); -#endif -#if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16); static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB16_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F); #endif static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM); #if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT1() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT1); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT3() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT3); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT5() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8_DXT5); static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT1() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT1); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT3() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT3); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5); #endif @@ -337,7 +378,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_DXT5); #if RUN_SRGB_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8); #endif @@ -345,7 +386,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8); #if RUN_NON_STANDARD_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5); #endif @@ -353,7 +394,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5); #if RUN_UNSUPPORTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F); #endif @@ -361,13 +402,13 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F); #if RUN_COMPONENT_UNIMPLEMENTED static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24); static void 
test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16() { - texture_create_upload_read(); + texture_create_upload_read_with_bias(0.0f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT16); #endif @@ -695,7 +736,7 @@ GPU_TEST(texture_roundtrip__GPU_DATA_UINT_24_8__GPU_DEPTH24_STENCIL8); static void test_texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F() { - texture_create_upload_read(); + texture_create_upload_read(); } GPU_TEST(texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F); @@ -707,12 +748,13 @@ GPU_TEST(texture_roundtrip__GPU_DATA_10_11_11_REV__GPU_R11F_G11F_B10F); static void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2() { - texture_create_upload_read(); + texture_create_upload_read_pixel(); } GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2); + static void test_texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI() { - texture_create_upload_read(); + texture_create_upload_read_pixel(); } GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); @@ -730,17 +772,15 @@ static std::string to_prim_type_string(eGPUDataFormat host_format) switch (host_format) { case GPU_DATA_FLOAT: return std::string("float"); - case GPU_DATA_HALF_FLOAT: return std::string("uint16_t"); case GPU_DATA_INT: return std::string("int32_t"); case GPU_DATA_UBYTE: + case GPU_DATA_10_11_11_REV: return std::string("uint8_t"); case GPU_DATA_UINT: case GPU_DATA_UINT_24_8: - case GPU_DATA_10_11_11_REV: - case GPU_DATA_2_10_10_10_REV: return std::string("uint32_t"); } return std::string("UNKNOWN"); @@ -763,8 +803,6 @@ static std::string to_string(eGPUDataFormat host_format) return std::string("GPU_DATA_UINT_24_8"); case GPU_DATA_10_11_11_REV: return std::string("GPU_DATA_10_11_11_REV"); - case GPU_DATA_2_10_10_10_REV: - return std::string("GPU_DATA_2_10_10_10_REV"); } return std::string("UNKNOWN"); } @@ -784,7 +822,6 @@ TEST(gpu_util, generate_test_cases) host_formats.append(GPU_DATA_UBYTE); 
host_formats.append(GPU_DATA_UINT_24_8); host_formats.append(GPU_DATA_10_11_11_REV); - host_formats.append(GPU_DATA_2_10_10_10_REV); Vector texture_formats; texture_formats.append(GPU_RGBA8UI); diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index 4e908d9e9ac..89511cb2fbf 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -566,7 +566,6 @@ template struct ComponentValue { InnerType value; }; -using F32 = ComponentValue; using UI8 = ComponentValue; using UI16 = ComponentValue; using UI32 = ComponentValue; @@ -574,6 +573,25 @@ using I8 = ComponentValue; using I16 = ComponentValue; using I32 = ComponentValue; +union F32 { + uint32_t u; + float value; + struct { + uint Mantissa : 23; + uint Exponent : 8; + uint Sign : 1; + }; +}; + +union F16 { + uint16_t u; + struct { + uint Mantissa : 10; + uint Exponent : 5; + uint Sign : 1; + }; +}; + template void convert_component(SignedNormalized &dst, const F32 &src) { @@ -620,6 +638,80 @@ void convert_component(DestinationType &dst, const SourceType &src) dst.value = src.value; } +static F16 float_to_half(const F32 &value) +{ + F16 result; + /* Sign bit, shifted to its position. */ + uint sign_bit = value.u & 0x80000000; + sign_bit >>= 16; + /* Exponent. */ + uint exponent_bits = value.u & 0x7f800000; + /* Non-sign bits. */ + uint value_bits = value.u & 0x7fffffff; + value_bits >>= 13; /* Align mantissa on MSB. */ + value_bits -= 0x1c000; /* Adjust bias. */ + /* Flush-to-zero. */ + value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; + /* Clamp-to-max. */ + value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; + /* Denormals-as-zero. */ + value_bits = (exponent_bits == 0 ? 0 : value_bits); + /* Re-insert sign bit and return. 
*/ + result.u = (value_bits | sign_bit); + return result; +} + +static F32 half_to_float(const F16 &h) +{ + F32 o = {0}; + + // From ISPC ref code + if (h.Exponent == 0 && h.Mantissa == 0) // (Signed) zero + o.Sign = h.Sign; + else { + if (h.Exponent == 0) // Denormal (will convert to normalized) + { + // Adjust mantissa so it's normalized (and keep track of exp adjust) + int e = -1; + uint m = h.Mantissa; + do { + e++; + m <<= 1; + } while ((m & 0x400) == 0); + + o.Mantissa = (m & 0x3ff) << 13; + o.Exponent = 127 - 15 - e; + o.Sign = h.Sign; + } + else if (h.Exponent == 0x1f) // Inf/NaN + { + // NOTE: It's safe to treat both with the same code path by just truncating + // lower Mantissa bits in NaNs (this is valid). + o.Mantissa = h.Mantissa << 13; + o.Exponent = 255; + o.Sign = h.Sign; + } + else // Normalized number + { + o.Mantissa = h.Mantissa << 13; + o.Exponent = 127 - 15 + h.Exponent; + o.Sign = h.Sign; + } + } + + return o; +} + +static void convert_component(F16 &dst, const F32 &src) +{ + dst = float_to_half(src); +} + +static void convert_component(F32 &dst, const F16 &src) +{ + dst = half_to_float(src); +} + /* \} */ template @@ -728,9 +820,11 @@ static void convert_buffer(void *dst_memory, break; case ConversionType::FLOAT_TO_HALF: + convert_per_component(dst_memory, src_memory, buffer_size, device_format); + break; case ConversionType::HALF_TO_FLOAT: - BLI_assert_unreachable(); - return; + convert_per_component(dst_memory, src_memory, buffer_size, device_format); + break; } } -- 2.30.2 From 478be6712b2511562b72b73055441dbfcf12ebfb Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 12:40:24 +0100 Subject: [PATCH 11/33] Cleanup. 
--- source/blender/gpu/tests/gpu_testing.cc | 2 +- source/blender/gpu/tests/texture_test.cc | 159 +---------------------- 2 files changed, 2 insertions(+), 159 deletions(-) diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc index 82c0154c148..fd28a18a24b 100644 --- a/source/blender/gpu/tests/gpu_testing.cc +++ b/source/blender/gpu/tests/gpu_testing.cc @@ -17,7 +17,7 @@ void GPUTest::SetUp() GPU_backend_type_selection_set(gpu_backend_type); GHOST_GLSettings glSettings = {}; glSettings.context_type = draw_context_type; - //glSettings.flags = GHOST_glDebugContext; + glSettings.flags = GHOST_glDebugContext; CLG_init(); ghost_system = GHOST_CreateSystem(); ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings); diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 978b05cc729..38677c7f121 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -760,161 +760,4 @@ GPU_TEST(texture_roundtrip__GPU_DATA_2_10_10_10_REV__GPU_RGB10_A2UI); /* \} */ -/* -------------------------------------------------------------------- */ -/** \name Generate test cases. - * - * Next section is kept for convenience to regenerate test cases. 
- * - * \{ */ -#if 0 -static std::string to_prim_type_string(eGPUDataFormat host_format) -{ - switch (host_format) { - case GPU_DATA_FLOAT: - return std::string("float"); - case GPU_DATA_HALF_FLOAT: - return std::string("uint16_t"); - case GPU_DATA_INT: - return std::string("int32_t"); - case GPU_DATA_UBYTE: - case GPU_DATA_10_11_11_REV: - return std::string("uint8_t"); - case GPU_DATA_UINT: - case GPU_DATA_UINT_24_8: - return std::string("uint32_t"); - } - return std::string("UNKNOWN"); -} -static std::string to_string(eGPUDataFormat host_format) -{ - switch (host_format) { - case GPU_DATA_FLOAT: - return std::string("GPU_DATA_FLOAT"); - - case GPU_DATA_HALF_FLOAT: - return std::string("GPU_DATA_HALF_FLOAT"); - case GPU_DATA_INT: - return std::string("GPU_DATA_INT"); - case GPU_DATA_UINT: - return std::string("GPU_DATA_UINT"); - case GPU_DATA_UBYTE: - return std::string("GPU_DATA_UBYTE"); - case GPU_DATA_UINT_24_8: - return std::string("GPU_DATA_UINT_24_8"); - case GPU_DATA_10_11_11_REV: - return std::string("GPU_DATA_10_11_11_REV"); - } - return std::string("UNKNOWN"); -} - -static std::string to_string(eGPUTextureFormat texture_format) -{ - return std::string("GPU_") + std::string(GPU_texture_format_name(texture_format)); -} - -TEST(gpu_util, generate_test_cases) -{ - Vector host_formats; - host_formats.append(GPU_DATA_FLOAT); - host_formats.append(GPU_DATA_HALF_FLOAT); - host_formats.append(GPU_DATA_INT); - host_formats.append(GPU_DATA_UINT); - host_formats.append(GPU_DATA_UBYTE); - host_formats.append(GPU_DATA_UINT_24_8); - host_formats.append(GPU_DATA_10_11_11_REV); - - Vector texture_formats; - texture_formats.append(GPU_RGBA8UI); - texture_formats.append(GPU_RGBA8I); - texture_formats.append(GPU_RGBA8); - texture_formats.append(GPU_RGBA16UI); - texture_formats.append(GPU_RGBA16I); - texture_formats.append(GPU_RGBA16F); - texture_formats.append(GPU_RGBA16); - texture_formats.append(GPU_RGBA32UI); - texture_formats.append(GPU_RGBA32I); - 
texture_formats.append(GPU_RGBA32F); - texture_formats.append(GPU_RG8UI); - texture_formats.append(GPU_RG8I); - texture_formats.append(GPU_RG8); - texture_formats.append(GPU_RG16UI); - texture_formats.append(GPU_RG16I); - texture_formats.append(GPU_RG16F); - texture_formats.append(GPU_RG16); - texture_formats.append(GPU_RG32UI); - texture_formats.append(GPU_RG32I); - texture_formats.append(GPU_RG32F); - texture_formats.append(GPU_R8UI); - texture_formats.append(GPU_R8I); - texture_formats.append(GPU_R8); - texture_formats.append(GPU_R16UI); - texture_formats.append(GPU_R16I); - texture_formats.append(GPU_R16F); - texture_formats.append(GPU_R16); - texture_formats.append(GPU_R32UI); - texture_formats.append(GPU_R32I); - texture_formats.append(GPU_R32F); - texture_formats.append(GPU_RGB10_A2); - texture_formats.append(GPU_RGB10_A2UI); - texture_formats.append(GPU_R11F_G11F_B10F); - texture_formats.append(GPU_DEPTH32F_STENCIL8); - texture_formats.append(GPU_DEPTH24_STENCIL8); - texture_formats.append(GPU_SRGB8_A8); - texture_formats.append(GPU_RGBA8_SNORM); - texture_formats.append(GPU_RGBA16_SNORM); - texture_formats.append(GPU_RGB8UI); - texture_formats.append(GPU_RGB8I); - texture_formats.append(GPU_RGB8); - texture_formats.append(GPU_RGB8_SNORM); - texture_formats.append(GPU_RGB16UI); - texture_formats.append(GPU_RGB16I); - texture_formats.append(GPU_RGB16F); - texture_formats.append(GPU_RGB16); - texture_formats.append(GPU_RGB16_SNORM); - texture_formats.append(GPU_RGB32UI); - texture_formats.append(GPU_RGB32I); - texture_formats.append(GPU_RGB32F); - texture_formats.append(GPU_RG8_SNORM); - texture_formats.append(GPU_RG16_SNORM); - texture_formats.append(GPU_R8_SNORM); - texture_formats.append(GPU_R16_SNORM); - texture_formats.append(GPU_SRGB8_A8_DXT1); - texture_formats.append(GPU_SRGB8_A8_DXT3); - texture_formats.append(GPU_SRGB8_A8_DXT5); - texture_formats.append(GPU_RGBA8_DXT1); - texture_formats.append(GPU_RGBA8_DXT3); - 
texture_formats.append(GPU_RGBA8_DXT5); - texture_formats.append(GPU_SRGB8); - texture_formats.append(GPU_RGB9_E5); - texture_formats.append(GPU_DEPTH_COMPONENT32F); - texture_formats.append(GPU_DEPTH_COMPONENT24); - texture_formats.append(GPU_DEPTH_COMPONENT16); - - for (eGPUDataFormat host_format : host_formats) { - std::cout << "/* -------------------------------------------------------------------- */\n"; - std::cout << "/** \\name Roundtrip testing " << to_string(host_format) << "\n"; - std::cout << " * \\{ */\n\n"; - - for (eGPUTextureFormat texture_format : texture_formats) { - if (!validate_data_format(texture_format, host_format)) { - continue; - } - - std::cout << "static void test_texture_roundtrip__" << to_string(host_format) << "__" - << to_string(texture_format) << "()\n"; - std::cout << "{\n"; - - std::cout << " texture_create_upload_read<" << to_string(texture_format) << ", " - << to_string(host_format) << ", " << to_prim_type_string(host_format) << ">();\n"; - - std::cout << "}\n"; - std::cout << "GPU_TEST(texture_roundtrip__" << to_string(host_format) << "__" - << to_string(texture_format) << ");\n\n"; - } - std::cout << "/* \\} */\n\n"; - } -} -#endif -/** \} */ - -} // namespace blender::gpu::tests \ No newline at end of file +} // namespace blender::gpu::tests -- 2.30.2 From 14c5059c2a800e518ba6e712ac2ed8a84904f2e4 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 17 Mar 2023 13:30:31 +0100 Subject: [PATCH 12/33] Fix OpenGL tests. 
--- source/blender/gpu/tests/texture_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index 38677c7f121..fab8e27d065 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -266,13 +266,13 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_SRGB8_A8); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGBA16_SNORM); @@ -315,25 +315,25 @@ GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB32F); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RG16_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.004f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R8_SNORM); static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM() { - texture_create_upload_read_with_bias(0.0f); + texture_create_upload_read_with_bias(0.00002f); } GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_R16_SNORM); -- 2.30.2 From 89296c8f7e5ae0fd235789343566e7b06163d8e4 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 
08:08:43 +0100 Subject: [PATCH 13/33] Use OpenEXR Imath for float<->half conversion. --- source/blender/gpu/CMakeLists.txt | 1 + .../blender/gpu/vulkan/vk_data_conversion.cc | 91 ++----------------- 2 files changed, 7 insertions(+), 85 deletions(-) diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 31a85e61ec5..04c02517136 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -44,6 +44,7 @@ set(INC set(INC_SYS ${Epoxy_INCLUDE_DIRS} + ${IMATH_INCLUDE_DIR} ) set(SRC diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index 89511cb2fbf..b5c14593f6a 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -7,6 +7,8 @@ #include "vk_data_conversion.hh" +#include "Imath/half.h" + namespace blender::gpu { /* -------------------------------------------------------------------- */ @@ -572,25 +574,8 @@ using UI32 = ComponentValue; using I8 = ComponentValue; using I16 = ComponentValue; using I32 = ComponentValue; - -union F32 { - uint32_t u; - float value; - struct { - uint Mantissa : 23; - uint Exponent : 8; - uint Sign : 1; - }; -}; - -union F16 { - uint16_t u; - struct { - uint Mantissa : 10; - uint Exponent : 5; - uint Sign : 1; - }; -}; +using F32 = ComponentValue; +using F16 = ComponentValue; template void convert_component(SignedNormalized &dst, const F32 &src) @@ -638,78 +623,14 @@ void convert_component(DestinationType &dst, const SourceType &src) dst.value = src.value; } -static F16 float_to_half(const F32 &value) -{ - F16 result; - /* Sign bit, shifted to its position. */ - uint sign_bit = value.u & 0x80000000; - sign_bit >>= 16; - /* Exponent. */ - uint exponent_bits = value.u & 0x7f800000; - /* Non-sign bits. */ - uint value_bits = value.u & 0x7fffffff; - value_bits >>= 13; /* Align mantissa on MSB. */ - value_bits -= 0x1c000; /* Adjust bias. */ - /* Flush-to-zero. 
*/ - value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; - /* Clamp-to-max. */ - value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; - /* Denormals-as-zero. */ - value_bits = (exponent_bits == 0 ? 0 : value_bits); - /* Re-insert sign bit and return. */ - result.u = (value_bits | sign_bit); - return result; -} - -static F32 half_to_float(const F16 &h) -{ - F32 o = {0}; - - // From ISPC ref code - if (h.Exponent == 0 && h.Mantissa == 0) // (Signed) zero - o.Sign = h.Sign; - else { - if (h.Exponent == 0) // Denormal (will convert to normalized) - { - // Adjust mantissa so it's normalized (and keep track of exp adjust) - int e = -1; - uint m = h.Mantissa; - do { - e++; - m <<= 1; - } while ((m & 0x400) == 0); - - o.Mantissa = (m & 0x3ff) << 13; - o.Exponent = 127 - 15 - e; - o.Sign = h.Sign; - } - else if (h.Exponent == 0x1f) // Inf/NaN - { - // NOTE: It's safe to treat both with the same code path by just truncating - // lower Mantissa bits in NaNs (this is valid). - o.Mantissa = h.Mantissa << 13; - o.Exponent = 255; - o.Sign = h.Sign; - } - else // Normalized number - { - o.Mantissa = h.Mantissa << 13; - o.Exponent = 127 - 15 + h.Exponent; - o.Sign = h.Sign; - } - } - - return o; -} - static void convert_component(F16 &dst, const F32 &src) { - dst = float_to_half(src); + dst.value = imath_float_to_half(src.value); } static void convert_component(F32 &dst, const F16 &src) { - dst = half_to_float(src); + dst.value = imath_half_to_float(src.value); } /* \} */ -- 2.30.2 From 34565dfe49f88263d53ec0aa208c49006ed232e8 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 08:21:38 +0100 Subject: [PATCH 14/33] Use GTEST_SKIP when texture could not be created on platform. 
--- source/blender/gpu/tests/texture_test.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/blender/gpu/tests/texture_test.cc b/source/blender/gpu/tests/texture_test.cc index fab8e27d065..2930a20b127 100644 --- a/source/blender/gpu/tests/texture_test.cc +++ b/source/blender/gpu/tests/texture_test.cc @@ -84,7 +84,9 @@ static void texture_create_upload_read() eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( "texture", Size, Size, 1, DeviceFormat, usage, nullptr); - ASSERT_NE(texture, nullptr); + if (texture == nullptr) { + GTEST_SKIP() << "Platform doesn't support texture format [" << STRINGIFY(DeviceFormat) << "]"; + } size_t data_len = Size * Size * to_component_len(DeviceFormat); DataType *data = static_cast(generate_test_data(data_len)); @@ -111,7 +113,9 @@ static void texture_create_upload_read_with_bias(float max_allowed_bias) eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( "texture", Size, Size, 1, DeviceFormat, usage, nullptr); - ASSERT_NE(texture, nullptr); + if (texture == nullptr) { + GTEST_SKIP() << "Platform doesn't support texture format [" << STRINGIFY(DeviceFormat) << "]"; + } size_t data_len = Size * Size * to_component_len(DeviceFormat); float *data = static_cast(generate_test_data(data_len)); -- 2.30.2 From 4cacec223cd3f36c0dea324f646d3880a4faca50 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 08:33:36 +0100 Subject: [PATCH 15/33] Fix incorrect datatype F32/16 --- source/blender/gpu/vulkan/vk_data_conversion.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_data_conversion.cc b/source/blender/gpu/vulkan/vk_data_conversion.cc index b5c14593f6a..27cab7fd905 100644 --- a/source/blender/gpu/vulkan/vk_data_conversion.cc +++ b/source/blender/gpu/vulkan/vk_data_conversion.cc @@ -574,8 
+574,8 @@ using UI32 = ComponentValue; using I8 = ComponentValue; using I16 = ComponentValue; using I32 = ComponentValue; -using F32 = ComponentValue; -using F16 = ComponentValue; +using F32 = ComponentValue; +using F16 = ComponentValue; template void convert_component(SignedNormalized &dst, const F32 &src) -- 2.30.2 From f9a4fa322bff8ca5bc4781c40e4de708d58418e5 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 13:55:20 +0100 Subject: [PATCH 16/33] GPU: Renderdoc integration. --- extern/renderdoc/README.blender | 5 + extern/renderdoc/include/renderdoc_app.h | 723 ++++++++++++++++++ intern/CMakeLists.txt | 4 + intern/renderdoc_dynload/CMakeLists.txt | 17 + .../include/renderdoc_api.hh | 36 + .../renderdoc_dynload/intern/renderdoc_api.cc | 69 ++ source/blender/gpu/CMakeLists.txt | 4 + source/blender/gpu/opengl/gl_context.hh | 4 + source/blender/gpu/opengl/gl_debug.cc | 4 +- source/blender/gpu/tests/gpu_testing.cc | 14 +- source/blender/gpu/tests/gpu_testing.hh | 2 + 11 files changed, 880 insertions(+), 2 deletions(-) create mode 100644 extern/renderdoc/README.blender create mode 100644 extern/renderdoc/include/renderdoc_app.h create mode 100644 intern/renderdoc_dynload/CMakeLists.txt create mode 100644 intern/renderdoc_dynload/include/renderdoc_api.hh create mode 100644 intern/renderdoc_dynload/intern/renderdoc_api.cc diff --git a/extern/renderdoc/README.blender b/extern/renderdoc/README.blender new file mode 100644 index 00000000000..1d6203951e9 --- /dev/null +++ b/extern/renderdoc/README.blender @@ -0,0 +1,5 @@ +Project: Renderdoc APP +URL: https://github.com/baldurk/renderdoc/ +License: MIT +Upstream version: d47e79ae079783935b8857d6a1730440eafb0b38 +Local modifications: None diff --git a/extern/renderdoc/include/renderdoc_app.h b/extern/renderdoc/include/renderdoc_app.h new file mode 100644 index 00000000000..501aebbda38 --- /dev/null +++ b/extern/renderdoc/include/renderdoc_app.h @@ -0,0 +1,723 @@ 
+/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2023 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_value +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { \ + 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { \ + 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// truncated version when only a uint64_t is available (e.g. 
Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption { + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. 
+ // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. 
Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. 
+ // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. +// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton { + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, + + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, 
+ eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + +typedef enum RENDERDOC_OverlayBits { + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | + 
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. +typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. 
+// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. +// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, + uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to.
If set to NULL or "" +// the most recent capture file created will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, + const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, + const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window).
This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. +#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. 
Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/or windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. +// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discard any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. 
You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version { + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. This is the first API version in the v1.0 +// branch. 
+// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. +// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() + +typedef struct RENDERDOC_API_1_6_0 +{ + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. + // These unions allow old code to continue compiling without changes + union + { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. 
+ // These unions allow old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union + { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. + // This union allows old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef 
RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/intern/CMakeLists.txt b/intern/CMakeLists.txt index e1dfc7043e9..c9df16a481b 100644 --- a/intern/CMakeLists.txt +++ b/intern/CMakeLists.txt @@ -67,6 +67,10 @@ if(UNIX AND NOT APPLE) add_subdirectory(libc_compat) endif() +if (NOT APPLE) + add_subdirectory(renderdoc_dynload) +endif() + if(UNIX AND NOT APPLE) # Important this comes after "ghost" as it uses includes defined by GHOST's CMake. 
if(WITH_GHOST_WAYLAND AND WITH_GHOST_WAYLAND_DYNLOAD) diff --git a/intern/renderdoc_dynload/CMakeLists.txt b/intern/renderdoc_dynload/CMakeLists.txt new file mode 100644 index 00000000000..90fa9951975 --- /dev/null +++ b/intern/renderdoc_dynload/CMakeLists.txt @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +set(INC + include + ../../extern/renderdoc/include +) + +set(INC_SYS +) + +set(SRC + intern/renderdoc_api.cc + + include/renderdoc_api.hh +) + +blender_add_lib(bf_intern_renderdoc_dynload "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") \ No newline at end of file diff --git a/intern/renderdoc_dynload/include/renderdoc_api.hh b/intern/renderdoc_dynload/include/renderdoc_api.hh new file mode 100644 index 00000000000..dc4a62a4ad3 --- /dev/null +++ b/intern/renderdoc_dynload/include/renderdoc_api.hh @@ -0,0 +1,36 @@ +#pragma once + +#include "renderdoc_app.h" + +namespace renderdoc::api { +class Renderdoc { + private: + enum class State { + /** + * Initial state of the API indicating that the API hasn't checked if it can find renderdoc. + */ + UNINITIALIZED, + + /** + * API has looked for renderdoc, but couldn't find it. This indicates that renderdoc isn't + * available on the platform, or wasn't registered correctly. + */ + NOT_FOUND, + + /** + * API has loaded the symbols of renderdoc. 
+ */ + LOADED, + }; + State state_ = State::UNINITIALIZED; + RENDERDOC_API_1_6_0 *renderdoc_api_ = nullptr; + + public: + void start_frame_capture(); + void end_frame_capture(); + + private: + bool check_loaded(); + void load(); +}; +} // namespace renderdoc::api \ No newline at end of file diff --git a/intern/renderdoc_dynload/intern/renderdoc_api.cc b/intern/renderdoc_dynload/intern/renderdoc_api.cc new file mode 100644 index 00000000000..d20f7c649b9 --- /dev/null +++ b/intern/renderdoc_dynload/intern/renderdoc_api.cc @@ -0,0 +1,69 @@ + +#include "renderdoc_api.hh" + +#ifdef _WIN32 +#else +# include <dlfcn.h> +#endif +#include <iostream> + +namespace renderdoc::api { +void Renderdoc::start_frame_capture() +{ + if (!check_loaded()) { + return; + } + renderdoc_api_->StartFrameCapture(nullptr, nullptr); +} + +void Renderdoc::end_frame_capture() +{ + if (!check_loaded()) { + return; + } + renderdoc_api_->EndFrameCapture(nullptr, nullptr); +} + +bool Renderdoc::check_loaded() +{ + switch (state_) { + case State::UNINITIALIZED: + load(); + state_ = renderdoc_api_ != nullptr ? State::LOADED : State::NOT_FOUND; + return state_ == State::LOADED; + case State::NOT_FOUND: + return false; + case State::LOADED: + return true; + } + return false; +} + +void Renderdoc::load() +{ +#ifdef _WIN32 + if (HMODULE mod = GetModuleHandleA("renderdoc.dll")) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(mod, + "RENDERDOC_GetAPI"); + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, (void **)&renderdoc_api_); + } +#else + if (void *mod = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD)) { + pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym(mod, "RENDERDOC_GetAPI"); + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, (void **)&renderdoc_api_); + } +#endif + + if (renderdoc_api_ != nullptr) { + int major; + int minor; + int patch; + renderdoc_api_->GetAPIVersion(&major, &minor, &patch); + std::cout << "Found renderdoc API [" << major << "." << minor << "." 
<< patch << "]"; + } + else { + std::cerr << "Unable to load renderdoc API.\n"; + } +} + +} // namespace renderdoc::api \ No newline at end of file diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 329cb211c3f..2f6f2339cfc 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -40,6 +40,8 @@ set(INC ../../../intern/ghost ../../../intern/guardedalloc ../../../intern/mantaflow/extern + ../../../extern/renderdoc/include + ../../../intern/renderdoc_dynload/include ) set(INC_SYS @@ -736,6 +738,7 @@ target_link_libraries(bf_gpu PUBLIC bf_compositor_shaders bf_draw_shaders bf_gpu_shaders + bf_intern_renderdoc_dynload ) if(WITH_OPENCOLORIO) @@ -780,6 +783,7 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER) bf_intern_clog bf_blenlib bf_intern_ghost + bf_intern_renderdoc_dynload ${PLATFORM_LINKLIBS} ) target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 2d19bc9a10d..b27bd5bc440 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -7,6 +7,8 @@ #pragma once +#include "renderdoc_api.hh" + #include "gpu_context_private.hh" #include "GPU_framebuffer.h" @@ -95,6 +97,8 @@ class GLContext : public Context { /** #GLBackend owns this data. 
*/ GLSharedOrphanLists &shared_orphan_list_; + renderdoc::api::Renderdoc renderdoc_; + public: GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list); ~GLContext(); diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index 00f11934804..49806b0ba9b 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -382,11 +382,13 @@ void GLContext::debug_group_end() bool GLContext::debug_capture_begin() { - return false; + renderdoc_.start_frame_capture(); + return true; } void GLContext::debug_capture_end() { + renderdoc_.end_frame_capture(); } void *GLContext::debug_capture_scope_create(const char * /*name*/) diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc index fd28a18a24b..b3570993d44 100644 --- a/source/blender/gpu/tests/gpu_testing.cc +++ b/source/blender/gpu/tests/gpu_testing.cc @@ -5,36 +5,48 @@ #include "CLG_log.h" #include "GPU_context.h" +#include "GPU_debug.h" #include "GPU_init_exit.h" #include "gpu_testing.hh" #include "GHOST_C-api.h" +#include "BKE_global.h" + namespace blender::gpu { void GPUTest::SetUp() { + prev_g_debug_ = G.debug; + G.debug |= G_DEBUG_GPU; + + CLG_init(); GPU_backend_type_selection_set(gpu_backend_type); GHOST_GLSettings glSettings = {}; glSettings.context_type = draw_context_type; glSettings.flags = GHOST_glDebugContext; - CLG_init(); ghost_system = GHOST_CreateSystem(); ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings); GHOST_ActivateOpenGLContext(ghost_context); context = GPU_context_create(nullptr, ghost_context); GPU_init(); + GPU_context_begin_frame(context); + GPU_debug_capture_begin(); } void GPUTest::TearDown() { + GPU_debug_capture_end(); GPU_context_end_frame(context); + GPU_exit(); GPU_context_discard(context); GHOST_DisposeOpenGLContext(ghost_system, ghost_context); GHOST_DisposeSystem(ghost_system); CLG_exit(); + + G.debug = prev_g_debug_; } } // namespace blender::gpu 
diff --git a/source/blender/gpu/tests/gpu_testing.hh b/source/blender/gpu/tests/gpu_testing.hh index dfab3529134..74d075da46e 100644 --- a/source/blender/gpu/tests/gpu_testing.hh +++ b/source/blender/gpu/tests/gpu_testing.hh @@ -24,6 +24,8 @@ class GPUTest : public ::testing::Test { GHOST_ContextHandle ghost_context; struct GPUContext *context; + int32_t prev_g_debug_; + protected: GPUTest(GHOST_TDrawingContextType draw_context_type, eGPUBackendType gpu_backend_type) : draw_context_type(draw_context_type), gpu_backend_type(gpu_backend_type) -- 2.30.2 From 087091af29056bb2dc6e2d2aba39a9560cefee56 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 14:53:49 +0100 Subject: [PATCH 17/33] Add OpenGL and Vulkan integration to renderdoc. --- .../include/renderdoc_api.hh | 6 ++- .../renderdoc_dynload/intern/renderdoc_api.cc | 11 ++-- source/blender/gpu/CMakeLists.txt | 1 + source/blender/gpu/opengl/gl_backend.hh | 6 +++ source/blender/gpu/opengl/gl_context.hh | 3 -- source/blender/gpu/opengl/gl_debug.cc | 15 +++++- source/blender/gpu/vulkan/vk_backend.hh | 11 ++++ source/blender/gpu/vulkan/vk_context.cc | 31 ----------- source/blender/gpu/vulkan/vk_debug.cc | 54 +++++++++++++++++++ source/blender/gpu/vulkan/vk_shader.cc | 2 +- 10 files changed, 97 insertions(+), 43 deletions(-) create mode 100644 source/blender/gpu/vulkan/vk_debug.cc diff --git a/intern/renderdoc_dynload/include/renderdoc_api.hh b/intern/renderdoc_dynload/include/renderdoc_api.hh index dc4a62a4ad3..3d58045d3b1 100644 --- a/intern/renderdoc_dynload/include/renderdoc_api.hh +++ b/intern/renderdoc_dynload/include/renderdoc_api.hh @@ -26,8 +26,10 @@ class Renderdoc { RENDERDOC_API_1_6_0 *renderdoc_api_ = nullptr; public: - void start_frame_capture(); - void end_frame_capture(); + void start_frame_capture(RENDERDOC_DevicePointer device_handle, + RENDERDOC_WindowHandle window_handle); + void end_frame_capture(RENDERDOC_DevicePointer device_handle, + RENDERDOC_WindowHandle window_handle); private: 
bool check_loaded(); diff --git a/intern/renderdoc_dynload/intern/renderdoc_api.cc b/intern/renderdoc_dynload/intern/renderdoc_api.cc index d20f7c649b9..cc85ba2b027 100644 --- a/intern/renderdoc_dynload/intern/renderdoc_api.cc +++ b/intern/renderdoc_dynload/intern/renderdoc_api.cc @@ -8,20 +8,23 @@ #include namespace renderdoc::api { -void Renderdoc::start_frame_capture() +void Renderdoc::start_frame_capture(RENDERDOC_DevicePointer device_handle, + RENDERDOC_WindowHandle window_handle) { if (!check_loaded()) { return; } - renderdoc_api_->StartFrameCapture(nullptr, nullptr); + renderdoc_api_->StartFrameCapture(device_handle, window_handle); } -void Renderdoc::end_frame_capture() +void Renderdoc::end_frame_capture(RENDERDOC_DevicePointer device_handle, + RENDERDOC_WindowHandle window_handle) + { if (!check_loaded()) { return; } - renderdoc_api_->EndFrameCapture(nullptr, nullptr); + renderdoc_api_->EndFrameCapture(device_handle, window_handle); } bool Renderdoc::check_loaded() diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 2f6f2339cfc..733d24fb9fb 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -197,6 +197,7 @@ set(VULKAN_SRC vulkan/vk_command_buffer.cc vulkan/vk_common.cc vulkan/vk_context.cc + vulkan/vk_debug.cc vulkan/vk_descriptor_pools.cc vulkan/vk_descriptor_set.cc vulkan/vk_drawlist.cc diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index eb9466e0e26..16f126ad6e1 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -11,6 +11,8 @@ #include "BLI_vector.hh" +#include "renderdoc_api.hh" + #include "gl_batch.hh" #include "gl_compute.hh" #include "gl_context.hh" @@ -30,6 +32,7 @@ namespace gpu { class GLBackend : public GPUBackend { private: GLSharedOrphanLists shared_orphan_list_; + renderdoc::api::Renderdoc renderdoc_; public: GLBackend() @@ -155,6 +158,9 @@ class GLBackend : public 
GPUBackend { void render_end(void) override{}; void render_step(void) override{}; + void debug_capture_begin(); + void debug_capture_end(); + private: static void platform_init(); static void platform_exit(); diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index b27bd5bc440..978337c08d3 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -7,8 +7,6 @@ #pragma once -#include "renderdoc_api.hh" - #include "gpu_context_private.hh" #include "GPU_framebuffer.h" @@ -97,7 +95,6 @@ class GLContext : public Context { /** #GLBackend owns this data. */ GLSharedOrphanLists &shared_orphan_list_; - renderdoc::api::Renderdoc renderdoc_; public: GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list); diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index 49806b0ba9b..7f375c406a6 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -19,6 +19,7 @@ #include "CLG_log.h" +#include "gl_backend.hh" #include "gl_context.hh" #include "gl_uniform_buffer.hh" @@ -382,13 +383,23 @@ void GLContext::debug_group_end() bool GLContext::debug_capture_begin() { - renderdoc_.start_frame_capture(); + GLBackend::get()->debug_capture_begin(); return true; } +void GLBackend::debug_capture_begin() +{ + renderdoc_.start_frame_capture(nullptr, nullptr); +} + void GLContext::debug_capture_end() { - renderdoc_.end_frame_capture(); + GLBackend::get()->debug_capture_end(); +} + +void GLBackend::debug_capture_end() +{ + renderdoc_.end_frame_capture(nullptr, nullptr); } void *GLContext::debug_capture_scope_create(const char * /*name*/) diff --git a/source/blender/gpu/vulkan/vk_backend.hh b/source/blender/gpu/vulkan/vk_backend.hh index 01c85792018..ca09b77b4a7 100644 --- a/source/blender/gpu/vulkan/vk_backend.hh +++ b/source/blender/gpu/vulkan/vk_backend.hh @@ -9,6 +9,8 @@ #include "gpu_backend.hh" +#include 
"renderdoc_api.hh" + #include "vk_common.hh" #include "shaderc/shaderc.hpp" @@ -20,6 +22,7 @@ class VKContext; class VKBackend : public GPUBackend { private: shaderc::Compiler shaderc_compiler_; + renderdoc::api::Renderdoc renderdoc_api_; public: VKBackend() @@ -59,10 +62,18 @@ class VKBackend : public GPUBackend { void render_end() override; void render_step() override; + void debug_capture_begin(VkInstance vk_instance); + void debug_capture_end(VkInstance vk_instance); + shaderc::Compiler &get_shaderc_compiler(); static void capabilities_init(VKContext &context); + static VKBackend &get() + { + return *static_cast(GPUBackend::get()); + } + private: static void init_platform(); static void platform_exit(); diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 42b8d9fbd9a..6528a2b8aca 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -120,35 +120,4 @@ void VKContext::memory_statistics_get(int * /*total_mem*/, int * /*free_mem*/) { } -void VKContext::debug_group_begin(const char *, int) -{ -} - -void VKContext::debug_group_end() -{ -} - -bool VKContext::debug_capture_begin() -{ - return false; -} - -void VKContext::debug_capture_end() -{ -} - -void *VKContext::debug_capture_scope_create(const char * /*name*/) -{ - return nullptr; -} - -bool VKContext::debug_capture_scope_begin(void * /*scope*/) -{ - return false; -} - -void VKContext::debug_capture_scope_end(void * /*scope*/) -{ -} - } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc new file mode 100644 index 00000000000..5bf5acc4f73 --- /dev/null +++ b/source/blender/gpu/vulkan/vk_debug.cc @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. All rights reserved. 
*/ + +/** \file + * \ingroup gpu + */ + +#include "vk_backend.hh" +#include "vk_context.hh" + +namespace blender::gpu { +void VKContext::debug_group_begin(const char *, int) +{ +} + +void VKContext::debug_group_end() +{ +} + +bool VKContext::debug_capture_begin() +{ + VKBackend::get().debug_capture_begin(vk_instance_); + return true; +} + +void VKBackend::debug_capture_begin(VkInstance vk_instance) +{ + renderdoc_api_.start_frame_capture(vk_instance, nullptr); +} + +void VKContext::debug_capture_end() +{ + VKBackend::get().debug_capture_end(vk_instance_); +} + +void VKBackend::debug_capture_end(VkInstance vk_instance) +{ + renderdoc_api_.end_frame_capture(vk_instance, nullptr); +} + +void *VKContext::debug_capture_scope_create(const char * /*name*/) +{ + return nullptr; +} + +bool VKContext::debug_capture_scope_begin(void * /*scope*/) +{ + return false; +} + +void VKContext::debug_capture_scope_end(void * /*scope*/) +{ +} +} // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc index cd502b7d085..69d7f8014ad 100644 --- a/source/blender/gpu/vulkan/vk_shader.cc +++ b/source/blender/gpu/vulkan/vk_shader.cc @@ -542,7 +542,7 @@ Vector VKShader::compile_glsl_to_spirv(Span sources, shaderc_shader_kind stage) { std::string combined_sources = combine_sources(sources); - VKBackend &backend = static_cast(*VKBackend::get()); + VKBackend &backend = VKBackend::get(); shaderc::Compiler &compiler = backend.get_shaderc_compiler(); shaderc::CompileOptions options; options.SetOptimizationLevel(shaderc_optimization_level_performance); -- 2.30.2 From 74d1d9f1a494ed52f8c45817db563b62099b93f6 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 15:29:21 +0100 Subject: [PATCH 18/33] Cleanup. Headers, empty lines. 
--- intern/renderdoc_dynload/include/renderdoc_api.hh | 7 +++++++ intern/renderdoc_dynload/intern/renderdoc_api.cc | 2 ++ source/blender/gpu/opengl/gl_context.hh | 1 - source/blender/gpu/vulkan/vk_debug.cc | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/intern/renderdoc_dynload/include/renderdoc_api.hh b/intern/renderdoc_dynload/include/renderdoc_api.hh index 3d58045d3b1..059e25631a5 100644 --- a/intern/renderdoc_dynload/include/renderdoc_api.hh +++ b/intern/renderdoc_dynload/include/renderdoc_api.hh @@ -1,4 +1,6 @@ #pragma once +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2023 Blender Foundation. All rights reserved. */ #include "renderdoc_app.h" @@ -32,6 +34,11 @@ class Renderdoc { RENDERDOC_WindowHandle window_handle); private: + /** + * Check if renderdoc has been loaded. + * + * When not loaded it tries to load the API, but only tries to do it once. + */ bool check_loaded(); void load(); }; diff --git a/intern/renderdoc_dynload/intern/renderdoc_api.cc b/intern/renderdoc_dynload/intern/renderdoc_api.cc index cc85ba2b027..2ae2c28e258 100644 --- a/intern/renderdoc_dynload/intern/renderdoc_api.cc +++ b/intern/renderdoc_dynload/intern/renderdoc_api.cc @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2023 Blender Foundation. All rights reserved. */ #include "renderdoc_api.hh" diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 978337c08d3..2d19bc9a10d 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -95,7 +95,6 @@ class GLContext : public Context { /** #GLBackend owns this data. 
*/ GLSharedOrphanLists &shared_orphan_list_; - public: GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list); ~GLContext(); diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc index 5bf5acc4f73..0f434553465 100644 --- a/source/blender/gpu/vulkan/vk_debug.cc +++ b/source/blender/gpu/vulkan/vk_debug.cc @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. */ + * Copyright 2023 Blender Foundation. All rights reserved. */ /** \file * \ingroup gpu -- 2.30.2 From ce34c913cd355189810e490eb4deb7fc842f27dc Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 20 Mar 2023 16:01:19 +0100 Subject: [PATCH 19/33] Put renderdoc integration behind `WITH_RENDERDOC` compile option. --- CMakeLists.txt | 2 ++ intern/CMakeLists.txt | 2 +- source/blender/gpu/CMakeLists.txt | 16 ++++++++++++---- source/blender/gpu/opengl/gl_backend.hh | 6 +++++- source/blender/gpu/opengl/gl_debug.cc | 4 ++++ source/blender/gpu/vulkan/vk_backend.hh | 6 +++++- source/blender/gpu/vulkan/vk_debug.cc | 8 ++++++++ 7 files changed, 37 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86785737b11..1feb27f8356 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -617,10 +617,12 @@ endif() option(WITH_OPENGL "When off limits visibility of the opengl headers to just bf_gpu and gawain (temporary option for development purposes)" ON) option(WITH_GPU_BUILDTIME_SHADER_BUILDER "Shader builder is a developer option enabling linting on GLSL during compilation" OFF) +option(WITH_RENDERDOC "Use Renderdoc API to capture frames" OFF) mark_as_advanced( WITH_OPENGL WITH_GPU_BUILDTIME_SHADER_BUILDER + WITH_RENDERDOC ) # Vulkan diff --git a/intern/CMakeLists.txt b/intern/CMakeLists.txt index c9df16a481b..7a9593c6222 100644 --- a/intern/CMakeLists.txt +++ b/intern/CMakeLists.txt @@ -67,7 +67,7 @@ if(UNIX AND NOT APPLE) add_subdirectory(libc_compat) endif() -if (NOT APPLE) +if 
(WITH_RENDERDOC) add_subdirectory(renderdoc_dynload) endif() diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 733d24fb9fb..7b9f9e44dce 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -40,10 +40,16 @@ set(INC ../../../intern/ghost ../../../intern/guardedalloc ../../../intern/mantaflow/extern - ../../../extern/renderdoc/include - ../../../intern/renderdoc_dynload/include ) +if(WITH_RENDERDOC) + list(APPEND INC + ../../../extern/renderdoc/include + ../../../intern/renderdoc_dynload/include + ) + add_definitions(-DWITH_RENDERDOC) +endif() + set(INC_SYS ${Epoxy_INCLUDE_DIRS} ) @@ -739,13 +745,16 @@ target_link_libraries(bf_gpu PUBLIC bf_compositor_shaders bf_draw_shaders bf_gpu_shaders - bf_intern_renderdoc_dynload ) if(WITH_OPENCOLORIO) target_link_libraries(bf_gpu PUBLIC bf_ocio_shaders) endif() +if(WITH_RENDERDOC) + target_link_libraries(bf_gpu PUBLIC bf_intern_renderdoc_dynload) +endif() + if(CXX_WARN_NO_SUGGEST_OVERRIDE) target_compile_options(bf_gpu PRIVATE $<$:-Wsuggest-override>) @@ -784,7 +793,6 @@ if(WITH_GPU_BUILDTIME_SHADER_BUILDER) bf_intern_clog bf_blenlib bf_intern_ghost - bf_intern_renderdoc_dynload ${PLATFORM_LINKLIBS} ) target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index 16f126ad6e1..d9ed0fa021f 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -11,7 +11,9 @@ #include "BLI_vector.hh" -#include "renderdoc_api.hh" +#ifdef WITH_RENDERDOC +# include "renderdoc_api.hh" +#endif #include "gl_batch.hh" #include "gl_compute.hh" @@ -32,7 +34,9 @@ namespace gpu { class GLBackend : public GPUBackend { private: GLSharedOrphanLists shared_orphan_list_; + #ifdef WITH_RENDERDOC renderdoc::api::Renderdoc renderdoc_; + #endif public: GLBackend() diff --git a/source/blender/gpu/opengl/gl_debug.cc 
b/source/blender/gpu/opengl/gl_debug.cc index 7f375c406a6..04862edbfaa 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -389,7 +389,9 @@ bool GLContext::debug_capture_begin() void GLBackend::debug_capture_begin() { +#ifdef WITH_RENDERDOC renderdoc_.start_frame_capture(nullptr, nullptr); +#endif } void GLContext::debug_capture_end() @@ -399,7 +401,9 @@ void GLContext::debug_capture_end() void GLBackend::debug_capture_end() { +#ifdef WITH_RENDERDOC renderdoc_.end_frame_capture(nullptr, nullptr); +#endif } void *GLContext::debug_capture_scope_create(const char * /*name*/) diff --git a/source/blender/gpu/vulkan/vk_backend.hh b/source/blender/gpu/vulkan/vk_backend.hh index ca09b77b4a7..84d7961c6d3 100644 --- a/source/blender/gpu/vulkan/vk_backend.hh +++ b/source/blender/gpu/vulkan/vk_backend.hh @@ -9,7 +9,9 @@ #include "gpu_backend.hh" -#include "renderdoc_api.hh" +#ifdef WITH_RENDERDOC +# include "renderdoc_api.hh" +#endif #include "vk_common.hh" @@ -22,7 +24,9 @@ class VKContext; class VKBackend : public GPUBackend { private: shaderc::Compiler shaderc_compiler_; +#ifdef WITH_RENDERDOC renderdoc::api::Renderdoc renderdoc_api_; +#endif public: VKBackend() diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc index 0f434553465..45dcc353380 100644 --- a/source/blender/gpu/vulkan/vk_debug.cc +++ b/source/blender/gpu/vulkan/vk_debug.cc @@ -25,7 +25,11 @@ bool VKContext::debug_capture_begin() void VKBackend::debug_capture_begin(VkInstance vk_instance) { +#ifdef WITH_RENDERDOC renderdoc_api_.start_frame_capture(vk_instance, nullptr); +#else + UNUSED_VARS(vk_instance); +#endif } void VKContext::debug_capture_end() @@ -35,7 +39,11 @@ void VKContext::debug_capture_end() void VKBackend::debug_capture_end(VkInstance vk_instance) { +#ifdef WITH_RENDERDOC renderdoc_api_.end_frame_capture(vk_instance, nullptr); +#else + UNUSED_VARS(vk_instance); +#endif } void 
*VKContext::debug_capture_scope_create(const char * /*name*/) -- 2.30.2 From 7a8d733b113888662dd096265bbb140e807e6943 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 21 Mar 2023 11:34:02 +0100 Subject: [PATCH 20/33] Added test cases for framebuffer clear color. --- source/blender/gpu/CMakeLists.txt | 1 + source/blender/gpu/tests/framebuffer_test.cc | 114 +++++++++++++++++++ source/blender/gpu/vulkan/vk_framebuffer.cc | 23 +++- source/blender/gpu/vulkan/vk_framebuffer.hh | 3 + 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 source/blender/gpu/tests/framebuffer_test.cc diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 305f2d7f5a4..74c0a789646 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -814,6 +814,7 @@ if(WITH_GTESTS) set(TEST_SRC tests/gpu_testing.cc + tests/framebuffer_test.cc tests/index_buffer_test.cc tests/push_constants_test.cc tests/shader_test.cc diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc new file mode 100644 index 00000000000..a2313764108 --- /dev/null +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "GPU_framebuffer.h" +#include "gpu_testing.hh" + +#include "BLI_math_vector.hh" + +namespace blender::gpu::tests { + +static void test_framebuffer_clear_color_single_attachment() +{ + const int2 size(10, 10); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + + GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); + GPU_framebuffer_ensure_config(&framebuffer, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture)}); + GPU_framebuffer_bind(framebuffer); + + const float4 clear_color(0.1f, 0.2f, 0.5f, 1.0f); + 
GPU_framebuffer_clear_color(framebuffer, clear_color); + GPU_finish(); + + float4 *read_data = static_cast(GPU_texture_read(texture, GPU_DATA_FLOAT, 0)); + for (float4 pixel_color : Span(read_data, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color); + } + MEM_freeN(read_data); + + GPU_framebuffer_free(framebuffer); + GPU_texture_free(texture); +} +GPU_TEST(framebuffer_clear_color_single_attachment); + +static void test_framebuffer_clear_color_multiple_attachments() +{ + const int2 size(10, 10); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture1 = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + GPUTexture *texture2 = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + + GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); + GPU_framebuffer_ensure_config( + &framebuffer, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture1), GPU_ATTACHMENT_TEXTURE(texture2)}); + GPU_framebuffer_bind(framebuffer); + + const float4 clear_color(0.1f, 0.2f, 0.5f, 1.0f); + GPU_framebuffer_clear_color(framebuffer, clear_color); + GPU_finish(); + + float4 *read_data1 = static_cast(GPU_texture_read(texture1, GPU_DATA_FLOAT, 0)); + for (float4 pixel_color : Span(read_data1, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color); + } + MEM_freeN(read_data1); + + float4 *read_data2 = static_cast(GPU_texture_read(texture2, GPU_DATA_FLOAT, 0)); + for (float4 pixel_color : Span(read_data2, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color); + } + MEM_freeN(read_data2); + + GPU_framebuffer_free(framebuffer); + GPU_texture_free(texture1); + GPU_texture_free(texture2); +} +GPU_TEST(framebuffer_clear_color_multiple_attachments); + +static void test_framebuffer_clear_multiple_color_multiple_attachments() +{ + const int2 size(10, 10); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture1 = 
GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + GPUTexture *texture2 = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + + GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); + GPU_framebuffer_ensure_config( + &framebuffer, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture1), GPU_ATTACHMENT_TEXTURE(texture2)}); + GPU_framebuffer_bind(framebuffer); + + const float4 clear_color[2] = {float4(0.1f, 0.2f, 0.5f, 1.0f), float4(0.5f, 0.2f, 0.1f, 1.0f)}; + GPU_framebuffer_multi_clear( + framebuffer, static_cast<const float(*)[4]>(static_cast<const void *>(clear_color))); + GPU_finish(); + + float4 *read_data1 = static_cast<float4 *>(GPU_texture_read(texture1, GPU_DATA_FLOAT, 0)); + for (float4 pixel_color : Span<float4>(read_data1, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color[0]); + } + MEM_freeN(read_data1); + + float4 *read_data2 = static_cast<float4 *>(GPU_texture_read(texture2, GPU_DATA_FLOAT, 0)); + for (float4 pixel_color : Span<float4>(read_data2, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color[1]); + } + MEM_freeN(read_data2); + + GPU_framebuffer_free(framebuffer); + GPU_texture_free(texture1); + GPU_texture_free(texture2); +} +GPU_TEST(framebuffer_clear_multiple_color_multiple_attachments); + +} // namespace blender::gpu::tests diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 70bd5f0fbaf..2242dd97e5c 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -62,7 +62,7 @@ void VKFrameBuffer::clear(eGPUFrameBufferBits /*buffers*/, { } -void VKFrameBuffer::clear_multi(const float (*/*clear_col*/)[4]) +void VKFrameBuffer::clear_multi(const float (* /*clear_col*/)[4]) { } @@ -96,4 +96,25 @@ void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, { } +void VKFrameBuffer::update_attachments() +{ + if (!dirty_attachments_) { + return; + } + /* + remove_all_attachments(); + Vec attachment_descriptors; + + for 
(GPUAttachmentType type = GPU_FB_MAX_ATTACHMENT - 1; type >= 0; --type) { + GPUAttachment &attachment = attachments_[type]; + switch (type) { + case GPU_FB_DEPTH_ATTACHMENT: + case GPU_FB_DEPTH_STENCIL_ATTACHMENT: + } + } + */ + + dirty_attachments_ = false; +} + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_framebuffer.hh b/source/blender/gpu/vulkan/vk_framebuffer.hh index aed52304030..74f33b2c576 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.hh +++ b/source/blender/gpu/vulkan/vk_framebuffer.hh @@ -71,6 +71,9 @@ class VKFrameBuffer : public FrameBuffer { int dst_slot, int dst_offset_x, int dst_offset_y) override; + + private: + void update_attachments(); }; } // namespace blender::gpu -- 2.30.2 From 4e447a9dfcdf9a1f323b13118f32cb1451f86c59 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 21 Mar 2023 13:41:35 +0100 Subject: [PATCH 21/33] WIP. Some house keeping. --- .../blender/gpu/vulkan/vk_command_buffer.cc | 6 ++ .../blender/gpu/vulkan/vk_command_buffer.hh | 1 + source/blender/gpu/vulkan/vk_framebuffer.cc | 66 +++++++++++++++++-- source/blender/gpu/vulkan/vk_framebuffer.hh | 2 + 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index 6b71031d6b7..38531178c07 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -127,6 +127,12 @@ void VKCommandBuffer::clear(VkImage vk_image, ranges.data()); } +void VKCommandBuffer::clear(Span attachments, Span areas) +{ + vkCmdClearAttachments( + vk_command_buffer_, attachments.size(), attachments.data(), areas.size(), areas.data()); +} + void VKCommandBuffer::pipeline_barrier(VkPipelineStageFlags source_stages, VkPipelineStageFlags destination_stages) { diff --git a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index efe24468571..b448d17aacb 100644 --- 
a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -59,6 +59,7 @@ class VKCommandBuffer : NonCopyable, NonMovable { VkImageLayout vk_image_layout, const VkClearColorValue &vk_clear_color, Span ranges); + void clear(Span attachments, Span areas); void fill(VKBuffer &buffer, uint32_t data); /** diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 2242dd97e5c..bc0c83269f1 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -6,6 +6,7 @@ */ #include "vk_framebuffer.hh" +#include "vk_texture.hh" namespace blender::gpu { @@ -48,6 +49,10 @@ VKFrameBuffer::~VKFrameBuffer() void VKFrameBuffer::bind(bool /*enabled_srgb*/) { + update_attachments(); + + // VKContext &context = *VKContext::get(); + // context.framebuffer_bind(*this); } bool VKFrameBuffer::check(char /*err_out*/[256]) @@ -55,11 +60,48 @@ bool VKFrameBuffer::check(char /*err_out*/[256]) return false; } -void VKFrameBuffer::clear(eGPUFrameBufferBits /*buffers*/, - const float /*clear_col*/[4], - float /*clear_depth*/, - uint /*clear_stencil*/) +void VKFrameBuffer::clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) { + Vector clear_attachments; + + if (buffers & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + VkClearAttachment clear_attachment = {}; + clear_attachment.aspectMask = (buffers & GPU_DEPTH_BIT ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) | + (buffers & GPU_STENCIL_BIT ? 
VK_IMAGE_ASPECT_STENCIL_BIT : 0); + clear_attachment.clearValue.depthStencil.depth = clear_depth; + clear_attachment.clearValue.depthStencil.stencil = clear_stencil; + clear_attachments.append(clear_attachment); + } + + if (buffers & GPU_COLOR_BIT) { + for (int color_slot = 0; color_slot < GPU_FB_MAX_COLOR_ATTACHMENT; color_slot++) { + GPUAttachment &attachment = attachments_[GPU_FB_COLOR_ATTACHMENT0 + color_slot]; + if (attachment.tex == nullptr) { + continue; + } + VkClearAttachment clear_attachment = {}; + clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + clear_attachment.colorAttachment = color_slot; + copy_v4_v4(clear_attachment.clearValue.color.float32, clear_col); + clear_attachments.append(clear_attachment); + } + } + + VkClearRect clear_rect = {}; + /* Extract to function? I expect I need this multiple times. */ + clear_rect.rect.offset.x = 1; + clear_rect.rect.offset.y = 1; + clear_rect.rect.extent.width = width_; + clear_rect.rect.extent.height = height_; + clear_rect.baseArrayLayer = 0; + clear_rect.layerCount = 1; + + VKContext &context = *VKContext::get(); + VKCommandBuffer &command_buffer = context.command_buffer_get(); + command_buffer.clear(clear_attachments, Span(&clear_rect, 1)); } void VKFrameBuffer::clear_multi(const float (* /*clear_col*/)[4]) @@ -98,9 +140,16 @@ void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, void VKFrameBuffer::update_attachments() { + if (immutable_) { + return; + } if (!dirty_attachments_) { return; } + + render_pass_free(); + render_pass_create(); + /* remove_all_attachments(); Vec attachment_descriptors; @@ -116,5 +165,14 @@ void VKFrameBuffer::update_attachments() dirty_attachments_ = false; } +void VKFrameBuffer::render_pass_create() +{ + BLI_assert(!immutable_); +} + +void VKFrameBuffer::render_pass_free() +{ + BLI_assert(!immutable_); +} } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_framebuffer.hh b/source/blender/gpu/vulkan/vk_framebuffer.hh index 74f33b2c576..dc905316dce 
100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.hh +++ b/source/blender/gpu/vulkan/vk_framebuffer.hh @@ -74,6 +74,8 @@ class VKFrameBuffer : public FrameBuffer { private: void update_attachments(); + void render_pass_free(); + void render_pass_create(); }; } // namespace blender::gpu -- 2.30.2 From 7dcb4503422d1973b12406ee0750668e4c657307 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 21 Mar 2023 15:42:49 +0100 Subject: [PATCH 22/33] Binding textures + framebuffer. --- source/blender/gpu/tests/framebuffer_test.cc | 10 +- .../blender/gpu/vulkan/vk_command_buffer.cc | 13 ++ .../blender/gpu/vulkan/vk_command_buffer.hh | 3 + source/blender/gpu/vulkan/vk_framebuffer.cc | 124 +++++++++++++++--- source/blender/gpu/vulkan/vk_framebuffer.hh | 2 +- source/blender/gpu/vulkan/vk_texture.cc | 9 ++ source/blender/gpu/vulkan/vk_texture.hh | 2 + 7 files changed, 139 insertions(+), 24 deletions(-) diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index a2313764108..1633bd9354a 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -14,7 +14,7 @@ static void test_framebuffer_clear_color_single_attachment() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config(&framebuffer, @@ -41,9 +41,9 @@ static void test_framebuffer_clear_color_multiple_attachments() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture1 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); GPUTexture *texture2 = 
GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config( @@ -78,9 +78,9 @@ static void test_framebuffer_clear_multiple_color_multiple_attachments() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture1 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); GPUTexture *texture2 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config( diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index 38531178c07..ff204bc73d1 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -72,6 +72,19 @@ void VKCommandBuffer::bind(const VKDescriptorSet &descriptor_set, vk_command_buffer_, bind_point, vk_pipeline_layout, 0, 1, &vk_descriptor_set, 0, 0); } +void VKCommandBuffer::bind(const VkRenderPass vk_render_pass, + const VkFramebuffer vk_framebuffer, + VkRect2D render_area) +{ + VkRenderPassBeginInfo render_pass_begin_info{}; + render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + render_pass_begin_info.renderPass = vk_render_pass; + render_pass_begin_info.framebuffer = vk_framebuffer; + render_pass_begin_info.renderArea = render_area; + + vkCmdBeginRenderPass(vk_command_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); +} + void VKCommandBuffer::push_constants(const VKPushConstants &push_constants, const VkPipelineLayout vk_pipeline_layout, const VkShaderStageFlags vk_shader_stages) diff --git 
a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index b448d17aacb..43d5c341aa0 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -37,6 +37,9 @@ class VKCommandBuffer : NonCopyable, NonMovable { void bind(const VKDescriptorSet &descriptor_set, const VkPipelineLayout vk_pipeline_layout, VkPipelineBindPoint bind_point); + void bind(const VkRenderPass vk_render_pass, + const VkFramebuffer vk_framebuffer, + VkRect2D render_area); /** * Add a push constant command to the command buffer. * diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index bc0c83269f1..23101e538bb 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -6,6 +6,7 @@ */ #include "vk_framebuffer.hh" +#include "vk_memory.hh" #include "vk_texture.hh" namespace blender::gpu { @@ -21,7 +22,7 @@ VKFrameBuffer::VKFrameBuffer(const char *name) : FrameBuffer(name) VKFrameBuffer::VKFrameBuffer(const char *name, VkFramebuffer vk_framebuffer, - VkRenderPass /*vk_render_pass*/, + VkRenderPass vk_render_pass, VkExtent2D vk_extent) : FrameBuffer(name) { @@ -31,6 +32,7 @@ VKFrameBuffer::VKFrameBuffer(const char *name, width_ = vk_extent.width; height_ = vk_extent.height; vk_framebuffer_ = vk_framebuffer; + vk_render_pass_ = vk_render_pass; viewport_[0] = scissor_[0] = 0; viewport_[1] = scissor_[1] = 0; @@ -40,8 +42,8 @@ VKFrameBuffer::VKFrameBuffer(const char *name, VKFrameBuffer::~VKFrameBuffer() { - if (!immutable_ && vk_framebuffer_ != VK_NULL_HANDLE) { - vkDestroyFramebuffer(vk_device_, vk_framebuffer_, NULL); + if (!immutable_) { + render_pass_free(); } } @@ -51,8 +53,14 @@ void VKFrameBuffer::bind(bool /*enabled_srgb*/) { update_attachments(); - // VKContext &context = *VKContext::get(); - // context.framebuffer_bind(*this); + VKContext &context = *VKContext::get(); + VKCommandBuffer 
&command_buffer = context.command_buffer_get(); + VkRect2D render_area{}; + render_area.offset.x = 0; + render_area.offset.y = 0; + render_area.extent.width = width_; + render_area.extent.height = height_; + command_buffer.bind(vk_render_pass_, vk_framebuffer_, render_area); } bool VKFrameBuffer::check(char /*err_out*/[256]) @@ -150,29 +158,109 @@ void VKFrameBuffer::update_attachments() render_pass_free(); render_pass_create(); - /* - remove_all_attachments(); - Vec attachment_descriptors; - - for (GPUAttachmentType type = GPU_FB_MAX_ATTACHMENT - 1; type >= 0; --type) { - GPUAttachment &attachment = attachments_[type]; - switch (type) { - case GPU_FB_DEPTH_ATTACHMENT: - case GPU_FB_DEPTH_STENCIL_ATTACHMENT: - } - } - */ - dirty_attachments_ = false; } void VKFrameBuffer::render_pass_create() { BLI_assert(!immutable_); + BLI_assert(vk_render_pass_ == VK_NULL_HANDLE); + BLI_assert(vk_framebuffer_ == VK_NULL_HANDLE); + + VK_ALLOCATION_CALLBACKS + + /* Track first attachment for size.*/ + GPUAttachmentType first_attachment = GPU_FB_MAX_ATTACHMENT; + + Vector attachment_descriptions; + Vector color_attachments; + Vector image_views; + + for (int color_slot = 0; color_slot < GPU_FB_MAX_COLOR_ATTACHMENT; color_slot++) { + GPUAttachment &attachment = attachments_[GPU_FB_COLOR_ATTACHMENT0 + color_slot]; + if (attachment.tex == nullptr) { + continue; + } + + if (first_attachment == GPU_FB_MAX_ATTACHMENT) { + first_attachment = GPU_FB_COLOR_ATTACHMENT0 + color_slot; + } + + VKTexture &texture = *static_cast(unwrap(attachment.tex)); + /* We might want to split framebuffer and render target....*/ + texture.ensure_allocated(); + image_views.append(texture.vk_image_view_handle()); + + VkAttachmentDescription attachment_description{}; + attachment_description.format = to_vk_format(texture.format_get()); + attachment_description.samples = VK_SAMPLE_COUNT_1_BIT; + attachment_description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_description.storeOp = 
VK_ATTACHMENT_STORE_OP_STORE; + attachment_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment_description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachment_description.finalLayout = VK_IMAGE_LAYOUT_GENERAL; + attachment_descriptions.append(attachment_description); + + VkAttachmentReference color_attachment{}; + color_attachment.attachment = color_slot; + color_attachment.layout = VK_IMAGE_LAYOUT_GENERAL; + color_attachments.append(color_attachment); + } + + VkSubpassDescription subpass{}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = color_attachments.size(); + subpass.pColorAttachments = color_attachments.data(); + + VkRenderPassCreateInfo render_pass_info{}; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.attachmentCount = attachment_descriptions.size(); + render_pass_info.pAttachments = attachment_descriptions.data(); + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &subpass; + + VKContext &context = *VKContext::get(); + vkCreateRenderPass( + context.device_get(), &render_pass_info, vk_allocation_callbacks, &vk_render_pass_); + + if (first_attachment != GPU_FB_MAX_ATTACHMENT) { + GPUAttachment &attachment = attachments_[first_attachment]; + BLI_assert(attachment.tex); + + int size[3]; + GPU_texture_get_mipmap_size(attachment.tex, attachment.mip, size); + this->size_set(size[0], size[1]); + } + else { + this->size_set(0, 0); + } + + VkFramebufferCreateInfo framebuffer_create_info{}; + framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_create_info.renderPass = vk_render_pass_; + framebuffer_create_info.attachmentCount = image_views.size(); + framebuffer_create_info.pAttachments = image_views.data(); + framebuffer_create_info.width = width_; + framebuffer_create_info.height = height_; + 
framebuffer_create_info.layers = 1; + + vkCreateFramebuffer( + context.device_get(), &framebuffer_create_info, vk_allocation_callbacks, &vk_framebuffer_); } void VKFrameBuffer::render_pass_free() { BLI_assert(!immutable_); + if (vk_render_pass_ == VK_NULL_HANDLE) { + return; + } + VK_ALLOCATION_CALLBACKS + + VKContext &context = *VKContext::get(); + vkDestroyRenderPass(context.device_get(), vk_render_pass_, vk_allocation_callbacks); + vkDestroyFramebuffer(context.device_get(), vk_framebuffer_, vk_allocation_callbacks); + vk_render_pass_ = VK_NULL_HANDLE; + vk_framebuffer_ = VK_NULL_HANDLE; } } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_framebuffer.hh b/source/blender/gpu/vulkan/vk_framebuffer.hh index dc905316dce..83dd5c54625 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.hh +++ b/source/blender/gpu/vulkan/vk_framebuffer.hh @@ -20,7 +20,7 @@ class VKFrameBuffer : public FrameBuffer { /* Vulkan device who created the handle. */ VkDevice vk_device_ = VK_NULL_HANDLE; /* Base render pass used for framebuffer creation. */ - VkRenderPass render_pass_ = VK_NULL_HANDLE; + VkRenderPass vk_render_pass_ = VK_NULL_HANDLE; /* Number of layers if the attachments are layered textures. */ int depth_ = 1; /** Internal frame-buffers are immutable. 
*/ diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index e7cbf9c7183..f24b5ea3df4 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -208,6 +208,13 @@ bool VKTexture::init_internal(const GPUTexture * /*src*/, int /*mip_offset*/, in return false; } +void VKTexture::ensure_allocated() +{ + if (!is_allocated()) { + allocate(); + } +} + bool VKTexture::is_allocated() { return vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE; @@ -234,6 +241,8 @@ bool VKTexture::allocate() image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + // TODO: this conflicts with other usages. | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; diff --git a/source/blender/gpu/vulkan/vk_texture.hh b/source/blender/gpu/vulkan/vk_texture.hh index 37a4b063139..d6195aaff54 100644 --- a/source/blender/gpu/vulkan/vk_texture.hh +++ b/source/blender/gpu/vulkan/vk_texture.hh @@ -50,6 +50,8 @@ class VKTexture : public Texture { return vk_image_view_; } + void ensure_allocated(); + protected: bool init_internal() override; bool init_internal(GPUVertBuf *vbo) override; -- 2.30.2 From 393100d9e592898267e4ca1365c39b7e958c83fc Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Thu, 23 Mar 2023 12:07:32 +0100 Subject: [PATCH 23/33] Add support for multi clear --- source/blender/gpu/tests/framebuffer_test.cc | 10 +- .../blender/gpu/vulkan/vk_command_buffer.cc | 19 ++-- .../blender/gpu/vulkan/vk_command_buffer.hh | 9 +- source/blender/gpu/vulkan/vk_context.cc | 42 +++++++- source/blender/gpu/vulkan/vk_context.hh | 6 ++ source/blender/gpu/vulkan/vk_framebuffer.cc | 100 +++++++++++------- source/blender/gpu/vulkan/vk_framebuffer.hh | 36 ++++++- 
source/blender/gpu/vulkan/vk_texture.cc | 37 +++++-- source/blender/gpu/vulkan/vk_texture.hh | 4 +- 9 files changed, 194 insertions(+), 69 deletions(-) diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index 1633bd9354a..a2313764108 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -14,7 +14,7 @@ static void test_framebuffer_clear_color_single_attachment() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config(&framebuffer, @@ -41,9 +41,9 @@ static void test_framebuffer_clear_color_multiple_attachments() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture1 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); GPUTexture *texture2 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config( @@ -78,9 +78,9 @@ static void test_framebuffer_clear_multiple_color_multiple_attachments() const int2 size(10, 10); eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; GPUTexture *texture1 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); GPUTexture *texture2 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA16F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32F, 
usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); GPU_framebuffer_ensure_config( diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index ff204bc73d1..a381a8a5421 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -8,6 +8,7 @@ #include "vk_command_buffer.hh" #include "vk_buffer.hh" #include "vk_context.hh" +#include "vk_framebuffer.hh" #include "vk_memory.hh" #include "vk_pipeline.hh" #include "vk_texture.hh" @@ -72,19 +73,21 @@ void VKCommandBuffer::bind(const VKDescriptorSet &descriptor_set, vk_command_buffer_, bind_point, vk_pipeline_layout, 0, 1, &vk_descriptor_set, 0, 0); } -void VKCommandBuffer::bind(const VkRenderPass vk_render_pass, - const VkFramebuffer vk_framebuffer, - VkRect2D render_area) +void VKCommandBuffer::begin_render_pass(const VKFrameBuffer &framebuffer) { - VkRenderPassBeginInfo render_pass_begin_info{}; + VkRenderPassBeginInfo render_pass_begin_info = {}; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - render_pass_begin_info.renderPass = vk_render_pass; - render_pass_begin_info.framebuffer = vk_framebuffer; - render_pass_begin_info.renderArea = render_area; - + render_pass_begin_info.renderPass = framebuffer.vk_render_pass_get(); + render_pass_begin_info.framebuffer = framebuffer.vk_framebuffer_get(); + render_pass_begin_info.renderArea = framebuffer.vk_render_area_get(); vkCmdBeginRenderPass(vk_command_buffer_, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); } +void VKCommandBuffer::end_render_pass(const VKFrameBuffer & /*framebuffer*/) +{ + vkCmdEndRenderPass(vk_command_buffer_); +} + void VKCommandBuffer::push_constants(const VKPushConstants &push_constants, const VkPipelineLayout vk_pipeline_layout, const VkShaderStageFlags vk_shader_stages) diff --git a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index 
43d5c341aa0..71d3cda5852 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -13,10 +13,11 @@ namespace blender::gpu { class VKBuffer; -class VKTexture; -class VKPushConstants; -class VKPipeline; class VKDescriptorSet; +class VKFrameBuffer; +class VKPipeline; +class VKPushConstants; +class VKTexture; /** Command buffer to keep track of the life-time of a command buffer. */ class VKCommandBuffer : NonCopyable, NonMovable { @@ -37,6 +38,8 @@ class VKCommandBuffer : NonCopyable, NonMovable { void bind(const VKDescriptorSet &descriptor_set, const VkPipelineLayout vk_pipeline_layout, VkPipelineBindPoint bind_point); + void begin_render_pass(const VKFrameBuffer &framebuffer); + void end_render_pass(const VKFrameBuffer &framebuffer); void bind(const VkRenderPass vk_render_pass, const VkFramebuffer vk_framebuffer, VkRect2D render_area); diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 6528a2b8aca..3812bf0b9a0 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -51,7 +51,7 @@ VKContext::VKContext(void *ghost_window, void *ghost_context) VKBackend::capabilities_init(*this); /* For off-screen contexts. Default frame-buffer is empty. */ - active_fb = back_left = new VKFrameBuffer("back_left"); + back_left = new VKFrameBuffer("back_left"); } VKContext::~VKContext() @@ -71,19 +71,24 @@ void VKContext::activate() { if (ghost_window_) { VkImage image; /* TODO will be used for reading later... */ - VkFramebuffer framebuffer; + VkFramebuffer vk_framebuffer; VkRenderPass render_pass; VkExtent2D extent; uint32_t fb_id; GHOST_GetVulkanBackbuffer( - (GHOST_WindowHandle)ghost_window_, &image, &framebuffer, &render_pass, &extent, &fb_id); + (GHOST_WindowHandle)ghost_window_, &image, &vk_framebuffer, &render_pass, &extent, &fb_id); /* Recreate the gpu::VKFrameBuffer wrapper after every swap. 
*/ + if (has_active_framebuffer()) { + deactivate_framebuffer(); + } delete back_left; - back_left = new VKFrameBuffer("back_left", framebuffer, render_pass, extent); - active_fb = back_left; + VKFrameBuffer *framebuffer = new VKFrameBuffer( + "back_left", vk_framebuffer, render_pass, extent); + back_left = framebuffer; + framebuffer->bind(false); } } @@ -113,6 +118,9 @@ void VKContext::flush() void VKContext::finish() { + if (has_active_framebuffer()) { + deactivate_framebuffer(); + } command_buffer_.submit(); } @@ -120,4 +128,28 @@ void VKContext::memory_statistics_get(int * /*total_mem*/, int * /*free_mem*/) { } +void VKContext::activate_framebuffer(VKFrameBuffer &framebuffer) +{ + if (has_active_framebuffer()) { + deactivate_framebuffer(); + } + + BLI_assert(active_fb == nullptr); + active_fb = &framebuffer; + command_buffer_.begin_render_pass(framebuffer); +} + +bool VKContext::has_active_framebuffer() const +{ + return active_fb != nullptr; +} + +void VKContext::deactivate_framebuffer() +{ + BLI_assert(active_fb != nullptr); + VKFrameBuffer *framebuffer = unwrap(active_fb); + command_buffer_.end_render_pass(*framebuffer); + active_fb = nullptr; +} + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_context.hh b/source/blender/gpu/vulkan/vk_context.hh index b646492faec..d5c0a03dcba 100644 --- a/source/blender/gpu/vulkan/vk_context.hh +++ b/source/blender/gpu/vulkan/vk_context.hh @@ -13,6 +13,7 @@ #include "vk_descriptor_pools.hh" namespace blender::gpu { +class VKFrameBuffer; class VKContext : public Context { private: @@ -55,6 +56,9 @@ class VKContext : public Context { bool debug_capture_scope_begin(void *scope) override; void debug_capture_scope_end(void *scope) override; + void activate_framebuffer(VKFrameBuffer &framebuffer); + void deactivate_framebuffer(); + static VKContext *get(void) { return static_cast(Context::get()); @@ -102,6 +106,8 @@ class VKContext : public Context { private: void init_physical_device_limits(); + + bool 
has_active_framebuffer() const; }; } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 23101e538bb..5b6c5edf9eb 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -54,13 +54,17 @@ void VKFrameBuffer::bind(bool /*enabled_srgb*/) update_attachments(); VKContext &context = *VKContext::get(); - VKCommandBuffer &command_buffer = context.command_buffer_get(); + context.activate_framebuffer(*this); +} + +VkRect2D VKFrameBuffer::vk_render_area_get() const +{ VkRect2D render_area{}; render_area.offset.x = 0; render_area.offset.y = 0; render_area.extent.width = width_; render_area.extent.height = height_; - command_buffer.bind(vk_render_pass_, vk_framebuffer_, render_area); + return render_area; } bool VKFrameBuffer::check(char /*err_out*/[256]) @@ -68,52 +72,74 @@ bool VKFrameBuffer::check(char /*err_out*/[256]) return false; } -void VKFrameBuffer::clear(eGPUFrameBufferBits buffers, - const float clear_col[4], - float clear_depth, - uint clear_stencil) +void VKFrameBuffer::build_clear_attachments_depth_stencil( + const eGPUFrameBufferBits buffers, + float clear_depth, + uint32_t clear_stencil, + Vector &r_attachments) const { - Vector clear_attachments; + VkClearAttachment clear_attachment = {}; + clear_attachment.aspectMask = (buffers & GPU_DEPTH_BIT ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) | + (buffers & GPU_STENCIL_BIT ? VK_IMAGE_ASPECT_STENCIL_BIT : 0); + clear_attachment.clearValue.depthStencil.depth = clear_depth; + clear_attachment.clearValue.depthStencil.stencil = clear_stencil; + r_attachments.append(clear_attachment); +} - if (buffers & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { - VkClearAttachment clear_attachment = {}; - clear_attachment.aspectMask = (buffers & GPU_DEPTH_BIT ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) | - (buffers & GPU_STENCIL_BIT ? 
VK_IMAGE_ASPECT_STENCIL_BIT : 0); - clear_attachment.clearValue.depthStencil.depth = clear_depth; - clear_attachment.clearValue.depthStencil.stencil = clear_stencil; - clear_attachments.append(clear_attachment); - } - - if (buffers & GPU_COLOR_BIT) { - for (int color_slot = 0; color_slot < GPU_FB_MAX_COLOR_ATTACHMENT; color_slot++) { - GPUAttachment &attachment = attachments_[GPU_FB_COLOR_ATTACHMENT0 + color_slot]; - if (attachment.tex == nullptr) { - continue; - } - VkClearAttachment clear_attachment = {}; - clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - clear_attachment.colorAttachment = color_slot; - copy_v4_v4(clear_attachment.clearValue.color.float32, clear_col); - clear_attachments.append(clear_attachment); +void VKFrameBuffer::build_clear_attachments_color(const float (*clear_colors)[4], + const bool multi_clear_colors, + Vector &r_attachments) const +{ + int color_index = 0; + for (int color_slot = 0; color_slot < GPU_FB_MAX_COLOR_ATTACHMENT; color_slot++) { + const GPUAttachment &attachment = attachments_[GPU_FB_COLOR_ATTACHMENT0 + color_slot]; + if (attachment.tex == nullptr) { + continue; } - } + VkClearAttachment clear_attachment = {}; + clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + clear_attachment.colorAttachment = color_slot; + copy_v4_v4(clear_attachment.clearValue.color.float32, clear_colors[color_index]); + r_attachments.append(clear_attachment); + color_index += multi_clear_colors ? 1 : 0; + } +} + +void VKFrameBuffer::clear(const Vector &attachments) const +{ VkClearRect clear_rect = {}; - /* Extract to function? I expect I need this multiple times. 
*/ - clear_rect.rect.offset.x = 1; - clear_rect.rect.offset.y = 1; - clear_rect.rect.extent.width = width_; - clear_rect.rect.extent.height = height_; + clear_rect.rect = vk_render_area_get(); clear_rect.baseArrayLayer = 0; clear_rect.layerCount = 1; VKContext &context = *VKContext::get(); VKCommandBuffer &command_buffer = context.command_buffer_get(); - command_buffer.clear(clear_attachments, Span(&clear_rect, 1)); + command_buffer.clear(attachments, Span(&clear_rect, 1)); } -void VKFrameBuffer::clear_multi(const float (* /*clear_col*/)[4]) +void VKFrameBuffer::clear(const eGPUFrameBufferBits buffers, + const float clear_color[4], + float clear_depth, + uint clear_stencil) { + Vector attachments; + if (buffers & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + build_clear_attachments_depth_stencil(buffers, clear_depth, clear_stencil, attachments); + } + if (buffers & GPU_COLOR_BIT) { + float clear_color_single[4]; + copy_v4_v4(clear_color_single, clear_color); + build_clear_attachments_color(&clear_color_single, false, attachments); + } + clear(attachments); +} + +void VKFrameBuffer::clear_multi(const float (*clear_color)[4]) +{ + Vector attachments; + build_clear_attachments_color(clear_color, true, attachments); + clear(attachments); } void VKFrameBuffer::clear_attachment(GPUAttachmentType /*type*/, @@ -212,7 +238,7 @@ void VKFrameBuffer::render_pass_create() subpass.colorAttachmentCount = color_attachments.size(); subpass.pColorAttachments = color_attachments.data(); - VkRenderPassCreateInfo render_pass_info{}; + VkRenderPassCreateInfo render_pass_info = {}; render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; render_pass_info.attachmentCount = attachment_descriptions.size(); render_pass_info.pAttachments = attachment_descriptions.data(); @@ -235,7 +261,7 @@ void VKFrameBuffer::render_pass_create() this->size_set(0, 0); } - VkFramebufferCreateInfo framebuffer_create_info{}; + VkFramebufferCreateInfo framebuffer_create_info = {}; 
framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebuffer_create_info.renderPass = vk_render_pass_; framebuffer_create_info.attachmentCount = image_views.size(); diff --git a/source/blender/gpu/vulkan/vk_framebuffer.hh b/source/blender/gpu/vulkan/vk_framebuffer.hh index 83dd5c54625..15ed5bb3f83 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.hh +++ b/source/blender/gpu/vulkan/vk_framebuffer.hh @@ -7,6 +7,10 @@ #pragma once +#include "BLI_math_vector.hh" +#include "BLI_span.hh" +#include "BLI_vector.hh" + #include "gpu_framebuffer_private.hh" #include "vk_common.hh" @@ -46,10 +50,10 @@ class VKFrameBuffer : public FrameBuffer { void bind(bool enabled_srgb) override; bool check(char err_out[256]) override; void clear(eGPUFrameBufferBits buffers, - const float clear_col[4], + const float clear_color[4], float clear_depth, uint clear_stencil) override; - void clear_multi(const float (*clear_col)[4]) override; + void clear_multi(const float (*clear_color)[4]) override; void clear_attachment(GPUAttachmentType type, eGPUDataFormat data_format, const void *clear_value) override; @@ -72,10 +76,38 @@ class VKFrameBuffer : public FrameBuffer { int dst_offset_x, int dst_offset_y) override; + VkFramebuffer vk_framebuffer_get() const + { + BLI_assert(vk_framebuffer_ != VK_NULL_HANDLE); + return vk_framebuffer_; + } + + VkRenderPass vk_render_pass_get() const + { + BLI_assert(vk_render_pass_ != VK_NULL_HANDLE); + return vk_render_pass_; + } + VkRect2D vk_render_area_get() const; + private: void update_attachments(); void render_pass_free(); void render_pass_create(); + + /* Clearing attachments */ + void build_clear_attachments_depth_stencil(eGPUFrameBufferBits buffers, + float clear_depth, + uint32_t clear_stencil, + Vector &r_attachments) const; + void build_clear_attachments_color(const float (*clear_colors)[4], + const bool multi_clear_colors, + Vector &r_attachments) const; + void clear(const Vector &attachments) const; }; +static inline 
VKFrameBuffer *unwrap(FrameBuffer *framebuffer) +{ + return static_cast(framebuffer); +} + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index f24b5ea3df4..5ccfb23efe1 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -215,11 +215,32 @@ void VKTexture::ensure_allocated() } } -bool VKTexture::is_allocated() +bool VKTexture::is_allocated() const { return vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE; } +static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage) +{ + VkImageUsageFlagBits result = static_cast(VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + if (usage & GPU_TEXTURE_USAGE_SHADER_READ) { + result = static_cast(result | VK_IMAGE_USAGE_STORAGE_BIT); + } + if (usage & GPU_TEXTURE_USAGE_SHADER_WRITE) { + result = static_cast(result | VK_IMAGE_USAGE_STORAGE_BIT); + } + if (usage & GPU_TEXTURE_USAGE_ATTACHMENT) { + /* TODO add other types of attachments based on the format. */ + result = static_cast(result | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } + if (usage & GPU_TEXTURE_USAGE_HOST_READ) { + result = static_cast(result | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + } + + return result; +} + bool VKTexture::allocate() { BLI_assert(!is_allocated()); @@ -237,12 +258,14 @@ bool VKTexture::allocate() image_info.mipLevels = 1; image_info.arrayLayers = 1; image_info.format = to_vk_format(format_); - image_info.tiling = VK_IMAGE_TILING_LINEAR; + /* Some platforms (NVIDIA) requires that attached textures are always tiled optimal. + * + * As image data are always accessed via an staging buffer we can enable optimal tiling for all + * texture. Tilings based on actual usages should be done in `VKFramebuffer`. 
+ */ + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; - // TODO: this conflicts with other usages. | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + image_info.usage = to_vk_image_usage(gpu_image_usage_flags_); image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; @@ -263,8 +286,6 @@ bool VKTexture::allocate() VmaAllocationCreateInfo allocCreateInfo = {}; allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; - allocCreateInfo.flags = static_cast( - VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); allocCreateInfo.priority = 1.0f; result = vmaCreateImage(context.mem_allocator_get(), &image_info, diff --git a/source/blender/gpu/vulkan/vk_texture.hh b/source/blender/gpu/vulkan/vk_texture.hh index d6195aaff54..a18c8812660 100644 --- a/source/blender/gpu/vulkan/vk_texture.hh +++ b/source/blender/gpu/vulkan/vk_texture.hh @@ -43,10 +43,12 @@ class VKTexture : public Texture { void image_bind(int location); VkImage vk_image_handle() const { + BLI_assert(is_allocated()); return vk_image_; } VkImageView vk_image_view_handle() const { + BLI_assert(is_allocated()); return vk_image_view_; } @@ -59,7 +61,7 @@ class VKTexture : public Texture { private: /** Is this texture already allocated on device. */ - bool is_allocated(); + bool is_allocated() const; /** * Allocate the texture of the device. Result is `true` when texture is successfully allocated * on the device. -- 2.30.2 From 13c2286f8c144e2e814a3b0289fff267699f677b Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Thu, 23 Mar 2023 13:58:10 +0100 Subject: [PATCH 24/33] Support for clear depth. 
--- source/blender/gpu/tests/framebuffer_test.cc | 26 +++++++++++ source/blender/gpu/vulkan/vk_framebuffer.cc | 48 ++++++++++++++++---- source/blender/gpu/vulkan/vk_texture.cc | 14 ++++-- 3 files changed, 76 insertions(+), 12 deletions(-) diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index a2313764108..484906cf316 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -111,4 +111,30 @@ static void test_framebuffer_clear_multiple_color_multiple_attachments() } GPU_TEST(framebuffer_clear_multiple_color_multiple_attachments); +static void test_framebuffer_clear_depth() +{ + const int2 size(10, 10); + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_DEPTH_COMPONENT32F, usage, nullptr); + + GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); + GPU_framebuffer_ensure_config(&framebuffer, {GPU_ATTACHMENT_TEXTURE(texture)}); + GPU_framebuffer_bind(framebuffer); + + const float clear_depth = 0.5f; + GPU_framebuffer_clear_depth(framebuffer, clear_depth); + GPU_finish(); + + float *read_data = static_cast(GPU_texture_read(texture, GPU_DATA_FLOAT, 0)); + for (float pixel_depth : Span(read_data, size.x * size.y)) { + EXPECT_EQ(pixel_depth, clear_depth); + } + MEM_freeN(read_data); + + GPU_framebuffer_free(framebuffer); + GPU_texture_free(texture); +} +GPU_TEST(framebuffer_clear_depth); + } // namespace blender::gpu::tests diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 5b6c5edf9eb..fadf93b7948 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -200,19 +200,20 @@ void VKFrameBuffer::render_pass_create() Vector attachment_descriptions; Vector color_attachments; Vector image_views; + VkAttachmentReference depth_attachment 
= {}; + bool has_depth_attachment = false; - for (int color_slot = 0; color_slot < GPU_FB_MAX_COLOR_ATTACHMENT; color_slot++) { - GPUAttachment &attachment = attachments_[GPU_FB_COLOR_ATTACHMENT0 + color_slot]; + for (int type = GPU_FB_DEPTH_ATTACHMENT; type < GPU_FB_MAX_ATTACHMENT; type++) { + GPUAttachment &attachment = attachments_[type]; if (attachment.tex == nullptr) { continue; } if (first_attachment == GPU_FB_MAX_ATTACHMENT) { - first_attachment = GPU_FB_COLOR_ATTACHMENT0 + color_slot; + first_attachment = static_cast(type); } VKTexture &texture = *static_cast(unwrap(attachment.tex)); - /* We might want to split framebuffer and render target....*/ texture.ensure_allocated(); image_views.append(texture.vk_image_view_handle()); @@ -227,16 +228,46 @@ void VKFrameBuffer::render_pass_create() attachment_description.finalLayout = VK_IMAGE_LAYOUT_GENERAL; attachment_descriptions.append(attachment_description); - VkAttachmentReference color_attachment{}; - color_attachment.attachment = color_slot; - color_attachment.layout = VK_IMAGE_LAYOUT_GENERAL; - color_attachments.append(color_attachment); + /* TODO: should we also add unused attachments so the attachment reflect the internal lists.*/ + int attachment_index = attachment_descriptions.size() - 1; + + switch (type) { + case GPU_FB_DEPTH_ATTACHMENT: + BLI_assert(!has_depth_attachment); + has_depth_attachment = true; + depth_attachment.attachment = attachment_index; + depth_attachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + break; + + case GPU_FB_DEPTH_STENCIL_ATTACHMENT: + BLI_assert(!has_depth_attachment); + has_depth_attachment = true; + depth_attachment.attachment = attachment_index; + depth_attachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + break; + + case GPU_FB_COLOR_ATTACHMENT0: + case GPU_FB_COLOR_ATTACHMENT1: + case GPU_FB_COLOR_ATTACHMENT2: + case GPU_FB_COLOR_ATTACHMENT3: + case GPU_FB_COLOR_ATTACHMENT4: + case GPU_FB_COLOR_ATTACHMENT5: + case 
GPU_FB_COLOR_ATTACHMENT6: + case GPU_FB_COLOR_ATTACHMENT7: + VkAttachmentReference color_attachment{}; + color_attachment.attachment = attachment_index; + color_attachment.layout = VK_IMAGE_LAYOUT_GENERAL; + color_attachments.append(color_attachment); + } } VkSubpassDescription subpass{}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = color_attachments.size(); subpass.pColorAttachments = color_attachments.data(); + if (has_depth_attachment) { + subpass.pDepthStencilAttachment = &depth_attachment; + } VkRenderPassCreateInfo render_pass_info = {}; render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; @@ -261,6 +292,7 @@ void VKFrameBuffer::render_pass_create() this->size_set(0, 0); } + /* We might want to split framebuffer and render target....*/ VkFramebufferCreateInfo framebuffer_create_info = {}; framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebuffer_create_info.renderPass = vk_render_pass_; diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 5ccfb23efe1..2e023ddcf7c 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -220,7 +220,8 @@ bool VKTexture::is_allocated() const return vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE; } -static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage) +static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage, + const eGPUFrameBufferBits framebuffer_bits) { VkImageUsageFlagBits result = static_cast(VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); @@ -231,8 +232,13 @@ static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage) result = static_cast(result | VK_IMAGE_USAGE_STORAGE_BIT); } if (usage & GPU_TEXTURE_USAGE_ATTACHMENT) { - /* TODO add other types of attachments based on the format. 
*/ - result = static_cast(result | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + if (framebuffer_bits & GPU_COLOR_BIT) { + result = static_cast(result | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } + if (framebuffer_bits & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + result = static_cast(result | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + } } if (usage & GPU_TEXTURE_USAGE_HOST_READ) { result = static_cast(result | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); @@ -265,7 +271,7 @@ bool VKTexture::allocate() */ image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.usage = to_vk_image_usage(gpu_image_usage_flags_); + image_info.usage = to_vk_image_usage(gpu_image_usage_flags_, to_framebuffer_bits(format_)); image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; -- 2.30.2 From 632494cf0b3defb80dd14d0589ae88e6ac39f74f Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Thu, 23 Mar 2023 14:41:33 +0100 Subject: [PATCH 25/33] Fix test cases. --- source/blender/gpu/vulkan/vk_framebuffer.cc | 10 ++++------ source/blender/gpu/vulkan/vk_texture.cc | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index fadf93b7948..3401adb9a74 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -146,6 +146,10 @@ void VKFrameBuffer::clear_attachment(GPUAttachmentType /*type*/, eGPUDataFormat /*data_format*/, const void * /*clear_value*/) { + /* Clearing of a single attachment was added to implement `clear_multi` in OpenGL. As + * `clear_multi` is supported in Vulkan it isn't needed to implement this method. 
+ */ + BLI_assert_unreachable(); } void VKFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType /*type*/, @@ -233,12 +237,6 @@ void VKFrameBuffer::render_pass_create() switch (type) { case GPU_FB_DEPTH_ATTACHMENT: - BLI_assert(!has_depth_attachment); - has_depth_attachment = true; - depth_attachment.attachment = attachment_index; - depth_attachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - break; - case GPU_FB_DEPTH_STENCIL_ATTACHMENT: BLI_assert(!has_depth_attachment); has_depth_attachment = true; diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 2e023ddcf7c..5b5b1d83bc0 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -221,7 +221,7 @@ bool VKTexture::is_allocated() const } static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage, - const eGPUFrameBufferBits framebuffer_bits) + const eGPUTextureFormatFlag format_flag) { VkImageUsageFlagBits result = static_cast(VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); @@ -232,12 +232,18 @@ static VkImageUsageFlagBits to_vk_image_usage(const eGPUTextureUsage usage, result = static_cast(result | VK_IMAGE_USAGE_STORAGE_BIT); } if (usage & GPU_TEXTURE_USAGE_ATTACHMENT) { - if (framebuffer_bits & GPU_COLOR_BIT) { - result = static_cast(result | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + if (format_flag & (GPU_FORMAT_NORMALIZED_INTEGER | GPU_FORMAT_COMPRESSED)) { + /* These formats aren't supported as an attachment. When using GPU_TEXTURE_USAGE_DEFAULT they + * are still being evaluated to be attachable. 
So we need to skip them.*/ } - if (framebuffer_bits & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { - result = static_cast(result | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + else { + if (format_flag & (GPU_FORMAT_DEPTH | GPU_FORMAT_STENCIL)) { + result = static_cast(result | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT); + } + else { + result = static_cast(result | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } } } if (usage & GPU_TEXTURE_USAGE_HOST_READ) { @@ -271,7 +277,7 @@ bool VKTexture::allocate() */ image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.usage = to_vk_image_usage(gpu_image_usage_flags_, to_framebuffer_bits(format_)); + image_info.usage = to_vk_image_usage(gpu_image_usage_flags_, format_flag_); image_info.samples = VK_SAMPLE_COUNT_1_BIT; VkResult result; -- 2.30.2 From 4d0707ab665f92febaa444ca19625c90e1ae0b3c Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 24 Mar 2023 08:03:08 +0100 Subject: [PATCH 26/33] Fix failed merge. 
--- source/blender/gpu/opengl/gl_debug.cc | 11 ----------- source/blender/gpu/vulkan/vk_context.cc | 3 --- 2 files changed, 14 deletions(-) diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index a4930f4387a..51433ec9948 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -383,16 +383,6 @@ void GLContext::debug_group_end() bool GLContext::debug_capture_begin() { -<<<<<<< HEAD - GLBackend::get()->debug_capture_begin(); - return true; -} - -void GLBackend::debug_capture_begin() -{ -#ifdef WITH_RENDERDOC - renderdoc_.start_frame_capture(nullptr, nullptr); -======= return GLBackend::get()->debug_capture_begin(); } @@ -402,7 +392,6 @@ bool GLBackend::debug_capture_begin() return renderdoc_.start_frame_capture(nullptr, nullptr); #else return false; ->>>>>>> main #endif } diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 0d41f5387d2..3812bf0b9a0 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -128,7 +128,6 @@ void VKContext::memory_statistics_get(int * /*total_mem*/, int * /*free_mem*/) { } -<<<<<<< HEAD void VKContext::activate_framebuffer(VKFrameBuffer &framebuffer) { if (has_active_framebuffer()) { @@ -153,6 +152,4 @@ void VKContext::deactivate_framebuffer() active_fb = nullptr; } -======= ->>>>>>> main } // namespace blender::gpu -- 2.30.2 From d8ebd6440b15ed485ac422ab1ce599e54d6140e0 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 24 Mar 2023 08:18:48 +0100 Subject: [PATCH 27/33] Cleanup --- source/blender/gpu/vulkan/vk_command_buffer.hh | 8 +++++--- source/blender/gpu/vulkan/vk_framebuffer.cc | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_command_buffer.hh b/source/blender/gpu/vulkan/vk_command_buffer.hh index 025b4949ec4..4ea87af7b14 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.hh +++ 
b/source/blender/gpu/vulkan/vk_command_buffer.hh @@ -42,9 +42,7 @@ class VKCommandBuffer : NonCopyable, NonMovable { VkPipelineBindPoint bind_point); void begin_render_pass(const VKFrameBuffer &framebuffer); void end_render_pass(const VKFrameBuffer &framebuffer); - void bind(const VkRenderPass vk_render_pass, - const VkFramebuffer vk_framebuffer, - VkRect2D render_area); + /** * Add a push constant command to the command buffer. * @@ -67,6 +65,10 @@ class VKCommandBuffer : NonCopyable, NonMovable { VkImageLayout vk_image_layout, const VkClearColorValue &vk_clear_color, Span ranges); + + /** + * Clear attachments of the active framebuffer. + */ void clear(Span attachments, Span areas); void fill(VKBuffer &buffer, uint32_t data); diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 3401adb9a74..b1741c61372 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -59,7 +59,7 @@ void VKFrameBuffer::bind(bool /*enabled_srgb*/) VkRect2D VKFrameBuffer::vk_render_area_get() const { - VkRect2D render_area{}; + VkRect2D render_area = {}; render_area.offset.x = 0; render_area.offset.y = 0; render_area.extent.width = width_; -- 2.30.2 From 90263c4163231d5b4cc712d9b3d974f1a3a18218 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 24 Mar 2023 14:10:38 +0100 Subject: [PATCH 28/33] Convert texture to transfer optimized during read back. 
--- .../blender/gpu/vulkan/vk_command_buffer.cc | 4 +- source/blender/gpu/vulkan/vk_framebuffer.cc | 187 ++++++++++++------ source/blender/gpu/vulkan/vk_texture.cc | 49 ++++- source/blender/gpu/vulkan/vk_texture.hh | 37 ++++ 4 files changed, 200 insertions(+), 77 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_command_buffer.cc b/source/blender/gpu/vulkan/vk_command_buffer.cc index 007f509e288..e41dd71df0a 100644 --- a/source/blender/gpu/vulkan/vk_command_buffer.cc +++ b/source/blender/gpu/vulkan/vk_command_buffer.cc @@ -114,7 +114,7 @@ void VKCommandBuffer::copy(VKBuffer &dst_buffer, { vkCmdCopyImageToBuffer(vk_command_buffer_, src_texture.vk_image_handle(), - VK_IMAGE_LAYOUT_GENERAL, + src_texture.current_layout_get(), dst_buffer.vk_handle(), regions.size(), regions.data()); @@ -126,7 +126,7 @@ void VKCommandBuffer::copy(VKTexture &dst_texture, vkCmdCopyBufferToImage(vk_command_buffer_, src_buffer.vk_handle(), dst_texture.vk_image_handle(), - VK_IMAGE_LAYOUT_GENERAL, + dst_texture.current_layout_get(), regions.size(), regions.data()); } diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index b1741c61372..51d1f60757f 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -60,10 +60,23 @@ void VKFrameBuffer::bind(bool /*enabled_srgb*/) VkRect2D VKFrameBuffer::vk_render_area_get() const { VkRect2D render_area = {}; - render_area.offset.x = 0; - render_area.offset.y = 0; - render_area.extent.width = width_; - render_area.extent.height = height_; + int render_rect[4]; + viewport_get(render_rect); + if (scissor_test_get()) { + int scissor_rect[4]; + int viewport_rect[4]; + copy_v4_v4_int(viewport_rect, render_rect); + scissor_get(scissor_rect); + int2 scissor_offset_delta = int2(scissor_rect) - int2(viewport_rect); + render_rect[0] = max_ii(viewport_rect[0], scissor_rect[0]); + render_rect[1] = max_ii(viewport_rect[1], scissor_rect[1]); + render_rect[2] = 
min_ii(viewport_rect[2] - scissor_offset_delta[0], scissor_rect[2]); + render_rect[3] = min_ii(viewport_rect[3] - scissor_offset_delta[1], scissor_rect[3]); + } + render_area.offset.x = render_rect[0]; + render_area.offset.y = render_rect[1]; + render_area.extent.width = render_rect[2]; + render_area.extent.height = render_rect[3]; return render_area; } @@ -106,6 +119,10 @@ void VKFrameBuffer::build_clear_attachments_color(const float (*clear_colors)[4] } } +/* -------------------------------------------------------------------- */ +/** \name Clear + * \{ */ + void VKFrameBuffer::clear(const Vector &attachments) const { VkClearRect clear_rect = {}; @@ -152,12 +169,24 @@ void VKFrameBuffer::clear_attachment(GPUAttachmentType /*type*/, BLI_assert_unreachable(); } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Load/Store operations + * \{ */ + void VKFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType /*type*/, eGPULoadOp /*load_action*/, eGPUStoreOp /*store_action*/) { } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Read back + * \{ */ + void VKFrameBuffer::read(eGPUFrameBufferBits /*planes*/, eGPUDataFormat /*format*/, const int /*area*/[4], @@ -167,6 +196,12 @@ void VKFrameBuffer::read(eGPUFrameBufferBits /*planes*/, { } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Blit operations + * \{ */ + void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, int /*src_slot*/, FrameBuffer * /*dst*/, @@ -176,6 +211,12 @@ void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, { } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Update attachments + * \{ */ + void VKFrameBuffer::update_attachments() { if (immutable_) { @@ -190,6 +231,7 @@ void VKFrameBuffer::update_attachments() dirty_attachments_ = false; } + void VKFrameBuffer::render_pass_create() { 
BLI_assert(!immutable_); @@ -201,75 +243,100 @@ void VKFrameBuffer::render_pass_create() /* Track first attachment for size.*/ GPUAttachmentType first_attachment = GPU_FB_MAX_ATTACHMENT; - Vector attachment_descriptions; - Vector color_attachments; - Vector image_views; + std::array attachment_descriptions; + std::array image_views; + std::array attachment_references; + /*Vector color_attachments; VkAttachmentReference depth_attachment = {}; + */ bool has_depth_attachment = false; + bool found_attachment = false; + int depth_location = -1; - for (int type = GPU_FB_DEPTH_ATTACHMENT; type < GPU_FB_MAX_ATTACHMENT; type++) { + for (int type = GPU_FB_MAX_ATTACHMENT - 1; type >= 0; type--) { GPUAttachment &attachment = attachments_[type]; - if (attachment.tex == nullptr) { + if (attachment.tex == nullptr && !found_attachment) { + /* Move the depth texture to the next binding point after all color textures. The binding + * location of the color textures should be kept in sync between ShaderCreateInfos and the + * framebuffer attachments. The depth buffer should be the last slot. */ + depth_location = max_ii(type - GPU_FB_COLOR_ATTACHMENT0, 0); continue; } + found_attachment |= attachment.tex != nullptr; - if (first_attachment == GPU_FB_MAX_ATTACHMENT) { + /* Keep the first attachment to the first color attachment, or to the depth buffer when there + * is no color attachment. */ + if (attachment.tex != nullptr && + (first_attachment == GPU_FB_MAX_ATTACHMENT || type >= GPU_FB_COLOR_ATTACHMENT0)) { first_attachment = static_cast(type); } - VKTexture &texture = *static_cast(unwrap(attachment.tex)); - texture.ensure_allocated(); - image_views.append(texture.vk_image_view_handle()); + int attachment_location = type >= GPU_FB_COLOR_ATTACHMENT0 ? 
type - GPU_FB_COLOR_ATTACHMENT0 : + depth_location; - VkAttachmentDescription attachment_description{}; - attachment_description.format = to_vk_format(texture.format_get()); - attachment_description.samples = VK_SAMPLE_COUNT_1_BIT; - attachment_description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - attachment_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachment_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - attachment_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - attachment_description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - attachment_description.finalLayout = VK_IMAGE_LAYOUT_GENERAL; - attachment_descriptions.append(attachment_description); + if (attachment.tex) { + /* Ensure texture is allocated to ensure the image view.*/ + VKTexture &texture = *static_cast(unwrap(attachment.tex)); + texture.ensure_allocated(); + image_views[attachment_location] = texture.vk_image_view_handle(); - /* TODO: should we also add unused attachments so the attachment reflect the internal lists.*/ - int attachment_index = attachment_descriptions.size() - 1; + VkAttachmentDescription &attachment_description = + attachment_descriptions[attachment_location]; + attachment_description.flags = 0; + attachment_description.format = to_vk_format(texture.format_get()); + attachment_description.samples = VK_SAMPLE_COUNT_1_BIT; + attachment_description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_description.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment_description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment_description.initialLayout = VK_IMAGE_LAYOUT_GENERAL; + attachment_description.finalLayout = VK_IMAGE_LAYOUT_GENERAL; - switch (type) { - case GPU_FB_DEPTH_ATTACHMENT: - case GPU_FB_DEPTH_STENCIL_ATTACHMENT: - BLI_assert(!has_depth_attachment); - has_depth_attachment = true; - depth_attachment.attachment = attachment_index; - 
depth_attachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - break; + /* Create the attachment reference. */ + const bool is_depth_attachment = ELEM( + type, GPU_FB_DEPTH_ATTACHMENT, GPU_FB_DEPTH_STENCIL_ATTACHMENT); - case GPU_FB_COLOR_ATTACHMENT0: - case GPU_FB_COLOR_ATTACHMENT1: - case GPU_FB_COLOR_ATTACHMENT2: - case GPU_FB_COLOR_ATTACHMENT3: - case GPU_FB_COLOR_ATTACHMENT4: - case GPU_FB_COLOR_ATTACHMENT5: - case GPU_FB_COLOR_ATTACHMENT6: - case GPU_FB_COLOR_ATTACHMENT7: - VkAttachmentReference color_attachment{}; - color_attachment.attachment = attachment_index; - color_attachment.layout = VK_IMAGE_LAYOUT_GENERAL; - color_attachments.append(color_attachment); + BLI_assert_msg(!is_depth_attachment || !has_depth_attachment, + "There can only be one depth/stencil attachment."); + has_depth_attachment |= is_depth_attachment; + VkAttachmentReference &attachment_reference = attachment_references[attachment_location]; + attachment_reference.attachment = attachment_location; + attachment_reference.layout = is_depth_attachment ? + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL; } } - VkSubpassDescription subpass{}; + /* Update the size, viewport & scissor based on the first attachment. */ + if (first_attachment != GPU_FB_MAX_ATTACHMENT) { + GPUAttachment &attachment = attachments_[first_attachment]; + BLI_assert(attachment.tex); + + int size[3]; + GPU_texture_get_mipmap_size(attachment.tex, attachment.mip, size); + size_set(size[0], size[1]); + } + else { + this->size_set(0, 0); + } + viewport_reset(); + scissor_reset(); + + /* Create render pass. */ + + const int attachment_len = has_depth_attachment ? 
depth_location + 1 : depth_location; + const int color_attachment_len = depth_location; + VkSubpassDescription subpass = {}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass.colorAttachmentCount = color_attachments.size(); - subpass.pColorAttachments = color_attachments.data(); + subpass.colorAttachmentCount = color_attachment_len; + subpass.pColorAttachments = attachment_references.begin(); if (has_depth_attachment) { - subpass.pDepthStencilAttachment = &depth_attachment; + subpass.pDepthStencilAttachment = &attachment_references[depth_location]; } VkRenderPassCreateInfo render_pass_info = {}; render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - render_pass_info.attachmentCount = attachment_descriptions.size(); + render_pass_info.attachmentCount = attachment_len; render_pass_info.pAttachments = attachment_descriptions.data(); render_pass_info.subpassCount = 1; render_pass_info.pSubpasses = &subpass; @@ -278,24 +345,12 @@ void VKFrameBuffer::render_pass_create() vkCreateRenderPass( context.device_get(), &render_pass_info, vk_allocation_callbacks, &vk_render_pass_); - if (first_attachment != GPU_FB_MAX_ATTACHMENT) { - GPUAttachment &attachment = attachments_[first_attachment]; - BLI_assert(attachment.tex); - - int size[3]; - GPU_texture_get_mipmap_size(attachment.tex, attachment.mip, size); - this->size_set(size[0], size[1]); - } - else { - this->size_set(0, 0); - } - - /* We might want to split framebuffer and render target....*/ + /* We might want to split framebuffer and render pass....*/ VkFramebufferCreateInfo framebuffer_create_info = {}; framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; framebuffer_create_info.renderPass = vk_render_pass_; - framebuffer_create_info.attachmentCount = image_views.size(); - framebuffer_create_info.pAttachments = image_views.data(); + framebuffer_create_info.attachmentCount = attachment_len; + framebuffer_create_info.pAttachments = image_views.begin(); 
framebuffer_create_info.width = width_; framebuffer_create_info.height = height_; framebuffer_create_info.layers = 1; @@ -319,4 +374,6 @@ void VKFrameBuffer::render_pass_free() vk_framebuffer_ = VK_NULL_HANDLE; } +/** \} */ + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 5b5b1d83bc0..914df06767b 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -111,8 +111,11 @@ void VKTexture::mip_range_set(int /*min*/, int /*max*/) void *VKTexture::read(int mip, eGPUDataFormat format) { - /* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. */ VKContext &context = *VKContext::get(); + const VkImageLayout previous_layout = current_layout_get(); + layout_ensure(context, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + /* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. */ VKBuffer staging_buffer; /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */ @@ -136,6 +139,7 @@ void *VKTexture::read(int mip, eGPUDataFormat format) VKCommandBuffer &command_buffer = context.command_buffer_get(); command_buffer.copy(staging_buffer, *this, Span(®ion, 1)); command_buffer.submit(); + layout_ensure(context, previous_layout); void *data = MEM_mallocN(host_memory_size, __func__); convert_device_to_host(data, staging_buffer.mapped_memory_get(), sample_len, format, format_); @@ -310,15 +314,7 @@ bool VKTexture::allocate() } /* Promote image to the correct layout. 
*/ - VkImageMemoryBarrier barrier{}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.image = vk_image_; - barrier.subresourceRange.aspectMask = to_vk_image_aspect_flag_bits(format_); - barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; - barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; - context.command_buffer_get().pipeline_barrier(Span(&barrier, 1)); + layout_ensure(context, VK_IMAGE_LAYOUT_GENERAL); VK_ALLOCATION_CALLBACKS VkImageViewCreateInfo image_view_info = {}; @@ -349,4 +345,37 @@ void VKTexture::image_bind(int binding) shader->pipeline_get().descriptor_set_get().image_bind(*this, location); } +/* -------------------------------------------------------------------- */ +/** \name Image Layout + * \{ */ + +VkImageLayout VKTexture::current_layout_get() const +{ + return current_layout_; +} + +void VKTexture::current_layout_set(const VkImageLayout new_layout) +{ + current_layout_ = new_layout; +} + +void VKTexture::layout_ensure(VKContext &context, const VkImageLayout requested_layout) +{ + const VkImageLayout current_layout = current_layout_get(); + if (current_layout == requested_layout) { + return; + } + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = current_layout; + barrier.newLayout = requested_layout; + barrier.image = vk_image_; + barrier.subresourceRange.aspectMask = to_vk_image_aspect_flag_bits(format_); + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + context.command_buffer_get().pipeline_barrier(Span(&barrier, 1)); + current_layout_set(requested_layout); +} +/** \} */ + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_texture.hh b/source/blender/gpu/vulkan/vk_texture.hh index a18c8812660..0920bf12bf5 100644 --- 
a/source/blender/gpu/vulkan/vk_texture.hh +++ b/source/blender/gpu/vulkan/vk_texture.hh @@ -17,6 +17,12 @@ class VKTexture : public Texture { VkImageView vk_image_view_ = VK_NULL_HANDLE; VmaAllocation allocation_ = VK_NULL_HANDLE; + /* Last image layout of the texture. Framebuffer and barriers can alter/require the actual layout + * to be changed. During this it requires to set the current layout in order to know which + * conversion should happen. #current_layout_ keep track of the layout so the correct conversion + * can be done.*/ + VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + public: VKTexture(const char *name) : Texture(name) { @@ -62,6 +68,7 @@ class VKTexture : public Texture { private: /** Is this texture already allocated on device. */ bool is_allocated() const; + /** * Allocate the texture of the device. Result is `true` when texture is successfully allocated * on the device. @@ -69,6 +76,36 @@ class VKTexture : public Texture { bool allocate(); VkImageViewType vk_image_view_type() const; + + /* -------------------------------------------------------------------- */ + /** \name Image Layout + * \{ */ + public: + /** + * Change the current layout setting, without actually changing the layout. + * + * Vulkan can change the layout of an image, when a command is being executed. + * The start of a render pass or the end of a render pass can also alter the + * actual layout of the image. This method allows to change the last known layout + * that the image is using. + * + * NOTE: When we add command encoding, this should partly being done inside + * the command encoder, as there is more accurate determination of the transition + * of the layout. Only the final transition should then be stored inside the texture + * to be used by as initial layout for the next set of commands. + */ + void current_layout_set(VkImageLayout new_layout); + VkImageLayout current_layout_get() const; + + /** + * Ensure the layout of the texture. 
This also performs the conversion by adding a memory + * barrier to the active command buffer to perform the conversion. + * + * When texture is already in the requested layout, nothing will be done. + */ + void layout_ensure(VKContext &context, VkImageLayout requested_layout); + + /** \} */ }; static inline VKTexture *unwrap(Texture *tex) -- 2.30.2 From a406c6e2116cd5a6a159cd046e3884ca932853a8 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 24 Mar 2023 15:48:19 +0100 Subject: [PATCH 29/33] Added support for scissor testing. --- source/blender/gpu/tests/framebuffer_test.cc | 59 ++++++++++++++++++++ source/blender/gpu/vulkan/vk_framebuffer.cc | 25 ++++----- source/blender/gpu/vulkan/vk_texture.cc | 6 +- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index 484906cf316..afadcb39e68 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -137,4 +137,63 @@ static void test_framebuffer_clear_depth() } GPU_TEST(framebuffer_clear_depth); +static void test_framebuffer_scissor_test() +{ + const int2 size(128, 128); + const int bar_size = 16; + eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ; + GPUTexture *texture = GPU_texture_create_2d( + __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + + GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); + GPU_framebuffer_ensure_config(&framebuffer, + {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture)}); + GPU_framebuffer_bind(framebuffer); + + const float4 color1(0.0f); + const float4 color2(0.5f); + const float4 color3(1.0f); + GPU_framebuffer_clear_color(framebuffer, color1); + + GPU_scissor_test(true); + for (int x = 0; x < size.x; x += 2 * bar_size) { + GPU_scissor(x, 0, bar_size, size.y); + GPU_framebuffer_clear_color(framebuffer, color2); + } + for (int y = 0; y < size.y; y += 2 * bar_size) { + 
GPU_scissor(0, y, size.x, bar_size); + GPU_framebuffer_clear_color(framebuffer, color3); + } + GPU_scissor_test(false); + GPU_finish(); + + float4 *read_data = static_cast(GPU_texture_read(texture, GPU_DATA_FLOAT, 0)); + int offset = 0; + for (float4 pixel_color : Span(read_data, size.x * size.y)) { + int x = offset % size.x; + int y = offset / size.x; + int bar_x = x / bar_size; + int bar_y = y / bar_size; + + if (bar_y % 2 == 0) { + EXPECT_EQ(pixel_color, color3); + } + else { + if (bar_x % 2 == 0) { + EXPECT_EQ(pixel_color, color2); + } + else { + EXPECT_EQ(pixel_color, color1); + } + } + + offset++; + } + MEM_freeN(read_data); + + GPU_framebuffer_free(framebuffer); + GPU_texture_free(texture); +} +GPU_TEST(framebuffer_scissor_test); + } // namespace blender::gpu::tests diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 51d1f60757f..8a4535ddeec 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -60,23 +60,22 @@ void VKFrameBuffer::bind(bool /*enabled_srgb*/) VkRect2D VKFrameBuffer::vk_render_area_get() const { VkRect2D render_area = {}; - int render_rect[4]; - viewport_get(render_rect); + if (scissor_test_get()) { int scissor_rect[4]; - int viewport_rect[4]; - copy_v4_v4_int(viewport_rect, render_rect); scissor_get(scissor_rect); - int2 scissor_offset_delta = int2(scissor_rect) - int2(viewport_rect); - render_rect[0] = max_ii(viewport_rect[0], scissor_rect[0]); - render_rect[1] = max_ii(viewport_rect[1], scissor_rect[1]); - render_rect[2] = min_ii(viewport_rect[2] - scissor_offset_delta[0], scissor_rect[2]); - render_rect[3] = min_ii(viewport_rect[3] - scissor_offset_delta[1], scissor_rect[3]); + render_area.offset.x = scissor_rect[0]; + render_area.offset.y = scissor_rect[1]; + render_area.extent.width = scissor_rect[2]; + render_area.extent.height = scissor_rect[3]; } - render_area.offset.x = render_rect[0]; - render_area.offset.y = 
render_rect[1]; - render_area.extent.width = render_rect[2]; - render_area.extent.height = render_rect[3]; + else { + render_area.offset.x = 0; + render_area.offset.y = 0; + render_area.extent.width = width_; + render_area.extent.height = height_; + } + return render_area; } diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 914df06767b..750ae2df39b 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -92,9 +92,10 @@ void VKTexture::clear(eGPUDataFormat format, const void *data) range.aspectMask = to_vk_image_aspect_flag_bits(format_); range.levelCount = VK_REMAINING_MIP_LEVELS; range.layerCount = VK_REMAINING_ARRAY_LAYERS; + layout_ensure(context, VK_IMAGE_LAYOUT_GENERAL); command_buffer.clear( - vk_image_, VK_IMAGE_LAYOUT_GENERAL, clear_color, Span(&range, 1)); + vk_image_, current_layout_get(), clear_color, Span(&range, 1)); } void VKTexture::swizzle_set(const char /*swizzle_mask*/[4]) @@ -112,7 +113,6 @@ void VKTexture::mip_range_set(int /*min*/, int /*max*/) void *VKTexture::read(int mip, eGPUDataFormat format) { VKContext &context = *VKContext::get(); - const VkImageLayout previous_layout = current_layout_get(); layout_ensure(context, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); /* Vulkan images cannot be directly mapped to host memory and requires a staging buffer. 
*/ @@ -139,7 +139,6 @@ void *VKTexture::read(int mip, eGPUDataFormat format) VKCommandBuffer &command_buffer = context.command_buffer_get(); command_buffer.copy(staging_buffer, *this, Span(®ion, 1)); command_buffer.submit(); - layout_ensure(context, previous_layout); void *data = MEM_mallocN(host_memory_size, __func__); convert_device_to_host(data, staging_buffer.mapped_memory_get(), sample_len, format, format_); @@ -174,6 +173,7 @@ void VKTexture::update_sub( region.imageSubresource.mipLevel = mip; region.imageSubresource.layerCount = 1; + layout_ensure(context, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VKCommandBuffer &command_buffer = context.command_buffer_get(); command_buffer.copy(*this, staging_buffer, Span(®ion, 1)); command_buffer.submit(); -- 2.30.2 From 56c2ccea10b43d46d351d8b24f3f697142473e72 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 27 Mar 2023 14:49:52 +0200 Subject: [PATCH 30/33] Fix setting the correct clear color based on the texture format. --- source/blender/gpu/tests/framebuffer_test.cc | 9 +++-- source/blender/gpu/vulkan/vk_common.cc | 42 ++++++++++++++++++++ source/blender/gpu/vulkan/vk_common.hh | 1 + source/blender/gpu/vulkan/vk_framebuffer.cc | 6 ++- source/blender/gpu/vulkan/vk_texture.cc | 42 -------------------- 5 files changed, 52 insertions(+), 48 deletions(-) diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index afadcb39e68..02d8ae82fb7 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -43,7 +43,7 @@ static void test_framebuffer_clear_color_multiple_attachments() GPUTexture *texture1 = GPU_texture_create_2d( __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); GPUTexture *texture2 = GPU_texture_create_2d( - __func__, UNPACK2(size), 1, GPU_RGBA32F, usage, nullptr); + __func__, UNPACK2(size), 1, GPU_RGBA32UI, usage, nullptr); GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__); 
GPU_framebuffer_ensure_config( @@ -61,9 +61,10 @@ static void test_framebuffer_clear_color_multiple_attachments() } MEM_freeN(read_data1); - float4 *read_data2 = static_cast(GPU_texture_read(texture2, GPU_DATA_FLOAT, 0)); - for (float4 pixel_color : Span(read_data1, size.x * size.y)) { - EXPECT_EQ(pixel_color, clear_color); + uint4 *read_data2 = static_cast(GPU_texture_read(texture2, GPU_DATA_UINT, 0)); + uint4 clear_color_uint(1036831949, 1045220557, 1056964608, 1065353216); + for (uint4 pixel_color : Span(read_data2, size.x * size.y)) { + EXPECT_EQ(pixel_color, clear_color_uint); } MEM_freeN(read_data2); diff --git a/source/blender/gpu/vulkan/vk_common.cc b/source/blender/gpu/vulkan/vk_common.cc index 08d46962418..cfb16415646 100644 --- a/source/blender/gpu/vulkan/vk_common.cc +++ b/source/blender/gpu/vulkan/vk_common.cc @@ -307,4 +307,46 @@ VkComponentMapping to_vk_component_mapping(const eGPUTextureFormat /*format*/) return component_mapping; } +template void copy_color(T dst[4], const T *src) +{ + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; +} + +VkClearColorValue to_vk_clear_color_value(const eGPUDataFormat format, const void *data) +{ + VkClearColorValue result = {0.0f}; + switch (format) { + case GPU_DATA_FLOAT: { + const float *float_data = static_cast(data); + copy_color(result.float32, float_data); + break; + } + + case GPU_DATA_INT: { + const int32_t *int_data = static_cast(data); + copy_color(result.int32, int_data); + break; + } + + case GPU_DATA_UINT: { + const uint32_t *uint_data = static_cast(data); + copy_color(result.uint32, uint_data); + break; + } + + case GPU_DATA_HALF_FLOAT: + case GPU_DATA_UBYTE: + case GPU_DATA_UINT_24_8: + case GPU_DATA_10_11_11_REV: + case GPU_DATA_2_10_10_10_REV: { + BLI_assert_unreachable(); + break; + } + } + return result; +} + } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_common.hh b/source/blender/gpu/vulkan/vk_common.hh index af661342e57..10596a84989 100644 --- 
a/source/blender/gpu/vulkan/vk_common.hh +++ b/source/blender/gpu/vulkan/vk_common.hh @@ -24,5 +24,6 @@ VkFormat to_vk_format(const eGPUTextureFormat format); VkComponentMapping to_vk_component_mapping(const eGPUTextureFormat format); VkImageViewType to_vk_image_view_type(const eGPUTextureType type); VkImageType to_vk_image_type(const eGPUTextureType type); +VkClearColorValue to_vk_clear_color_value(const eGPUDataFormat format, const void *data); } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 8a4535ddeec..026373794df 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -75,7 +75,7 @@ VkRect2D VKFrameBuffer::vk_render_area_get() const render_area.extent.width = width_; render_area.extent.height = height_; } - + return render_area; } @@ -111,7 +111,9 @@ void VKFrameBuffer::build_clear_attachments_color(const float (*clear_colors)[4] VkClearAttachment clear_attachment = {}; clear_attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; clear_attachment.colorAttachment = color_slot; - copy_v4_v4(clear_attachment.clearValue.color.float32, clear_colors[color_index]); + eGPUDataFormat data_format = to_data_format(GPU_texture_format(attachment.tex)); + clear_attachment.clearValue.color = to_vk_clear_color_value(data_format, + &clear_colors[color_index]); r_attachments.append(clear_attachment); color_index += multi_clear_colors ? 
1 : 0; diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc index 750ae2df39b..a57c9ff47f3 100644 --- a/source/blender/gpu/vulkan/vk_texture.cc +++ b/source/blender/gpu/vulkan/vk_texture.cc @@ -37,48 +37,6 @@ void VKTexture::copy_to(Texture * /*tex*/) { } -template void copy_color(T dst[4], const T *src) -{ - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} - -static VkClearColorValue to_vk_clear_color_value(eGPUDataFormat format, const void *data) -{ - VkClearColorValue result = {0.0f}; - switch (format) { - case GPU_DATA_FLOAT: { - const float *float_data = static_cast(data); - copy_color(result.float32, float_data); - break; - } - - case GPU_DATA_INT: { - const int32_t *int_data = static_cast(data); - copy_color(result.int32, int_data); - break; - } - - case GPU_DATA_UINT: { - const uint32_t *uint_data = static_cast(data); - copy_color(result.uint32, uint_data); - break; - } - - case GPU_DATA_HALF_FLOAT: - case GPU_DATA_UBYTE: - case GPU_DATA_UINT_24_8: - case GPU_DATA_10_11_11_REV: - case GPU_DATA_2_10_10_10_REV: { - BLI_assert_unreachable(); - break; - } - } - return result; -} - void VKTexture::clear(eGPUDataFormat format, const void *data) { if (!is_allocated()) { -- 2.30.2 From 9d4650967b58adb7e676f1c9c5533bc26330bec8 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 27 Mar 2023 14:57:32 +0200 Subject: [PATCH 31/33] Fix gramar in comment. --- source/blender/gpu/vulkan/vk_texture.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_texture.hh b/source/blender/gpu/vulkan/vk_texture.hh index 0920bf12bf5..29e5be95b47 100644 --- a/source/blender/gpu/vulkan/vk_texture.hh +++ b/source/blender/gpu/vulkan/vk_texture.hh @@ -82,7 +82,7 @@ class VKTexture : public Texture { * \{ */ public: /** - * Change the current layout setting, without actually changing the layout. + * Update the current layout attribute, without actually changing the layout. 
* * Vulkan can change the layout of an image, when a command is being executed. * The start of a render pass or the end of a render pass can also alter the @@ -100,7 +100,7 @@ class VKTexture : public Texture { /** * Ensure the layout of the texture. This also performs the conversion by adding a memory * barrier to the active command buffer to perform the conversion. - * + * * When texture is already in the requested layout, nothing will be done. */ void layout_ensure(VKContext &context, VkImageLayout requested_layout); -- 2.30.2 From d0a77111125394e1b7864247f2df76a52a2a0ed6 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 27 Mar 2023 15:03:02 +0200 Subject: [PATCH 32/33] Added deprecated code warning. --- source/blender/gpu/vulkan/vk_framebuffer.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index 026373794df..fcadc690d9c 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -210,6 +210,8 @@ void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, int /*dst_offset_x*/, int /*dst_offset_y*/) { + /* Framebuffer blitting is deprecated and not used anymore. */ + BLI_assert_unreachable(); } /** \} */ -- 2.30.2 From 21608f93487ef71c860c5953683165f595aaef3a Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 27 Mar 2023 15:13:20 +0200 Subject: [PATCH 33/33] Remove unreachable statement as it is still in use by offscreen. 
--- source/blender/gpu/vulkan/vk_framebuffer.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc index fcadc690d9c..026373794df 100644 --- a/source/blender/gpu/vulkan/vk_framebuffer.cc +++ b/source/blender/gpu/vulkan/vk_framebuffer.cc @@ -210,8 +210,6 @@ void VKFrameBuffer::blit_to(eGPUFrameBufferBits /*planes*/, int /*dst_offset_x*/, int /*dst_offset_y*/) { - /* Framebuffer blitting is deprecated and not used anymore. */ - BLI_assert_unreachable(); } /** \} */ -- 2.30.2