Vulkan: Workaround for 24bit depth component #108698

Merged
Jeroen Bakker merged 5 commits from Jeroen-Bakker/blender:vulkan-workaround-depth24 into main 2023-06-08 08:53:18 +02:00
6 changed files with 87 additions and 20 deletions

View File

@@ -428,13 +428,11 @@ static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5()
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_RGB9_E5);
#endif
#if RUN_UNSUPPORTED
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F()
{
texture_create_upload_read_with_bias<GPU_DEPTH_COMPONENT32F, GPU_DATA_FLOAT>(0.0f);
}
GPU_TEST(texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT32F);
#endif
#if RUN_COMPONENT_UNIMPLEMENTED
static void test_texture_roundtrip__GPU_DATA_FLOAT__GPU_DEPTH_COMPONENT24()
@@ -622,7 +620,6 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_R32UI()
}
GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_R32UI);
#if RUN_UNSUPPORTED
static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH32F_STENCIL8()
{
texture_create_upload_read<GPU_DEPTH32F_STENCIL8, GPU_DATA_UINT, uint32_t>();
@@ -634,7 +631,6 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8()
texture_create_upload_read<GPU_DEPTH24_STENCIL8, GPU_DATA_UINT, uint32_t>();
}
GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH24_STENCIL8);
#endif
#if RUN_UNSUPPORTED
static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB8UI()
@@ -656,7 +652,6 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI()
GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_RGB32UI);
#endif
#if RUN_COMPONENT_UNIMPLEMENTED
static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT32F()
{
texture_create_upload_read<GPU_DEPTH_COMPONENT32F, GPU_DATA_UINT, uint32_t>();
@@ -669,6 +664,7 @@ static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT24()
}
GPU_TEST(texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT24);
#if RUN_COMPONENT_UNIMPLEMENTED
static void test_texture_roundtrip__GPU_DATA_UINT__GPU_DEPTH_COMPONENT16()
{
texture_create_upload_read<GPU_DEPTH_COMPONENT16, GPU_DATA_UINT, uint32_t>();

View File

@@ -73,6 +73,18 @@ void VKBackend::platform_init(const VKDevice &device)
driver_version.c_str());
}
void VKBackend::detect_workarounds(VKDevice &device)
{
VKWorkarounds workarounds;
/* AMD GPUs don't support texture formats that are aligned to 24 or 48 bits. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) {
workarounds.not_aligned_pixel_formats = true;
}
device.workarounds_ = workarounds;
}
void VKBackend::platform_exit()
{
GPG.clear();
@@ -174,7 +186,7 @@ shaderc::Compiler &VKBackend::get_shaderc_compiler()
return shaderc_compiler_;
}
void VKBackend::capabilities_init(const VKDevice &device)
void VKBackend::capabilities_init(VKDevice &device)
{
const VkPhysicalDeviceProperties &properties = device.physical_device_properties_get();
const VkPhysicalDeviceLimits &limits = properties.limits;
@@ -205,6 +217,8 @@ void VKBackend::capabilities_init(const VKDevice &device)
GCaps.max_varying_floats = limits.maxVertexOutputComponents;
GCaps.max_shader_storage_buffer_bindings = limits.maxPerStageDescriptorStorageBuffers;
GCaps.max_compute_shader_storage_blocks = limits.maxPerStageDescriptorStorageBuffers;
detect_workarounds(device);
}
} // namespace blender::gpu

View File

@@ -88,9 +88,10 @@ class VKBackend : public GPUBackend {
}
static void platform_init(const VKDevice &device);
static void capabilities_init(const VKDevice &device);
static void capabilities_init(VKDevice &device);
private:
static void detect_workarounds(VKDevice &device);
static void platform_init();
static void platform_exit();

View File

@@ -32,6 +32,9 @@ enum class ConversionType {
FLOAT_TO_SNORM16,
SNORM16_TO_FLOAT,
FLOAT_TO_UNORM32,
UNORM32_TO_FLOAT,
UI32_TO_UI16,
UI16_TO_UI32,
@@ -239,6 +242,7 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
case GPU_RGBA32UI:
case GPU_RG32UI:
case GPU_R32UI:
case GPU_DEPTH_COMPONENT24:
return ConversionType::PASS_THROUGH;
case GPU_RGBA16UI:
@@ -252,6 +256,10 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
case GPU_R8UI:
return ConversionType::UI32_TO_UI8;
case GPU_DEPTH_COMPONENT32F:
case GPU_DEPTH32F_STENCIL8:
return ConversionType::UNORM32_TO_FLOAT;
case GPU_RGBA8I:
case GPU_RGBA8:
case GPU_RGBA16I:
@@ -276,7 +284,6 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
case GPU_RGB10_A2:
case GPU_RGB10_A2UI:
case GPU_R11F_G11F_B10F:
case GPU_DEPTH32F_STENCIL8:
case GPU_DEPTH24_STENCIL8:
case GPU_SRGB8_A8:
case GPU_RGBA8_SNORM:
@@ -304,8 +311,6 @@ static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
case GPU_RGBA8_DXT5:
case GPU_SRGB8:
case GPU_RGB9_E5:
case GPU_DEPTH_COMPONENT32F:
case GPU_DEPTH_COMPONENT24:
case GPU_DEPTH_COMPONENT16:
return ConversionType::UNSUPPORTED;
}
@@ -523,6 +528,7 @@ static ConversionType reversed(ConversionType type)
CASE_PAIR(FLOAT, SNORM8)
CASE_PAIR(FLOAT, UNORM16)
CASE_PAIR(FLOAT, SNORM16)
CASE_PAIR(FLOAT, UNORM32)
CASE_PAIR(UI32, UI16)
CASE_PAIR(I32, I16)
CASE_PAIR(UI32, UI8)
@@ -632,6 +638,7 @@ template<typename InnerType> struct SignedNormalized {
template<typename InnerType> struct UnsignedNormalized {
static_assert(std::is_same<InnerType, uint8_t>() || std::is_same<InnerType, uint16_t>() ||
std::is_same<InnerType, uint32_t>() ||
std::is_same<InnerType, DepthComponent24>());
InnerType value;
@@ -645,15 +652,24 @@ template<typename InnerType> struct UnsignedNormalized {
}
}
static constexpr int32_t scalar()
static constexpr uint32_t scalar()
{
return (1 << (used_byte_size() * 8)) - 1;
if constexpr (std::is_same<InnerType, DepthComponent24>()) {
return (1 << (used_byte_size() * 8)) - 1;
}
else {
return std::numeric_limits<InnerType>::max();
}
}
static constexpr int32_t max()
static constexpr uint32_t max()
{
return ((1 << (used_byte_size() * 8)) - 1);
if constexpr (std::is_same<InnerType, DepthComponent24>()) {
return (1 << (used_byte_size() * 8)) - 1;
}
else {
return std::numeric_limits<InnerType>::max();
}
}
};
@@ -674,15 +690,15 @@ template<typename StorageType> void convert(F32 &dst, const SignedNormalized<Sto
template<typename StorageType> void convert(UnsignedNormalized<StorageType> &dst, const F32 &src)
{
static constexpr int32_t scalar = UnsignedNormalized<StorageType>::scalar();
static constexpr int32_t max = scalar;
dst.value = (clamp_i((src.value * scalar), 0, max));
static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
static constexpr uint32_t max = scalar;
dst.value = (clamp_f((src.value * scalar), 0, max));
}
template<typename StorageType> void convert(F32 &dst, const UnsignedNormalized<StorageType> &src)
{
static constexpr int32_t scalar = UnsignedNormalized<StorageType>::scalar();
dst.value = float(int32_t(src.value)) / scalar;
static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
dst.value = float(uint32_t(src.value)) / scalar;
}
/* Copy the contents of src to dst without performing any actual conversion. */
@@ -860,6 +876,15 @@ static void convert_buffer(void *dst_memory,
dst_memory, src_memory, buffer_size, device_format);
break;
case ConversionType::FLOAT_TO_UNORM32:
convert_per_component<UnsignedNormalized<uint32_t>, F32>(
dst_memory, src_memory, buffer_size, device_format);
break;
case ConversionType::UNORM32_TO_FLOAT:
convert_per_component<F32, UnsignedNormalized<uint32_t>>(
dst_memory, src_memory, buffer_size, device_format);
break;
case ConversionType::FLOAT_TO_HALF:
convert_per_component<F16, F32>(dst_memory, src_memory, buffer_size, device_format);
break;
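
For reference, the 24-bit unsigned-normalized math added above boils down to scaling by (1 << 24) - 1. Below is a minimal standalone sketch of that round trip; it is not part of this patch and the helper names are purely illustrative:

#include <algorithm>
#include <cstdint>
#include <cstdio>

/* Largest value representable in a 24-bit unsigned-normalized component. */
static constexpr uint32_t UNORM24_MAX = (1u << 24) - 1;

/* Scale a float in [0..1] to the 24-bit integer range, clamping out-of-range input. */
static uint32_t float_to_unorm24(float value)
{
  const float clamped = std::clamp(value * float(UNORM24_MAX), 0.0f, float(UNORM24_MAX));
  return uint32_t(clamped);
}

/* Map the 24-bit integer back to a float in [0..1]. */
static float unorm24_to_float(uint32_t value)
{
  return float(value) / float(UNORM24_MAX);
}

int main()
{
  const uint32_t stored = float_to_unorm24(0.5f);
  std::printf("stored=%u restored=%f\n", stored, unorm24_to_float(stored));
  return 0;
}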

View File

@@ -15,6 +15,17 @@
#include "vk_descriptor_pools.hh"
namespace blender::gpu {
class VKBackend;
struct VKWorkarounds {
/**
* Some devices don't support pixel formats that are aligned to 24 or 48 bits.
*
* When set to true, such formats should be replaced with a supported alternative texture format.
*/
bool not_aligned_pixel_formats = false;
};
class VKDevice : public NonCopyable {
private:
@@ -35,6 +46,9 @@ class VKDevice : public NonCopyable {
/** Functions of vk_ext_debugutils for this device/instance. */
debug::VKDebuggingTools debugging_tools_;
/* Workarounds */
VKWorkarounds workarounds_;
public:
VkPhysicalDevice physical_device_get() const
{
@@ -95,11 +109,19 @@ class VKDevice : public NonCopyable {
std::string vendor_name() const;
std::string driver_version() const;
const VKWorkarounds &workarounds_get() const
{
return workarounds_;
}
private:
void init_physical_device_properties();
void init_debug_callbacks();
void init_memory_allocator();
void init_descriptor_pools();
/* During initialization the backend requires write access to the workarounds. */
friend VKBackend;
};
} // namespace blender::gpu

View File

@@ -220,6 +220,15 @@ bool VKTexture::init_internal()
* at this moment, so we cannot initialize here. The initialization is postponed until the
* allocation of the texture on the device. */
const VKDevice &device = VKBackend::get().device_get();
const VKWorkarounds &workarounds = device.workarounds_get();
if (format_ == GPU_DEPTH_COMPONENT24 && workarounds.not_aligned_pixel_formats) {
format_ = GPU_DEPTH_COMPONENT32F;
}
if (format_ == GPU_DEPTH24_STENCIL8 && workarounds.not_aligned_pixel_formats) {
format_ = GPU_DEPTH32F_STENCIL8;
}
/* TODO: return false when texture format isn't supported. */
return true;
}
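
Taken together, the texture-side change above amounts to remapping the requested format before the texture is allocated. A condensed sketch of that mapping follows; it is not part of this patch, the helper name is hypothetical, and the enum values and VKWorkarounds struct are the ones shown in the diffs above:

/* Replace 24-bit packed depth formats with their 32-bit float equivalents
 * when the device cannot handle 24/48-bit aligned pixel formats. */
static eGPUTextureFormat workaround_texture_format(const eGPUTextureFormat format,
                                                   const VKWorkarounds &workarounds)
{
  if (!workarounds.not_aligned_pixel_formats) {
    return format;
  }
  switch (format) {
    case GPU_DEPTH_COMPONENT24:
      return GPU_DEPTH_COMPONENT32F;
    case GPU_DEPTH24_STENCIL8:
      return GPU_DEPTH32F_STENCIL8;
    default:
      return format;
  }
}

With the remapped formats, uploads and readbacks that use GPU_DATA_UINT go through the FLOAT_TO_UNORM32 / UNORM32_TO_FLOAT conversions added above, so callers keep seeing the data layout they requested.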