This patch adds initial support for compute shaders to the Vulkan backend. As the development is oriented to the test cases we have, the implementation is limited to what is used there. It has been validated with this patch that the following test cases run as expected - `GPUVulkanTest.gpu_shader_compute_vbo` - `GPUVulkanTest.gpu_shader_compute_ibo` - `GPUVulkanTest.gpu_shader_compute_ssbo` - `GPUVulkanTest.gpu_storage_buffer_create_update_read` - `GPUVulkanTest.gpu_shader_compute_2d` This patch includes: - Allocating VkBuffer on device. - Uploading data from CPU to VkBuffer. - Binding VkBuffer as SSBO to a compute shader. - Executing a compute shader and altering the VkBuffer. - Downloading the VkBuffer to CPU RAM. - Validating that it worked. - Use device-only vertex buffer as SSBO - Use device-only index buffer as SSBO - Use device-only image buffers The GHOST API has been changed as the original design was created before we even had support for compute shaders in Blender. The function `GHOST_getVulkanBackbuffer` has been separated so the command buffer can be retrieved without a backbuffer (`GHOST_getVulkanCommandBuffer`). In order to do correct command buffer processing we needed access to the queue owned by GHOST. This is returned as part of the `GHOST_getVulkanHandles` function. Open topics (not considered part of this patch) - Memory barriers & command buffer encoding - Indirect compute dispatching - Rest of the test cases - Data conversions when the requested data format differs from the on-device format. - GPUVulkanTest.gpu_shader_compute_1d is supported on AMD devices. NVIDIA doesn't seem to support 1D textures. Pull-request: #104518
108 lines
3.2 KiB
C++
108 lines
3.2 KiB
C++
/* SPDX-License-Identifier: GPL-2.0-or-later
|
|
* Copyright 2023 Blender Foundation. All rights reserved. */
|
|
|
|
/** \file
|
|
* \ingroup gpu
|
|
*/
|
|
|
|
#include "vk_buffer.hh"
|
|
|
|
namespace blender::gpu {
|
|
|
|
VKBuffer::~VKBuffer()
{
  /* Only release resources that were actually allocated. This keeps the destructor safe for
   * buffers that were never (or already) freed, and avoids touching the context (which
   * `VKContext::get()` fetches from thread-local state) when there is nothing to destroy. */
  if (is_allocated()) {
    VKContext &context = *VKContext::get();
    free(context);
  }
}
|
|
|
|
bool VKBuffer::is_allocated() const
|
|
{
|
|
return allocation_ != VK_NULL_HANDLE;
|
|
}
|
|
|
|
/* Translate a GPU usage hint into the VMA allocation-creation flags used for the buffer. */
static VmaAllocationCreateFlagBits vma_allocation_flags(GPUUsageType usage)
{
  switch (usage) {
    case GPU_USAGE_STATIC:
    case GPU_USAGE_DYNAMIC:
      /* Host-visible and persistently mapped so the CPU can read/write directly. */
      return static_cast<VmaAllocationCreateFlagBits>(
          VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT);
    case GPU_USAGE_DEVICE_ONLY:
      /* Dedicated allocation; host access kept for up/download paths. */
      return static_cast<VmaAllocationCreateFlagBits>(
          VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
          VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT);
    case GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY:
    case GPU_USAGE_STREAM:
      /* Not implemented yet; fall through to the assert below. */
      break;
  }
  BLI_assert_msg(false, "Unimplemented GPUUsageType");
  return static_cast<VmaAllocationCreateFlagBits>(VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
                                                  VMA_ALLOCATION_CREATE_MAPPED_BIT);
}
|
|
|
|
/**
 * Allocate the VkBuffer and its backing memory via VMA.
 *
 * \param context: Context providing the VMA allocator and the queue family index.
 * \param size_in_bytes: Requested buffer size.
 * \param usage: High level usage hint, mapped to VMA allocation flags.
 * \param buffer_usage: Vulkan buffer usage bits (SSBO, vertex, index, ...).
 * \return true when the buffer and allocation were created successfully.
 */
bool VKBuffer::create(VKContext &context,
                      int64_t size_in_bytes,
                      GPUUsageType usage,
                      VkBufferUsageFlagBits buffer_usage)
{
  BLI_assert(!is_allocated());

  VmaAllocator allocator = context.mem_allocator_get();
  VkBufferCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  create_info.flags = 0;
  create_info.size = size_in_bytes;
  create_info.usage = buffer_usage;
  /* We use the same command queue for the compute and graphics pipeline, so it is safe to use
   * exclusive resource handling. */
  create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  /* NOTE: for VK_SHARING_MODE_EXCLUSIVE the queue family fields are ignored by the Vulkan spec;
   * they are filled in here for clarity/debugging only. */
  create_info.queueFamilyIndexCount = 1;
  create_info.pQueueFamilyIndices = context.queue_family_ptr_get();

  VmaAllocationCreateInfo vma_create_info = {};
  vma_create_info.flags = vma_allocation_flags(usage);
  vma_create_info.priority = 1.0f;
  vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;

  VkResult result = vmaCreateBuffer(
      allocator, &create_info, &vma_create_info, &vk_buffer_, &allocation_, nullptr);
  if (result != VK_SUCCESS) {
    return false;
  }

  /* Record the size only on success so a failed create doesn't leave a stale non-zero size on
   * an unallocated buffer. */
  size_in_bytes_ = size_in_bytes;
  return true;
}
|
|
|
|
/* Copy `size_in_bytes_` bytes from `data` into the buffer by mapping it, memcpy-ing and
 * unmapping again. Returns false when the buffer could not be mapped. */
bool VKBuffer::update(VKContext &context, const void *data)
{
  void *dst = nullptr;
  if (!map(context, &dst)) {
    return false;
  }
  memcpy(dst, data, size_in_bytes_);
  unmap(context);
  return true;
}
|
|
|
|
/* Expose the buffer's backing memory to the host; the pointer is returned in
 * `r_mapped_memory`. Returns false when mapping failed. */
bool VKBuffer::map(VKContext &context, void **r_mapped_memory) const
{
  const VkResult map_result = vmaMapMemory(
      context.mem_allocator_get(), allocation_, r_mapped_memory);
  return map_result == VK_SUCCESS;
}
|
|
|
|
/* Release a host mapping previously created by `map`. */
void VKBuffer::unmap(VKContext &context) const
{
  vmaUnmapMemory(context.mem_allocator_get(), allocation_);
}
|
|
|
|
/**
 * Destroy the VkBuffer and release its backing memory.
 *
 * Resets the handles afterwards so `is_allocated()` reports false and a subsequent `free()`
 * (e.g. from the destructor) cannot destroy the same buffer twice.
 */
bool VKBuffer::free(VKContext &context)
{
  VmaAllocator allocator = context.mem_allocator_get();
  vmaDestroyBuffer(allocator, vk_buffer_, allocation_);
  vk_buffer_ = VK_NULL_HANDLE;
  allocation_ = VK_NULL_HANDLE;
  return true;
}
|
|
|
|
} // namespace blender::gpu
|