blender-archive/source/blender/gpu/metal/mtl_index_buffer.mm
Jeroen Bakker f828ecf4ba GPU: Use same read back API as SSBOs
The GPU module had two different styles for reading back data from
GPU buffers. SSBOs used a memcpy to copy the data into a
pre-allocated buffer, while IndexBuf/VertBuf handed back a
driver/platform-controlled pointer to the memory.

Readback is only done for test cases, and returning mapped pointers
is not safe. For this reason we settled on the same approach as the
SSBOs: copy the data into a caller-pre-allocated buffer.

The reason this API is being changed now is that the Vulkan API is
stricter about mapping/unmapping buffers, which could lead to issues
down the road.

Pull Request #104571
2023-02-13 08:34:19 +01:00
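
As a minimal sketch of the new read-back convention, a test-style caller
now pre-allocates the destination itself (the `GPU_indexbuf_read` signature
and `index_len` below are assumptions for illustration, not verbatim from
this commit):

uint32_t *data = static_cast<uint32_t *>(MEM_mallocN(index_len * sizeof(uint32_t), __func__));
GPU_indexbuf_read(ibo, data); /* Backend memcpy's into caller-owned memory. */
/* ... verify the returned indices ... */
MEM_freeN(data);
/* Previously, the backend returned a driver/platform-controlled mapped pointer instead. */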

/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "BLI_span.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Core MTLIndexBuf implementation.
* \{ */
MTLIndexBuf::~MTLIndexBuf()
{
if (ibo_ != nullptr && !this->is_subrange_) {
ibo_->free();
}
this->free_optimized_buffer();
}
void MTLIndexBuf::free_optimized_buffer()
{
if (optimized_ibo_) {
optimized_ibo_->free();
optimized_ibo_ = nullptr;
}
}
void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
{
/* Flag buffer as incompatible with optimized/patched buffers as contents
* can now have partial modifications from the GPU. */
this->flag_can_optimize(false);
this->free_optimized_buffer();
/* Ensure we have a valid IBO. */
BLI_assert(this->ibo_);
/* TODO(Metal): Support index buffer SSBOs. Dependent on compute implementation. */
MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
}
void MTLIndexBuf::read(uint32_t *data) const
{
if (ibo_ != nullptr) {
void *host_ptr = ibo_->get_host_ptr();
memcpy(data, host_ptr, size_get());
return;
}
BLI_assert(false && "Index buffer not ready to be read.");
}
void MTLIndexBuf::upload_data()
{
/* Handle sub-range upload. */
if (is_subrange_) {
MTLIndexBuf *mtlsrc = static_cast<MTLIndexBuf *>(src_);
mtlsrc->upload_data();
#ifndef NDEBUG
BLI_assert_msg(!mtlsrc->point_restarts_stripped_,
"Cannot use sub-range on stripped point buffer.");
#endif
/* If parent sub-range allocation has changed,
* update our index buffer. */
if (alloc_size_ != mtlsrc->alloc_size_ || ibo_ != mtlsrc->ibo_) {
/* Update index buffer and allocation from source. */
alloc_size_ = mtlsrc->alloc_size_;
ibo_ = mtlsrc->ibo_;
/* Reset any allocated patched or optimized index buffers. */
this->free_optimized_buffer();
}
return;
}
/* If new data ready, and index buffer already exists, release current. */
if ((ibo_ != nullptr) && (this->data_ != nullptr)) {
MTL_LOG_INFO("Re-creating index buffer with new data. IndexBuf %p\n", this);
ibo_->free();
ibo_ = nullptr;
}
/* Prepare Buffer and Upload Data. */
if (ibo_ == nullptr && data_ != nullptr) {
alloc_size_ = this->size_get();
if (alloc_size_ == 0) {
MTL_LOG_WARNING("[Metal] Warning! Trying to allocate index buffer with size=0 bytes\n");
}
else {
ibo_ = MTLContext::get_global_memory_manager()->allocate_with_data(alloc_size_, true, data_);
BLI_assert(ibo_);
ibo_->set_label(@"Index Buffer");
}
/* No need to keep copy of data_ in system memory. */
MEM_SAFE_FREE(data_);
}
}
void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data)
{
BLI_assert(!is_subrange_);
/* If host-side data still exists, modify and upload as normal. */
if (data_ != nullptr) {
/* Free index buffer if one exists. */
if (ibo_ != nullptr && !this->is_subrange_) {
ibo_->free();
ibo_ = nullptr;
}
BLI_assert(start + len <= this->size_get());
/* Apply start byte offset to data pointer. */
void *modified_base_ptr = data_;
uint8_t *ptr = static_cast<uint8_t *>(modified_base_ptr);
ptr += start;
modified_base_ptr = static_cast<void *>(ptr);
/* Modify host-side data. */
memcpy(modified_base_ptr, data, len);
return;
}
/* Verify buffer. */
BLI_assert(ibo_ != nullptr);
/* Otherwise, we will inject a data update, using staged data, into the command stream.
* Stage update contents in temporary buffer. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len);
memcpy(range.data, data, len);
/* Copy updated contents into primary buffer.
* These changes need to be uploaded via blit to ensure the data copies happen in-order. */
id<MTLBuffer> dest_buffer = ibo_->get_metal_buffer();
BLI_assert(dest_buffer != nil);
id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
[enc copyFromBuffer:range.metal_buffer
sourceOffset:(uint32_t)range.buffer_offset
toBuffer:dest_buffer
destinationOffset:start
size:len];
/* Synchronize changes back to host to ensure CPU-side data is up-to-date
* for non-Shared buffers. */
if (dest_buffer.storageMode == MTLStorageModeManaged) {
[enc synchronizeResource:dest_buffer];
}
/* Invalidate patched/optimized buffers. */
this->free_optimized_buffer();
/* Flag buffer as incompatible with optimized/patched buffers as contents
* have partial modifications. */
this->flag_can_optimize(false);
BLI_assert(false);
}
void MTLIndexBuf::flag_can_optimize(bool can_optimize)
{
can_optimize_ = can_optimize;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Index buffer optimization and topology emulation
*
* Index buffer optimization and emulation. Optimize index buffers by
* eliminating restart-indices.
* Emulate unsupported primitive types, e.g. Triangle Fan and Line Loop.
* \{ */
/* Returns total vertices in new buffer. */
template<typename T>
static uint32_t populate_optimized_tri_strip_buf(Span<T> original_data,
MutableSpan<T> output_data,
uint32_t input_index_len)
{
/* Generate #TriangleList from #TriangleStrip. */
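/* Example: a strip [v0, v1, v2, v3, v4] expands to the triangle list
* (v0, v1, v2), (v1, v3, v2), (v2, v3, v4). Every other triangle is emitted
* with a rotated vertex order so the winding of the source strip is preserved. */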
uint32_t current_vert_len = 0;
uint32_t current_output_ind = 0;
T indices[3];
for (int c_index = 0; c_index < input_index_len; c_index++) {
T current_index = original_data[c_index];
if (current_index == T(-1)) {
/* Stop current primitive. Move onto next. */
current_vert_len = 0;
}
else {
if (current_vert_len < 3) {
/* Prepare first triangle.
* Cache indices before generating a triangle, in case we have bad primitive-restarts. */
indices[current_vert_len] = current_index;
}
/* Emit triangle once we reach 3 input verts in current strip. */
if (current_vert_len == 2) {
/* First triangle in strip. */
output_data[current_output_ind++] = indices[0];
output_data[current_output_ind++] = indices[1];
output_data[current_output_ind++] = indices[2];
}
else if (current_vert_len > 2) {
/* All other triangles in strip.
* These triangles are populated using data from previous 2 vertices
* and the latest index. */
uint32_t tri_id = current_vert_len - 3;
uint32_t base_output_ind = current_output_ind;
if ((tri_id % 2) == 0) {
output_data[base_output_ind + 0] = output_data[base_output_ind - 2];
output_data[base_output_ind + 1] = current_index;
output_data[base_output_ind + 2] = output_data[base_output_ind - 1];
}
else {
output_data[base_output_ind + 0] = output_data[base_output_ind - 1];
output_data[base_output_ind + 1] = output_data[base_output_ind - 2];
output_data[base_output_ind + 2] = current_index;
}
current_output_ind += 3;
}
/* Increment relative vertex index. */
current_vert_len++;
}
}
return current_output_ind;
}
/* Returns total vertices in new buffer. */
template<typename T>
static uint32_t populate_emulated_tri_fan_buf(Span<T> original_data,
MutableSpan<T> output_data,
uint32_t input_index_len)
{
/* Generate #TriangleList from #TriangleFan. */
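/* Example: a fan [v0, v1, v2, v3, v4] expands to the triangle list
* (v0, v1, v2), (v0, v2, v3), (v0, v3, v4); every triangle shares the
* fan's base vertex v0. */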
T base_prim_ind_val = 0;
uint32_t current_vert_len = 0;
uint32_t current_output_ind = 0;
T indices[3];
for (int c_index = 0; c_index < input_index_len; c_index++) {
T current_index = original_data[c_index];
if (current_index == T(-1)) {
/* Stop current primitive. Move onto next. */
current_vert_len = 0;
}
else {
if (current_vert_len < 3) {
/* Prepare first triangle.
* Cache indices before generating a triangle, in case we have bad primitive-restarts. */
indices[current_vert_len] = current_index;
}
/* Emit triangle once we reach 3 input verts in current fan. */
if (current_vert_len == 2) {
/* First triangle in fan. */
output_data[current_output_ind++] = indices[0];
output_data[current_output_ind++] = indices[1];
output_data[current_output_ind++] = indices[2];
base_prim_ind_val = indices[0];
}
else if (current_vert_len > 2) {
/* All other triangles in the fan.
* These are built from the fan's base vertex, the vertex emitted
* last, and the latest index. */
uint32_t base_output_ind = current_output_ind;
output_data[base_output_ind + 0] = base_prim_ind_val;
output_data[base_output_ind + 1] = output_data[base_output_ind - 1];
output_data[base_output_ind + 2] = current_index;
current_output_ind += 3;
}
/* Increment relative vertex index. */
current_vert_len++;
}
}
return current_output_ind;
}
id<MTLBuffer> MTLIndexBuf::get_index_buffer(GPUPrimType &in_out_primitive_type,
uint32_t &in_out_v_count)
{
/* Determine whether to return the original index buffer, or whether we
* should emulate an unsupported primitive type, or optimize a restart-
* compatible type for faster performance. */
bool should_optimize_or_emulate = (in_out_primitive_type == GPU_PRIM_TRI_FAN) ||
(in_out_primitive_type == GPU_PRIM_TRI_STRIP);
if (!should_optimize_or_emulate || is_subrange_ || !can_optimize_) {
/* Ensure we are not optimized. */
BLI_assert(this->optimized_ibo_ == nullptr);
/* Return regular index buffer. */
BLI_assert(this->ibo_ && this->ibo_->get_metal_buffer());
return this->ibo_->get_metal_buffer();
}
/* Perform optimization on type. */
GPUPrimType input_prim_type = in_out_primitive_type;
this->upload_data();
if (!ibo_ && optimized_ibo_ == nullptr) {
/* Cannot optimize buffer if no source IBO exists. */
return nil;
}
/* Verify whether existing index buffer is valid. */
if (optimized_ibo_ != nullptr && optimized_primitive_type_ != input_prim_type) {
BLI_assert_msg(false,
"Cannot change the optimized primitive format after generation, as source "
"index buffer data is discarded.");
return nil;
}
/* Generate optimized index buffer. */
if (optimized_ibo_ == nullptr) {
/* Generate unwrapped index buffer. */
switch (input_prim_type) {
case GPU_PRIM_TRI_FAN: {
/* Calculate maximum size. */
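/* A fan of N indices yields at most N - 2 triangles, i.e. (N - 2) * 3 list indices. */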
uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
BLI_assert(max_possible_verts > 0);
/* Allocate new buffer. */
optimized_ibo_ = MTLContext::get_global_memory_manager()->allocate(
max_possible_verts *
((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
true);
/* Populate new index buffer. */
if (index_type_ == GPU_INDEX_U16) {
Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
this->index_len_);
MutableSpan<uint16_t> output_data(
static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
emulated_v_count = populate_emulated_tri_fan_buf<uint16_t>(
orig_data, output_data, this->index_len_);
}
else {
Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
this->index_len_);
MutableSpan<uint32_t> output_data(
static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
emulated_v_count = populate_emulated_tri_fan_buf<uint32_t>(
orig_data, output_data, this->index_len_);
}
BLI_assert(emulated_v_count <= max_possible_verts);
/* Flush buffer and output. */
optimized_ibo_->flush();
optimized_primitive_type_ = input_prim_type;
in_out_v_count = emulated_v_count;
in_out_primitive_type = GPU_PRIM_TRIS;
} break;
case GPU_PRIM_TRI_STRIP: {
/* Calculate maximum size. */
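/* A strip of N indices likewise yields at most N - 2 triangles, i.e. (N - 2) * 3 list indices. */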
uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
BLI_assert(max_possible_verts > 0);
/* Allocate new buffer. */
optimized_ibo_ = MTLContext::get_global_memory_manager()->allocate(
max_possible_verts *
((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
true);
/* Populate new index buffer. */
if (index_type_ == GPU_INDEX_U16) {
Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
this->index_len_);
MutableSpan<uint16_t> output_data(
static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
emulated_v_count = populate_optimized_tri_strip_buf<uint16_t>(
orig_data, output_data, this->index_len_);
}
else {
Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
this->index_len_);
MutableSpan<uint32_t> output_data(
static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
emulated_v_count = populate_optimized_tri_strip_buf<uint32_t>(
orig_data, output_data, this->index_len_);
}
BLI_assert(emulated_v_count <= max_possible_verts);
/* Flush buffer and output. */
optimized_ibo_->flush();
optimized_primitive_type_ = input_prim_type;
in_out_v_count = emulated_v_count;
in_out_primitive_type = GPU_PRIM_TRIS;
} break;
case GPU_PRIM_LINE_STRIP: {
/* TODO(Metal): Line strip topology types would benefit from optimization to remove
* primitive restarts, however, these do not occur frequently, nor with
* significant geometry counts. */
MTL_LOG_INFO("TODO: Primitive topology: Optimize line strip topology types\n");
} break;
case GPU_PRIM_LINE_LOOP: {
/* TODO(Metal): Line Loop primitive type requires use of optimized index buffer for
* emulation, if used with indexed rendering. This path is currently not hit as #LineLoop
* does not currently appear to be used alongside an index buffer. */
MTL_LOG_WARNING(
"TODO: Primitive topology: Line Loop Index buffer optimization required for "
"emulation.\n");
} break;
case GPU_PRIM_TRIS:
case GPU_PRIM_LINES:
case GPU_PRIM_POINTS: {
/* Should not get here - TRIS/LINES/POINTS do not require emulation or optimization. */
BLI_assert_unreachable();
return nil;
}
default:
/* Should not get here - Invalid primitive type. */
BLI_assert_unreachable();
break;
}
}
/* Return optimized buffer. */
if (optimized_ibo_ != nullptr) {
/* Delete original buffer if one still exists, as we no longer need it. */
if (ibo_ != nullptr) {
ibo_->free();
ibo_ = nullptr;
}
/* Output params. */
in_out_v_count = emulated_v_count;
in_out_primitive_type = GPU_PRIM_TRIS;
return optimized_ibo_->get_metal_buffer();
}
return nil;
}
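/* Usage sketch (hypothetical call site; the actual draw path lives elsewhere in
* the Metal backend and may differ):
*
*   GPUPrimType prim_type = GPU_PRIM_TRI_FAN;
*   uint32_t v_count = index_len;
*   id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(prim_type, v_count);
*   // prim_type is now GPU_PRIM_TRIS and v_count covers the expanded triangle list.
*/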
void MTLIndexBuf::strip_restart_indices()
{
/* We remove point buffer primitive restart indices by swapping restart indices
* with the first valid index at the end of the index buffer and reducing the
* length. Primitive restarts are invalid in Metal for non-restart-compatible
* primitive types. Unlike for lines and triangles, we also cannot simply use zero,
* as we cannot create degenerate point primitives to hide geometry, since each
* point is independent.
* Instead, we must remove these hidden indices from the index buffer.
* NOTE: This happens prior to index squeezing so operate on 32-bit indices. */
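/* Example: with R = 0xFFFFFFFF, [5, R, 7, 8, R] first drops the trailing R
* (length 5 -> 4), then swaps 8 into slot 1 and shrinks again (length -> 3),
* leaving [5, 8, 7]. */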
MutableSpan<uint32_t> uint_idx(static_cast<uint32_t *>(data_), index_len_);
for (uint i = 0; i < index_len_; i++) {
if (uint_idx[i] == 0xFFFFFFFFu) {
/* Find swap index at end of index buffer. */
int swap_index = -1;
for (uint j = index_len_ - 1; j >= i && index_len_ > 0; j--) {
/* If end index is restart, just reduce length. */
if (uint_idx[j] == 0xFFFFFFFFu) {
index_len_--;
continue;
}
/* Otherwise assign swap index. */
swap_index = j;
break;
}
/* If index_len_ == 0, this means all indices were flagged as hidden, with restart index
* values. Hence we will entirely skip the draw. */
if (index_len_ > 0) {
/* If swap index is not valid, then there were no valid non-restart indices
* to swap with. However, the above loop will have removed these indices by
* reducing the length of indices. Debug assertions verify that the restart
* index is no longer included. */
if (swap_index == -1) {
BLI_assert(index_len_ <= i);
}
else {
/* If we have found an index we can swap with, flip the values.
* We also reduce the length. As per above loop, swap_index should
* now be outside the index length range. */
uint32_t swap_index_value = uint_idx[swap_index];
uint_idx[i] = swap_index_value;
uint_idx[swap_index] = 0xFFFFFFFFu;
index_len_--;
BLI_assert(index_len_ <= swap_index);
}
}
}
}
#ifndef NDEBUG
/* Flag as having been stripped to ensure invalid usage is tracked. */
point_restarts_stripped_ = true;
#endif
}
/** \} */
}  // namespace blender::gpu