/* SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"

#include "BLI_span.hh"

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Core MTLIndexBuf implementation.
 * \{ */

MTLIndexBuf::~MTLIndexBuf()
{
  if (ibo_ != nullptr && !this->is_subrange_) {
    ibo_->free();
  }
  this->free_optimized_buffer();
}

void MTLIndexBuf::free_optimized_buffer()
{
  if (optimized_ibo_) {
    optimized_ibo_->free();
    optimized_ibo_ = nullptr;
  }
}

void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
{
  /* Flag buffer as incompatible with optimized/patched buffers, as contents
   * can now have partial modifications from the GPU. */
  this->flag_can_optimize(false);
  this->free_optimized_buffer();

  /* Ensure we have a valid IBO. */
  BLI_assert(this->ibo_);

  /* TODO(Metal): Support index buffer SSBOs. Dependent on compute implementation. */
  MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
}

const uint32_t *MTLIndexBuf::read() const
{
  if (ibo_ != nullptr) {
    /* Return host pointer. */
    void *data = ibo_->get_host_ptr();
    return static_cast<uint32_t *>(data);
  }
  BLI_assert(false && "Index buffer not ready to be read.");
  return nullptr;
}

void MTLIndexBuf::upload_data()
{
  /* Handle sub-range upload. */
  if (is_subrange_) {
    MTLIndexBuf *mtlsrc = static_cast<MTLIndexBuf *>(src_);
    mtlsrc->upload_data();

#ifndef NDEBUG
    BLI_assert_msg(!mtlsrc->point_restarts_stripped_,
                   "Cannot use sub-range on stripped point buffer.");
#endif

    /* If the parent sub-range allocation has changed, update our index buffer. */
    if (alloc_size_ != mtlsrc->alloc_size_ || ibo_ != mtlsrc->ibo_) {
      /* Update index buffer and allocation from source. */
      alloc_size_ = mtlsrc->alloc_size_;
      ibo_ = mtlsrc->ibo_;

      /* Reset any allocated patched or optimized index buffers. */
      this->free_optimized_buffer();
    }
    return;
  }

  /* If new data is ready and an index buffer already exists, release the current one. */
  if ((ibo_ != nullptr) && (this->data_ != nullptr)) {
    MTL_LOG_INFO("Re-creating index buffer with new data. IndexBuf %p\n", this);
    ibo_->free();
    ibo_ = nullptr;
  }

  /* Prepare buffer and upload data. */
  if (ibo_ == nullptr && data_ != nullptr) {
    alloc_size_ = this->size_get();
    if (alloc_size_ == 0) {
      MTL_LOG_WARNING("Trying to allocate index buffer with size=0 bytes\n");
    }
    else {
      ibo_ = MTLContext::get_global_memory_manager().allocate_with_data(alloc_size_, true, data_);
      BLI_assert(ibo_);
      ibo_->set_label(@"Index Buffer");
    }

    /* No need to keep a copy of data_ in system memory. */
    MEM_SAFE_FREE(data_);
  }
}
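
// Usage sketch (illustrative; the builder calls below belong to the generic GPU
// front-end and are shown only as an assumed flow, not as part of this file):
//   GPUIndexBufBuilder builder;
//   GPU_indexbuf_init(&builder, GPU_PRIM_TRIS, tri_len, vert_len);
//   /* ... add triangles ... */
//   GPUIndexBuf *ibuf = GPU_indexbuf_build(&builder);  /* Fills host `data_`. */
// The first draw using `ibuf` is assumed to reach MTLIndexBuf::upload_data(),
// which allocates `ibo_`, uploads the contents, and frees the host-side copy.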

void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data)
{
  BLI_assert(!is_subrange_);

  /* If host-side data still exists, modify it and upload as normal. */
  if (data_ != nullptr) {
    /* Free the index buffer if one exists. */
    if (ibo_ != nullptr) {
      ibo_->free();
      ibo_ = nullptr;
    }

    BLI_assert(start + len <= this->size_get());

    /* Apply start byte offset to the data pointer and modify host-side data. */
    uint8_t *modified_base_ptr = static_cast<uint8_t *>(data_) + start;
    memcpy(modified_base_ptr, data, len);
    return;
  }

  /* Verify buffer. */
  BLI_assert(ibo_ != nullptr);

  /* Otherwise, we will inject a data update, using staged data, into the command stream.
   * Stage update contents in a temporary buffer. */
  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
  BLI_assert(ctx);
  MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len);
  memcpy(range.data, data, len);

  /* Copy updated contents into the primary buffer.
   * These changes need to be uploaded via blit to ensure the data copies happen in-order. */
  id<MTLBuffer> dest_buffer = ibo_->get_metal_buffer();
  BLI_assert(dest_buffer != nil);

  id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
  [enc copyFromBuffer:range.metal_buffer
         sourceOffset:(uint32_t)range.buffer_offset
             toBuffer:dest_buffer
    destinationOffset:start
                 size:len];

  /* Synchronize changes back to host to ensure CPU-side data is up-to-date for
   * non-Shared buffers. */
  if (dest_buffer.storageMode == MTLStorageModeManaged) {
    [enc synchronizeResource:dest_buffer];
  }

  /* Invalidate patched/optimized buffers and flag the buffer as incompatible
   * with them, as contents now have partial modifications. */
  this->free_optimized_buffer();
  this->flag_can_optimize(false);
}
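
// Example (illustrative; values are hypothetical): partially updating an index
// buffer whose host copy has already been freed. `start`/`len` are in bytes:
//   const uint32_t new_indices[3] = {10, 11, 12};
//   ibuf->update_sub(6 * sizeof(uint32_t), sizeof(new_indices), new_indices);
// The update is staged in a scratch buffer and applied via a blit, so the copy
// is ordered correctly relative to other encoded GPU commands.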

void MTLIndexBuf::flag_can_optimize(bool can_optimize)
{
  can_optimize_ = can_optimize;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Index buffer optimization and topology emulation
 *
 * Optimize index buffers by eliminating restart-indices, and emulate
 * unsupported primitive topologies, e.g. Triangle Fan and Line Loop.
 * \{ */

/* Returns total vertices in new buffer. */
template<typename T>
static uint32_t populate_optimized_tri_strip_buf(Span<T> original_data,
                                                 MutableSpan<T> output_data,
                                                 uint32_t input_index_len)
{
  /* Generate #TriangleList from #TriangleStrip. */
  uint32_t current_vert_len = 0;
  uint32_t current_output_ind = 0;
  T indices[3];

  for (uint32_t c_index = 0; c_index < input_index_len; c_index++) {
    T current_index = original_data[c_index];
    if (current_index == T(-1)) {
      /* Stop current primitive. Move onto next. */
      current_vert_len = 0;
    }
    else {
      if (current_vert_len < 3) {
        /* Cache the first three indices before generating a triangle,
         * in case we have bad primitive-restarts. */
        indices[current_vert_len] = current_index;
      }

      /* Emit a triangle once we reach 3 input verts in the current strip. */
      if (current_vert_len == 2) {
        /* First triangle in strip. */
        output_data[current_output_ind++] = indices[0];
        output_data[current_output_ind++] = indices[1];
        output_data[current_output_ind++] = indices[2];
      }
      else if (current_vert_len > 2) {
        /* All other triangles in the strip are populated using the two previous
         * vertices from the last emitted triangle and the latest index,
         * alternating winding order between triangles. */
        uint32_t tri_id = current_vert_len - 2;
        uint32_t base_output_ind = current_output_ind;
        if ((tri_id % 2) == 0) {
          output_data[base_output_ind + 0] = output_data[base_output_ind - 3];
          output_data[base_output_ind + 1] = output_data[base_output_ind - 1];
          output_data[base_output_ind + 2] = current_index;
        }
        else {
          output_data[base_output_ind + 0] = output_data[base_output_ind - 1];
          output_data[base_output_ind + 1] = output_data[base_output_ind - 2];
          output_data[base_output_ind + 2] = current_index;
        }
        current_output_ind += 3;
      }

      /* Increment relative vertex index. */
      current_vert_len++;
    }
  }
  return current_output_ind;
}
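
// Worked example (illustrative) for the strip conversion above, where R denotes
// the restart index T(-1): the input strip
//   [0, 1, 2, 3, R, 4, 5, 6]
// becomes the restart-free triangle list
//   [0, 1, 2,  2, 1, 3,  4, 5, 6]
// with odd triangles flipped to preserve winding order.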

/* Returns total vertices in new buffer. */
template<typename T>
static uint32_t populate_emulated_tri_fan_buf(Span<T> original_data,
                                              MutableSpan<T> output_data,
                                              uint32_t input_index_len)
{
  /* Generate #TriangleList from #TriangleFan. */
  T base_prim_ind_val = 0;
  uint32_t current_vert_len = 0;
  uint32_t current_output_ind = 0;
  T indices[3];

  for (uint32_t c_index = 0; c_index < input_index_len; c_index++) {
    T current_index = original_data[c_index];
    if (current_index == T(-1)) {
      /* Stop current primitive. Move onto next. */
      current_vert_len = 0;
    }
    else {
      if (current_vert_len < 3) {
        /* Cache the first three indices before generating a triangle,
         * in case we have bad primitive-restarts. */
        indices[current_vert_len] = current_index;
      }

      /* Emit a triangle once we reach 3 input verts in the current fan. */
      if (current_vert_len == 2) {
        /* First triangle in fan. */
        output_data[current_output_ind++] = indices[0];
        output_data[current_output_ind++] = indices[1];
        output_data[current_output_ind++] = indices[2];
        base_prim_ind_val = indices[0];
      }
      else if (current_vert_len > 2) {
        /* All other triangles in the fan share the fan origin and are populated
         * using the previous vertex and the latest index. */
        uint32_t base_output_ind = current_output_ind;

        output_data[base_output_ind + 0] = base_prim_ind_val;
        output_data[base_output_ind + 1] = output_data[base_output_ind - 1];
        output_data[base_output_ind + 2] = current_index;
        current_output_ind += 3;
      }

      /* Increment relative vertex index. */
      current_vert_len++;
    }
  }
  return current_output_ind;
}
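
// Worked example (illustrative) for the fan emulation above: the input fan
//   [0, 1, 2, 3, 4]
// becomes the triangle list
//   [0, 1, 2,  0, 2, 3,  0, 3, 4]
// where every emitted triangle shares the fan origin (index 0).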

id<MTLBuffer> MTLIndexBuf::get_index_buffer(GPUPrimType &in_out_primitive_type,
                                            uint32_t &in_out_v_count)
{
  /* Determine whether to return the original index buffer, emulate an
   * unsupported primitive type, or optimize a restart-compatible type for
   * faster performance. */
  bool should_optimize_or_emulate = (in_out_primitive_type == GPU_PRIM_TRI_FAN) ||
                                    (in_out_primitive_type == GPU_PRIM_TRI_STRIP);
  if (!should_optimize_or_emulate || is_subrange_ || !can_optimize_) {
    /* Ensure we are not optimized. */
    BLI_assert(this->optimized_ibo_ == nullptr);

    /* Return regular index buffer. */
    BLI_assert(this->ibo_ && this->ibo_->get_metal_buffer());
    return this->ibo_->get_metal_buffer();
  }

  /* Perform optimization on type. */
  GPUPrimType input_prim_type = in_out_primitive_type;
  this->upload_data();
  if (!ibo_ && optimized_ibo_ == nullptr) {
    /* Cannot optimize the buffer if no source IBO exists. */
    return nil;
  }

  /* Verify whether the existing optimized index buffer is valid. */
  if (optimized_ibo_ != nullptr && optimized_primitive_type_ != input_prim_type) {
    BLI_assert_msg(false,
                   "Cannot change the optimized primitive format after generation, as source "
                   "index buffer data is discarded.");
    return nil;
  }

  /* Generate optimized index buffer. */
  if (optimized_ibo_ == nullptr) {

    /* Generate unwrapped index buffer. */
    switch (input_prim_type) {
      case GPU_PRIM_TRI_FAN: {

        /* Calculate maximum size. */
        uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
        BLI_assert(max_possible_verts > 0);

        /* Allocate new buffer. */
        optimized_ibo_ = MTLContext::get_global_memory_manager().allocate(
            max_possible_verts *
                ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
            true);

        /* Populate new index buffer. */
        if (index_type_ == GPU_INDEX_U16) {
          Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint16_t> output_data(
              static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_emulated_tri_fan_buf<uint16_t>(
              orig_data, output_data, this->index_len_);
        }
        else {
          Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint32_t> output_data(
              static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_emulated_tri_fan_buf<uint32_t>(
              orig_data, output_data, this->index_len_);
        }

        BLI_assert(emulated_v_count <= max_possible_verts);

        /* Flush buffer and output. */
        optimized_ibo_->flush();
        optimized_primitive_type_ = input_prim_type;
        in_out_v_count = emulated_v_count;
        in_out_primitive_type = GPU_PRIM_TRIS;
      } break;

      case GPU_PRIM_TRI_STRIP: {

        /* Calculate maximum size. */
        uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
        BLI_assert(max_possible_verts > 0);

        /* Allocate new buffer. */
        optimized_ibo_ = MTLContext::get_global_memory_manager().allocate(
            max_possible_verts *
                ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
            true);

        /* Populate new index buffer. */
        if (index_type_ == GPU_INDEX_U16) {
          Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint16_t> output_data(
              static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_optimized_tri_strip_buf<uint16_t>(
              orig_data, output_data, this->index_len_);
        }
        else {
          Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint32_t> output_data(
              static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_optimized_tri_strip_buf<uint32_t>(
              orig_data, output_data, this->index_len_);
        }

        BLI_assert(emulated_v_count <= max_possible_verts);

        /* Flush buffer and output. */
        optimized_ibo_->flush();
        optimized_primitive_type_ = input_prim_type;
        in_out_v_count = emulated_v_count;
        in_out_primitive_type = GPU_PRIM_TRIS;
      } break;

      case GPU_PRIM_LINE_STRIP: {
        /* TODO(Metal): Line strip topology types would benefit from optimization to remove
         * primitive restarts, however, these do not occur frequently, nor with significant
         * geometry counts. */
        MTL_LOG_INFO("TODO: Primitive topology: Optimize line strip topology types\n");
      } break;

      case GPU_PRIM_LINE_LOOP: {
        /* TODO(Metal): The Line Loop primitive type requires an optimized index buffer for
         * emulation if used with indexed rendering. This path is currently not hit, as
         * #LineLoop does not appear to be used alongside an index buffer. */
        MTL_LOG_WARNING(
            "TODO: Primitive topology: Line Loop index buffer optimization required for "
            "emulation.\n");
      } break;

      case GPU_PRIM_TRIS:
      case GPU_PRIM_LINES:
      case GPU_PRIM_POINTS: {
        /* Should not get here - TRIS/LINES/POINTS do not require emulation or optimization. */
        BLI_assert_unreachable();
        return nil;
      }

      default:
        /* Should not get here - Invalid primitive type. */
        BLI_assert_unreachable();
        break;
    }
  }

  /* Return optimized buffer. */
  if (optimized_ibo_ != nullptr) {

    /* Delete the original buffer if one still exists, as we no longer need it. */
    if (ibo_ != nullptr) {
      ibo_->free();
      ibo_ = nullptr;
    }

    /* Output params. */
    in_out_v_count = emulated_v_count;
    in_out_primitive_type = GPU_PRIM_TRIS;
    return optimized_ibo_->get_metal_buffer();
  }
  return nil;
}
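
// Usage sketch (illustrative; the caller and variable names are hypothetical):
//   GPUPrimType prim_type = GPU_PRIM_TRI_FAN;
//   uint32_t v_count = index_len;
//   id<MTLBuffer> idx_buf = mtl_ibuf->get_index_buffer(prim_type, v_count);
//   /* On return, prim_type == GPU_PRIM_TRIS and v_count holds the emulated
//    * triangle-list vertex count for the draw call. */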

void MTLIndexBuf::strip_restart_indices()
{
  /* We remove point-buffer primitive restart indices by swapping each restart
   * index with the last valid index at the end of the buffer and reducing the
   * length. Primitive restarts are invalid in Metal for non-restart-compatible
   * primitive types. Unlike for Lines and Triangles, we also cannot simply
   * substitute zero, as we cannot create degenerate point primitives to hide
   * geometry: each point is independent.
   * Instead, we must remove these hidden indices from the index buffer.
   * NOTE: This happens prior to index squeezing, so we operate on 32-bit
   * indices. */
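
  /* Example (illustrative): for indices [5, R, 6, R, 7], where R = 0xFFFFFFFF,
   * the loop below produces [5, 7, 6] and reduces `index_len_` to 3. */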
  MutableSpan<uint32_t> uint_idx(static_cast<uint32_t *>(data_), index_len_);
  for (uint i = 0; i < index_len_; i++) {
    if (uint_idx[i] == 0xFFFFFFFFu) {

      /* Find a swap index at the end of the index buffer.
       * NOTE: Use a signed loop counter, as `j` can legitimately drop below `i`
       * (and below zero when `i` is 0) once trailing restarts are removed. */
      int swap_index = -1;
      for (int j = int(index_len_) - 1; j >= int(i); j--) {
        /* If the end index is also a restart, just reduce the length. */
        if (uint_idx[j] == 0xFFFFFFFFu) {
          index_len_--;
          continue;
        }
        /* Otherwise assign swap index. */
        swap_index = j;
        break;
      }

      /* If the swap index is not valid, there were no valid non-restart indices
       * to swap with. However, the above loop will have removed these indices by
       * reducing the index length. Debug assertions verify that the restart
       * index is no longer included. */
      if (swap_index == -1) {
        BLI_assert(index_len_ <= i);
      }
      else {
        /* If we have found an index to swap with, flip the values and reduce the
         * length. As per the above loop, `swap_index` should now be outside the
         * index length range. */
        uint32_t swap_index_value = uint_idx[swap_index];
        uint_idx[i] = swap_index_value;
        uint_idx[swap_index] = 0xFFFFFFFFu;
        index_len_--;
        BLI_assert(index_len_ <= uint(swap_index));
      }
    }
  }

#ifndef NDEBUG
  /* Flag as having been stripped to ensure invalid usage is tracked. */
  point_restarts_stripped_ = true;
#endif
}

/** \} */

}  // namespace blender::gpu