Metal: Storage buffer and explicit bind location support #107175

Merged
Clément Foucault merged 4 commits from Jason-Fielder/blender:MetalSSBO_Support_Apr20 into main 2023-05-03 11:46:36 +02:00
27 changed files with 1456 additions and 300 deletions

source/blender/gpu/CMakeLists.txt

@ -275,6 +275,7 @@ set(METAL_SRC
metal/mtl_shader_generator.mm
metal/mtl_shader_interface.mm
metal/mtl_state.mm
metal/mtl_storage_buffer.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
@ -300,6 +301,7 @@ set(METAL_SRC
metal/mtl_shader_interface_type.hh
metal/mtl_shader_shared.h
metal/mtl_state.hh
metal/mtl_storage_buffer.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
metal/mtl_vertex_buffer.hh

source/blender/gpu/metal/mtl_backend.hh

@ -52,11 +52,7 @@ class MTLBackend : public GPUBackend {
void samplers_update() override;
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override;
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
{
/* Placeholder */
}
void compute_dispatch_indirect(StorageBuf *indirect_buf) override;
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */

source/blender/gpu/metal/mtl_backend.mm

@ -16,6 +16,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -100,8 +101,7 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
{
/* TODO(Metal): Implement MTLStorageBuf. */
return nullptr;
return new MTLStorageBuf(size, usage, name);
}
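For context, this hook is reached through the public GPU API; a minimal caller sketch, assuming the standard GPU_storage_buffer.h entry points (`size`, `binding` and the buffer name are placeholders):

GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(size, nullptr, GPU_USAGE_STATIC, "my_ssbo");
GPU_storagebuf_bind(ssbo, binding);
GPU_storagebuf_free(ssbo);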
VertBuf *MTLBackend::vertbuf_alloc()
@ -398,16 +398,15 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] ||
MTLBackend::capabilities.supports_family_mac1 ||
MTLBackend::capabilities.supports_family_mac2);
/* TODO(Metal): Add support? */
GCaps.shader_draw_parameters_support = false;
GCaps.compute_shader_support = true;
GCaps.shader_storage_buffer_objects_support = true;
GCaps.shader_draw_parameters_support = true;
GCaps.geometry_shader_support = false;
GCaps.shader_storage_buffer_objects_support =
false; /* TODO(Metal): implement Storage Buffer support. */
/* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
* Can use argument buffers if a higher limit is required. */
GCaps.max_shader_storage_buffer_bindings = 24;
GCaps.max_shader_storage_buffer_bindings = 14;
if (GCaps.compute_shader_support) {
GCaps.max_work_group_count[0] = 65535;
@ -460,6 +459,18 @@ void MTLBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups
}
}
void MTLBackend::compute_dispatch_indirect(StorageBuf *indirect_buf)
{
/* Fetch Context.
* With Metal, workload submission and resource management occurs within the context.
* Call compute dispatch on valid context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx != nullptr);
if (ctx) {
ctx->compute_dispatch_indirect(indirect_buf);
}
}

Indentation isn't correct.
/** \} */
} // blender::gpu

source/blender/gpu/metal/mtl_batch.hh

@ -80,10 +80,7 @@ class MTLBatch : public Batch {
~MTLBatch(){};
void draw(int v_first, int v_count, int i_first, int i_count) override;
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override
{
/* TODO(Metal): Support indirect draw commands. */
}
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override;
void multi_draw_indirect(GPUStorageBuf *indirect_buf,
int count,
intptr_t offset,
@ -94,7 +91,7 @@ class MTLBatch : public Batch {
/* Returns an initialized RenderCommandEncoder for drawing if all is good.
* Otherwise, nil. */
id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count);
id<MTLRenderCommandEncoder> bind(uint v_count);
void unbind();
/* Convenience getters. */
@ -118,6 +115,7 @@ class MTLBatch : public Batch {
private:
void shader_bind();
void draw_advanced(int v_first, int v_count, int i_first, int i_count);
void draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset);
int prepare_vertex_binding(MTLVertBuf *verts,
MTLRenderPipelineStateDescriptor &desc,
const MTLShaderInterface *interface,
@ -126,8 +124,7 @@ class MTLBatch : public Batch {
id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count);
void prepare_vertex_descriptor_and_bindings(
MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count);
void prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers);
MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
};

source/blender/gpu/metal/mtl_batch.mm

@ -20,6 +20,7 @@
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_vertex_buffer.hh"
#include <string>
@ -37,6 +38,14 @@ void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
this->draw_advanced(v_first, v_count, i_first, i_count);
}
void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
if (this->flag & GPU_BATCH_INVALID) {
this->shader_in_use_ = false;
}
this->draw_advanced_indirect(indirect_buf, offset);
}
void MTLBatch::shader_bind()
{
if (active_shader_ && active_shader_->is_valid()) {
@ -394,7 +403,7 @@ int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
return -1;
}
id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count)
id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_count)
{
/* Setup draw call and render pipeline state here. Called by every draw, but setup here so that
* MTLDrawList only needs to perform setup a single time. */
@ -440,7 +449,7 @@ id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_fi
* shader's input.
* A unique vertex descriptor will result in a new PipelineStateObject
* being generated for the currently bound shader. */
prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count);
prepare_vertex_descriptor_and_bindings(buffers, num_buffers);
/* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer
* data operations are required. */
@ -585,8 +594,7 @@ id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_fi
void MTLBatch::unbind() {}
void MTLBatch::prepare_vertex_descriptor_and_bindings(
MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count)
void MTLBatch::prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers)
{
/* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound.
@ -743,8 +751,8 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
#endif
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLRenderCommandEncoder> rec = this->bind(v_count);
if (rec == nil) {
return;
}
@ -880,6 +888,84 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
this->unbind();
}
void MTLBatch::draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
id<MTLRenderCommandEncoder> rec = this->bind(0);
if (rec == nil) {
printf("Failed to open Render Command encoder for DRAW INDIRECT\n");
We should go over the printf statements and replace them with CLog in a separate PR. Although these prints should not be visible to end users, using CLog gives more control to the developer and provides a priority/severity for the message, which will help developers who aren't familiar with the Metal backend/GPU back-end. We also have the `MTL_LOG_*` macros.
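A sketch of the suggested change here, using the MTL_LOG_ERROR macro that the Metal backend already defines:

if (rec == nil) {
  MTL_LOG_ERROR("Failed to open Render Command encoder for DRAW INDIRECT\n");
  return;
}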
return;
}
/* Rendering using SSBO Vertex Fetch is not supported by draw indirect.
* NOTE: Add support? */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
printf("Draw indirect for SSBO vertex fetch disabled\n");
return;
}
/* Fetch IndexBuffer and resolve primitive type. */
MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
if (mtl_needs_topology_emulation(this->prim_type)) {
BLI_assert_msg(false, "Metal Topology emulation unsupported for draw indirect.\n");

I usually use `BLI_assert_msg()` for unsupported features. That makes it harder to overlook.
return;
}
/* Fetch indirect buffer Metal handle. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(unwrap(indirect_buf));
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Draw Storage Buffer is nil.\n");

Redundant with the `MTL_LOG_WARNING` below.
return;
}
if (mtl_elem == NULL) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawPrimitives:mtl_prim_type indirectBuffer:mtl_indirect_buf indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
/* Fetch index buffer. May return an index buffer of a differing format,
* if index buffer optimization is used. In these cases, final_prim_type and
* index_count get updated with the new properties. */
MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
GPUPrimType final_prim_type = this->prim_type;
uint index_count = 0;
id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
BLI_assert(index_buffer != nil);
if (index_buffer != nil) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawIndexedPrimitives:mtl_prim_type
indexType:index_type
indexBuffer:index_buffer
indexBufferOffset:0
indirectBuffer:mtl_indirect_buf
indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
}
}
/* End of draw. */
this->unbind();
}
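For reference, the contents of the indirect buffer at `offset` must follow the argument layouts defined by the Metal API itself (shown here for clarity; these structs are Metal's, not part of this patch):

struct MTLDrawPrimitivesIndirectArguments {
  uint32_t vertexCount, instanceCount, vertexStart, baseInstance;
};
struct MTLDrawIndexedPrimitivesIndirectArguments {
  uint32_t indexCount, instanceCount, indexStart;
  int32_t baseVertex;
  uint32_t baseInstance;
};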
/** \} */
/* -------------------------------------------------------------------- */
@ -1010,4 +1096,4 @@ id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_typ
/** \} */
} // blender::gpu
} // namespace blender::gpu

source/blender/gpu/metal/mtl_capabilities.hh

@ -17,17 +17,17 @@ namespace gpu {
/* Max limit without using bind-less for samplers. */
#define MTL_MAX_DEFAULT_SAMPLERS 16
/* Total maximum buffers which can be bound to an encoder, for use within a shader.
* MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS must be <=
* than MTL_MAX_BUFFER_BINDINGS. */
* Uniform buffers and storage buffers share the set of available bind buffers.
* The total number of buffer bindings must be <= MTL_MAX_BUFFER_BINDINGS
* We also require an additional 3 core buffers for:
* - Argument buffer for bindless resources (e.g. samplers)
* - Transform feedback buffer
* - Default push constant block
* Along with up to 6+1 buffers for vertex data, and index data. */
#define MTL_MAX_BUFFER_BINDINGS 31
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 16
#define MTL_MAX_STORAGE_BUFFER_BINDINGS 12
#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31
#define MTL_MAX_UNIFORMS_PER_BLOCK 64
static_assert((MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS) <=
MTL_MAX_BUFFER_BINDINGS);
/* Context-specific limits -- populated in 'MTLBackend::platform_init' */
struct MTLCapabilities {

source/blender/gpu/metal/mtl_context.hh

@ -46,6 +46,7 @@ namespace blender::gpu {
class MTLContext;
class MTLCommandBufferManager;
class MTLUniformBuf;
class MTLStorageBuf;
/* Structs containing information on current binding state for textures and samplers. */
struct MTLTextureBinding {
@ -436,6 +437,11 @@ struct MTLUniformBufferBinding {
MTLUniformBuf *ubo;
};
struct MTLStorageBufferBinding {
bool bound;
MTLStorageBuf *ssbo;
};
struct MTLContextGlobalShaderPipelineState {
bool initialised;
@ -455,12 +461,18 @@ struct MTLContextGlobalShaderPipelineState {
MTLShader *active_shader;
/* Global Uniform Buffers. */
MTLUniformBufferBinding ubo_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLUniformBufferBinding ubo_bindings[MTL_MAX_BUFFER_BINDINGS];
/* Storage buffer. */
MTLStorageBufferBinding ssbo_bindings[MTL_MAX_BUFFER_BINDINGS];
/* Context Texture bindings. */
MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS];
MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS];
/* Image bindings. */
MTLTextureBinding image_bindings[MTL_MAX_TEXTURE_SLOTS];
/*** --- Render Pipeline State --- ***/
/* Track global render pipeline state for the current context. The functions in GPU_state.h
* modify these parameters. Certain values, tagged [PSO], are parameters which are required to be
@ -771,10 +783,10 @@ class MTLContext : public Context {
MTLFrameBuffer *get_default_framebuffer();
/* Context Global-State Texture Binding. */
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit);
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image);
void sampler_bind(MTLSamplerState, uint sampler_unit);
void texture_unbind(gpu::MTLTexture *mtl_texture);
void texture_unbind_all();
void texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image);
void texture_unbind_all(bool is_image);
void sampler_state_cache_init();
id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state);
id<MTLSamplerState> get_default_sampler_state();
@ -822,6 +834,7 @@ class MTLContext : public Context {
/* Compute. */
bool ensure_compute_pipeline_state();
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len);
void compute_dispatch_indirect(StorageBuf *indirect_buf);
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);

source/blender/gpu/metal/mtl_context.mm

@ -12,6 +12,7 @@
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -20,6 +21,7 @@
#include "GPU_capabilities.h"
#include "GPU_matrix.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_buffer.h"
@ -33,6 +35,27 @@
using namespace blender;
using namespace blender::gpu;
/* Debug option to bind null buffer for missing UBOs.
* Enabled by default. TODO: Ensure all required UBO bindings are present. */
#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO 1
/* Debug option to bind null buffer for missing SSBOs. NOTE: This is unsafe if replacing a
* write-enabled SSBO and should only be used for debugging to identify binding-related issues. */
#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO 0
/* Error or warning depending on debug flag. */
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO == 1
# define MTL_LOG_UBO_ERROR MTL_LOG_WARNING
#else
# define MTL_LOG_UBO_ERROR MTL_LOG_ERROR
#endif
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
# define MTL_LOG_SSBO_ERROR MTL_LOG_WARNING
#else
# define MTL_LOG_SSBO_ERROR MTL_LOG_ERROR
#endif
namespace blender::gpu {
/* Global memory manager. */
@ -265,11 +288,11 @@ MTLContext::~MTLContext()
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
/* Detach resource references */
/* Detach resource references. */
GPU_texture_unbind_all();
/* Unbind UBOs */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
/* Unbind UBOs. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr)
{
@ -279,7 +302,16 @@ MTLContext::~MTLContext()
}
}
/* Release Dummy resources */
/* Unbind SSBOs. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr)
{
this->pipeline_state.ssbo_bindings[i].ssbo->unbind();
}
}
/* Release Dummy resources. */
this->free_dummy_resources();
/* Release Sampler States. */
@ -371,7 +403,7 @@ void MTLContext::activate()
}
/* Reset UBO bind state. */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr)
{
@ -380,6 +412,16 @@ void MTLContext::activate()
}
}
/* Reset SSBO bind state. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr)
{
this->pipeline_state.ssbo_bindings[i].bound = false;
this->pipeline_state.ssbo_bindings[i].ssbo = nullptr;
}
}
/* Ensure imm active. */
immActivate();
}
@ -670,17 +712,27 @@ void MTLContext::pipeline_state_init()
/* Clear bindings state. */
for (int t = 0; t < GPU_max_textures(); t++) {
/* Textures. */
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].slot_index = -1;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
/* Images. */
this->pipeline_state.image_bindings[t].used = false;
this->pipeline_state.image_bindings[t].slot_index = -1;
this->pipeline_state.image_bindings[t].texture_resource = nullptr;
}
for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) {
this->pipeline_state.sampler_bindings[s].used = false;
}
for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) {
for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) {
this->pipeline_state.ubo_bindings[u].bound = false;
this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) {
this->pipeline_state.ssbo_bindings[u].bound = false;
this->pipeline_state.ssbo_bindings[u].ssbo = nullptr;
}
}
/*** State defaults -- restored by GPU_state_init. ***/
@ -1052,7 +1104,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
rps.last_bound_shader_state.pso_index_ !=
pipeline_state_instance->shader_pso_index);
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1088,25 +1140,22 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
if (ubo.buffer_index >= 0 && ubo.location >= 0) {
/* Explicit lookup location for UBO in bind table. */
const uint32_t ubo_location = ubo.location;
/* buffer(N) index of where to bind the UBO. */
const uint32_t buffer_index = ubo.buffer_index;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
if (this->pipeline_state.ubo_bindings[ubo_location].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer();
ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size();
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
* allocating zero buffers. */
@ -1145,13 +1194,15 @@ bool MTLContext::ensure_uniform_buffer_bindings(
/* If ubo size is smaller than the size expected by the shader, we need to bind the
* dummy buffer, which will be big enough, to avoid an OOB error. */
if (ubo_size < expected_size) {
MTL_LOG_INFO(
"[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
MTL_LOG_UBO_ERROR(
"[Error][UBO] UBO (UBO Name: %s) bound at location: %d (buffer[[%d]]) with size "
"%d (Expected size "
"%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
"over-binding, which is not used, but we need this to avoid validation "
"issues\n",
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index,
ubo_location,
pipeline_state_instance->base_uniform_buffer_index + buffer_index,
ubo_size,
expected_size,
shader_interface->get_name());
@ -1161,18 +1212,19 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
MTL_LOG_UBO_ERROR(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer slot: %d "
"(buffer[[%d]])-- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
ubo_location,
pipeline_state_instance->base_uniform_buffer_index + buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
@ -1185,17 +1237,17 @@ bool MTLContext::ensure_uniform_buffer_bindings(
/* Bind Vertex UBO. */
if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
rps.bind_vertex_buffer(ubo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment UBOs. */
if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
rps.bind_fragment_buffer(ubo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
MTL_LOG_UBO_ERROR(

Not sure what `gpu_uniformbuffer` refers to.
"[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
@ -1204,6 +1256,79 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuf's */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);

UBO > SSBO
if (ssbo.buffer_index >= 0 && ssbo.location >= 0) {
/* Explicit lookup location for SSBO in bind table. */
const uint32_t ssbo_location = ssbo.location;
/* buffer(N) index of where to bind the SSBO. */
const uint32_t buffer_index = ssbo.buffer_index;
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
UNUSED_VARS_NDEBUG(ssbo_size);
if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) {
/* Fetch SSBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size();
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at buffer location: %d "
"(buffer[[%d]]) -- "
"but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance->base_storage_buffer_index + buffer_index);
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
ssbo_buffer = this->get_null_buffer();
ssbo_size = [ssbo_buffer length];
#endif
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index +
buffer_index;
/* Bind Vertex SSBO. */
if (bool(ssbo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ssbo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment SSBOs. */
if (bool(ssbo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but bound MTLStorageBuf was nil.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance->base_storage_buffer_index + buffer_index);
}
}
}
return true;
}
@ -1218,7 +1343,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
/* Fetch push constant block and bind. */
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1245,25 +1370,22 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
/* Explicit lookup location for UBO in bind table. */
const uint32_t ubo_location = ubo.location;
/* buffer(N) index of where to bind the UBO. */
const uint32_t buffer_index = ubo.buffer_index;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
if (this->pipeline_state.ubo_bindings[ubo_location].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer();
ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size();
UNUSED_VARS_NDEBUG(ubo_size);
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
@ -1277,18 +1399,19 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
MTL_LOG_UBO_ERROR(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer location: %d "
"(buffer[[%d]]) -- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
ubo_location,
pipeline_state_instance.base_uniform_buffer_index + buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
@ -1297,14 +1420,14 @@ bool MTLContext::ensure_uniform_buffer_bindings(
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
/* Bind Compute UBO. */
if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
cs.bind_compute_buffer(ubo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
MTL_LOG_UBO_ERROR(
"[UBO] Compute Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
@ -1313,6 +1436,72 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuffers. */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0 && ssbo.location >= 0) {
/* Explicit lookup location for SSBO in bind table. */
const uint32_t ssbo_location = ssbo.location;
/* buffer(N) index of where to bind the SSBO. */
const uint32_t buffer_index = ssbo.buffer_index;
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) {
/* Fetch SSBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size();
UNUSED_VARS_NDEBUG(ssbo_size);
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
ssbo_buffer = this->get_null_buffer();
ssbo_size = [ssbo_buffer length];
#endif
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index +
buffer_index;
/* Bind Compute SSBO. */
if (bool(ssbo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but bound MTLStorageBuf was nil.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
}
}
}
return true;
}
@ -1352,13 +1541,27 @@ void MTLContext::ensure_texture_bindings(
continue;
}
/* Determine bind lookup table depending on whether an image binding or texture.
* NOTE: Images and Texture Samplers share a binding table in Metal. */
bool is_resource_sampler = shader_texture_info.is_texture_sampler;
MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] =
(is_resource_sampler) ? this->pipeline_state.texture_bindings :
this->pipeline_state.image_bindings;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot = shader_texture_info.slot_index;
/* Explicit bind location for texture. */
int location = shader_texture_info.location;
/* Default sampler. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
if (resource_bind_table[location].used) {
gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource;
MTLSamplerBinding &bound_sampler = (is_resource_sampler) ?
this->pipeline_state.sampler_bindings[location] :
default_binding;
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
@ -1385,10 +1588,11 @@ void MTLContext::ensure_texture_bindings(
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
bound_texture,
slot,
shader_texture_info.type,
@ -1399,9 +1603,12 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No "
"texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
location,
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
@ -1417,7 +1624,6 @@ void MTLContext::ensure_texture_bindings(
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
@ -1427,16 +1633,16 @@ void MTLContext::ensure_texture_bindings(
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
}
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
is_resource_sampler ? "TextureSampler" : "TextureImage",
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
@ -1569,13 +1775,27 @@ void MTLContext::ensure_texture_bindings(
continue;
}
/* Determine bind lookup table depending on whether an image binding or texture.
* NOTE: Images and Texture Samplers share a binding table in Metal. */
bool is_resource_sampler = shader_texture_info.is_texture_sampler;
MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] =
(is_resource_sampler) ? this->pipeline_state.texture_bindings :
this->pipeline_state.image_bindings;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot = shader_texture_info.slot_index;
/* Explicit bind location for texture. */
int location = shader_texture_info.location;
/* Default sampler. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
if (resource_bind_table[location].used) {
gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource;
MTLSamplerBinding &bound_sampler = (is_resource_sampler) ?
this->pipeline_state.sampler_bindings[location] :
default_binding;
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
@ -1597,10 +1817,11 @@ void MTLContext::ensure_texture_bindings(
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
bound_texture,
slot,
shader_texture_info.type,
@ -1611,9 +1832,12 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No "
"texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
location,
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
@ -1636,9 +1860,10 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
is_resource_sampler ? "TextureSampler" : "TextureImage",
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
@ -1900,6 +2125,47 @@ bool MTLContext::ensure_compute_pipeline_state()
}
void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
{
/* Ensure all resources required by upcoming compute submission are correctly bound to avoid
* out of bounds reads/writes. */
if (!this->ensure_compute_pipeline_state()) {
return;
}
/* Shader instance. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
/* Begin compute encoder. */
id<MTLComputeCommandEncoder> compute_encoder =
this->main_command_buffer.ensure_begin_compute_encoder();
BLI_assert(compute_encoder != nil);
/* Bind PSO. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
cs.bind_pso(compute_pso_inst.pso);
/* Bind buffers. */
this->ensure_uniform_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);
/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);

Early out instead of indenting the code block.
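A sketch of the suggested early-out, matching the pattern compute_dispatch already uses above:

if (!this->ensure_compute_pipeline_state()) {
  return;
}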
}
/* Dispatch compute. */
[compute_encoder dispatchThreadgroups:MTLSizeMake(max_ii(groups_x_len, 1),
max_ii(groups_y_len, 1),
max_ii(groups_z_len, 1))
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
}
void MTLContext::compute_dispatch_indirect(StorageBuf *indirect_buf)
{
/* Ensure all resources required by upcoming compute submission are correctly bound. */
if (this->ensure_compute_pipeline_state()) {
@ -1927,11 +2193,22 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}
/* Dispatch compute. */
[compute_encoder dispatchThreadgroups:MTLSizeMake(groups_x_len, groups_y_len, groups_z_len)
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
/* Indirect Dispatch compute. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(indirect_buf);
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Compute dispatch storage buffer does not exist.\n");
return;
}
/* Indirect Compute dispatch. */
[compute_encoder
dispatchThreadgroupsWithIndirectBuffer:mtl_indirect_buf
indirectBufferOffset:0
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
}
}
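For reference, the indirect buffer is expected to hold a MTLDispatchThreadgroupsIndirectArguments value at the given offset; this is a Metal API type, shown here for clarity:

struct MTLDispatchThreadgroupsIndirectArguments {
  uint32_t threadgroupsPerGrid[3];
};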
@ -1980,7 +2257,7 @@ bool MTLContext::is_visibility_dirty() const
/** \name Texture State Management
* \{ */
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image)
{
BLI_assert(this);
BLI_assert(mtl_texture);
@ -1995,9 +2272,14 @@ void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
return;
}
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Bind new texture. */
this->pipeline_state.texture_bindings[texture_unit].texture_resource = mtl_texture;
this->pipeline_state.texture_bindings[texture_unit].used = true;
resource_bind_table[texture_unit].texture_resource = mtl_texture;
resource_bind_table[texture_unit].used = true;
mtl_texture->is_bound_ = true;
}
@ -2016,15 +2298,20 @@ void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit)
this->pipeline_state.sampler_bindings[sampler_unit] = {true, sampler_state};
}
void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture)
void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image)
{
BLI_assert(mtl_texture);
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Iterate through textures in state and unbind. */
for (int i = 0; i < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); i++) {
if (this->pipeline_state.texture_bindings[i].texture_resource == mtl_texture) {
this->pipeline_state.texture_bindings[i].texture_resource = nullptr;
this->pipeline_state.texture_bindings[i].used = false;
if (resource_bind_table[i].texture_resource == mtl_texture) {
resource_bind_table[i].texture_resource = nullptr;
resource_bind_table[i].used = false;
}
}
@ -2032,16 +2319,18 @@ void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture)
mtl_texture->is_bound_ = false;
}
void MTLContext::texture_unbind_all()
void MTLContext::texture_unbind_all(bool is_image)
{
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Iterate through context's bound textures. */
for (int t = 0; t < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); t++) {
if (this->pipeline_state.texture_bindings[t].used &&
this->pipeline_state.texture_bindings[t].texture_resource)
{
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
if (resource_bind_table[t].used && resource_bind_table[t].texture_resource) {
resource_bind_table[t].used = false;
resource_bind_table[t].texture_resource = nullptr;
}
}
}

source/blender/gpu/metal/mtl_drawlist.mm

@ -66,7 +66,7 @@ MTLDrawList::~MTLDrawList()
void MTLDrawList::init()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(MDI_ENABLED);
BLI_assert(data_ == nullptr);
@ -186,7 +186,7 @@ void MTLDrawList::submit()
/* Bind Batch to setup render pipeline state. */
BLI_assert(batch_ != nullptr);
id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
id<MTLRenderCommandEncoder> rec = batch_->bind(0);
if (rec == nil) {
BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
return;

source/blender/gpu/metal/mtl_index_buffer.hh

@ -18,12 +18,16 @@ namespace blender::gpu {
class MTLIndexBuf : public IndexBuf {
friend class MTLBatch;
friend class MTLDrawList;
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Metal buffer resource. */
gpu::MTLBuffer *ibo_ = nullptr;
uint64_t alloc_size_ = 0;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
#ifndef NDEBUG
/* Flags whether point index buffer has been compacted
* to remove false restart indices. */

source/blender/gpu/metal/mtl_index_buffer.mm

@ -7,6 +7,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "BLI_span.hh"
@ -22,6 +23,11 @@ MTLIndexBuf::~MTLIndexBuf()
ibo_->free();
}
this->free_optimized_buffer();
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLIndexBuf::free_optimized_buffer()
@ -42,8 +48,14 @@ void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
/* Ensure we have a valid IBO. */
BLI_assert(this->ibo_);
/* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */
MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
/* Ensure resource is initialized. */
this->upload_data();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
void MTLIndexBuf::read(uint32_t *data) const

source/blender/gpu/metal/mtl_query.mm

@ -61,7 +61,7 @@ void MTLQueryPool::init(GPUQueryType type)
void MTLQueryPool::begin_query()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
/* Ensure our allocated buffer pool has enough space for the current queries. */
int query_id = query_issued_;
@ -88,7 +88,7 @@ void MTLQueryPool::begin_query()
void MTLQueryPool::end_query()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder();
[rec setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
@ -96,7 +96,7 @@ void MTLQueryPool::end_query()
void MTLQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
/* Create a blit encoder to synchronize the query buffer results between
* GPU and CPU when not using shared-memory. */

source/blender/gpu/metal/mtl_shader.hh

@ -36,7 +36,7 @@ class MTLShaderInterface;
class MTLContext;
/* Debug control. */
#define MTL_SHADER_DEBUG_EXPORT_SOURCE 0
#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1
#define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0
/* Separate print used only during development and debugging. */
@ -71,7 +71,7 @@ struct MTLRenderPipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index;
int base_storage_buffer_index;
/* buffer bind slot used for null attributes (-1 if not needed). */
int null_attribute_buffer_index;
/* buffer bind used for transform feedback output buffer. */
@ -101,7 +101,7 @@ struct MTLComputePipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index = -1;
int base_storage_buffer_index = -1;
int threadgroup_x_len = 1;
int threadgroup_y_len = 1;

source/blender/gpu/metal/mtl_shader.mm

@ -386,7 +386,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
valid_ = true;
/* Prepare backing data storage for local uniforms. */
const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
this->push_constant_bindstate_mark_dirty(true);
@ -625,7 +625,7 @@ void MTLShader::warm_cache(int limit)
{
if (parent_shader_ != nullptr) {
MTLContext *ctx = MTLContext::get();
MTLShader *parent_mtl = reinterpret_cast<MTLShader *>(parent_shader_);
MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);
/* Extract PSO descriptors from parent shader. */
blender::Vector<MTLRenderPipelineStateDescriptor> descriptors;
@ -990,6 +990,19 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.

Naming convention: don't use uppercase prefixes for local variables.

This convention is also used throughout the module, so I would leave that to a cleanup commit.
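For illustration, the suggested convention applied to the new local (a sketch only; the `withName:` string for the MSL function constant would stay unchanged):

int storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
                                (mtl_interface->get_total_uniform_blocks() > 0 ?
                                     mtl_interface->get_total_uniform_blocks() :
                                     0);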
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
((mtl_interface->get_total_uniform_blocks() > 0) ?
mtl_interface->get_total_uniform_blocks() :
0);
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Transform feedback constant.
* Ensure buffer is placed after existing buffers, including default buffers. */
int MTL_transform_feedback_buffer_index = -1;
@ -997,9 +1010,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
/* If using argument buffers, insert index after argument buffer index. Otherwise, insert
* after uniform buffer bindings. */
MTL_transform_feedback_buffer_index =
(mtl_interface->uses_argument_buffer_for_samplers()) ?
(mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) :
(MTL_uniform_buffer_base_index + mtl_interface->get_max_ubo_index() + 2);
MTL_uniform_buffer_base_index +
((mtl_interface->uses_argument_buffer_for_samplers()) ?
(mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) :
(mtl_interface->get_max_buffer_index() + 2));
}
if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1132,10 +1146,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
* We need to ensure that the PSO will have valid bind-point ranges, or is using the
* appropriate bindless fallback path if any bind limits are exceeded. */
#ifdef NDEBUG
/* Ensure UBO and PushConstantBlock bindings are within range. */
/* Ensure Buffer bindings are within range. */
BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
MTL_MAX_BUFFER_BINDINGS,
"UBO bindings exceed the fragment bind table limit.");
"UBO and SSBO bindings exceed the fragment bind table limit.");
/* Transform feedback buffer. */
if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1179,6 +1193,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
pso_inst->frag = desc.fragmentFunction;
pso_inst->pso = pso;
pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
pso_inst->prim_type = prim_type;
@ -1283,6 +1298,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(mtl_interface);
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);
@ -1304,7 +1321,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* TODO: SSBO binding base index. */
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
((mtl_interface->get_total_uniform_blocks() > 0) ?
mtl_interface->get_total_uniform_blocks() :
0);
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Compile compute function. */
NSError *error = nullptr;
@ -1312,6 +1341,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
newFunctionWithName:compute_function_name_
constantValues:values
error:&error];
compute_function.label = [NSString stringWithUTF8String:this->name];
if (error) {
NSLog(@"Compile Error - Metal Shader compute function, error %@", error);
@ -1327,6 +1358,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
id<MTLComputePipelineState> pso = [ctx->device
newComputePipelineStateWithFunction:compute_function
error:&error];
if (error) {
NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
BLI_assert(false);
@ -1350,8 +1382,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
compute_pso_instance_.compute = [compute_function retain];
compute_pso_instance_.pso = [pso retain];
compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index;
/* TODO: Add SSBO base buffer index support. */
compute_pso_instance_.base_ssbo_buffer_index = -1;
compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index;
}
return true;
}
@ -1507,7 +1538,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder
}
/* Bind NULL buffer to given VBO slot. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
BLI_assert(null_buf);

source/blender/gpu/metal/mtl_shader_generator.hh

@ -105,6 +105,22 @@
* }
* \endcode
*
* -- Metal buffer bindings structure --
*
* A Metal shader contains several different binding types. All buffers are bound using the buffer(N)
* binding attribute tag. However, different ranges serve different purposes. The structure of the
* bindings always happens as follows:
*
* Vertex Buffers (N) <-- 0
* Index buffer
* Default Push constant block for uniforms <-- MTL_uniform_buffer_base_index
* Uniform buffers <-- MTL_uniform_buffer_base_index+1
* Storage buffers <-- MTL_storage_buffer_base_index
* Samplers/argument buffer table <-- last buffer + 1
* Transform feedback buffer <-- MTL_transform_feedback_buffer_index ~last_buffer+2
*
* Up to a maximum of 31 bindings.
*
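* Illustrative example: a hypothetical shader with 4 vertex buffers, 2 UBOs and
* 1 SSBO would resolve to vertex buffers in buffer(0..3), the push constant
* block at buffer(4) (MTL_uniform_buffer_base_index = 4), UBOs at buffer(5..6)
* and the SSBO at buffer(7) (MTL_storage_buffer_base_index = 4 + 1 + 2 = 7).
*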
* -- SSBO-vertex-fetchmode --
*
* SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
@ -200,14 +216,17 @@ struct MSLUniform {
}
};
struct MSLUniformBlock {
struct MSLBufferBlock {
std::string type_name;
std::string name;
ShaderStage stage;
bool is_array;
/* Resource index in buffer. */
uint slot;
uint location;
shader::Qualifier qualifiers;
bool operator==(const MSLUniformBlock &right) const
bool operator==(const MSLBufferBlock &right) const
{
return (type_name == right.type_name && name == right.name);
}
@ -221,11 +240,16 @@ enum MSLTextureSamplerAccess {
TEXTURE_ACCESS_READWRITE,
};
struct MSLTextureSampler {
struct MSLTextureResource {
ShaderStage stage;
shader::ImageType type;
std::string name;
MSLTextureSamplerAccess access;
/* Whether resource is a texture sampler or an image. */
bool is_texture_sampler;
/* Index in shader bind table [[texture(N)]]. */
uint slot;
/* Explicit bind index provided by ShaderCreateInfo. */
uint location;
eGPUTextureType get_texture_binding_type() const;
@ -233,20 +257,7 @@ struct MSLTextureSampler {
void resolve_binding_indices();
MSLTextureSampler(ShaderStage in_stage,
shader::ImageType in_sampler_type,
std::string in_sampler_name,
MSLTextureSamplerAccess in_access,
uint in_location)
: stage(in_stage),
type(in_sampler_type),
name(in_sampler_name),
access(in_access),
location(in_location)
{
}
bool operator==(const MSLTextureSampler &right) const
bool operator==(const MSLTextureResource &right) const
{
/* We do not compare stage as we want to avoid duplication of resources used across multiple
* stages. */
@ -370,9 +381,10 @@ class MSLGeneratorInterface {
public:
/** Shader stage input/output binding information.
* Derived from shader source reflection or GPUShaderCreateInfo. */
blender::Vector<MSLUniformBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> storage_blocks;
blender::Vector<MSLUniform> uniforms;
blender::Vector<MSLTextureSampler> texture_samplers;
blender::Vector<MSLTextureResource> texture_samplers;
blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings;
/* Should match vertex outputs, but defined separately as
@ -386,7 +398,8 @@ class MSLGeneratorInterface {
blender::Vector<char> clip_distances;
/* Shared Memory Blocks. */
blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
/* Max bind IDs. */
int max_tex_bind_index = 0;
/** GL Global usage. */
/* Whether GL position is used, or an alternative vertex output should be the default. */
bool uses_gl_Position;
@ -415,12 +428,14 @@ class MSLGeneratorInterface {
bool uses_gl_NumWorkGroups;
bool uses_gl_LocalInvocationIndex;
bool uses_gl_LocalInvocationID;
/* Early fragment tests. */
bool uses_early_fragment_test;
/* Parameters. */
shader::DepthWrite depth_write;
/* Bind index trackers. */
int max_ubo_slot = -1;
int max_buffer_slot = 0;
/* Shader buffer bind indices for argument buffers per shader stage.
* NOTE: Compute stage will re-use index 0. */
@ -463,8 +478,10 @@ class MSLGeneratorInterface {
/* Samplers. */
bool use_argument_buffer_for_samplers() const;
uint32_t num_samplers_for_stage(ShaderStage stage) const;
uint32_t max_sampler_index_for_stage(ShaderStage stage) const;
/* Returns the bind index, relative to MTL_uniform_buffer_base_index. */
/* Returns the bind index, relative to
* MTL_uniform_buffer_base_index+MTL_storage_buffer_base_index. */
uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage);
/* Code generation utility functions. */
@ -480,7 +497,7 @@ class MSLGeneratorInterface {
std::string generate_msl_fragment_entry_stub();
std::string generate_msl_compute_entry_stub();
std::string generate_msl_global_uniform_population(ShaderStage stage);
std::string generate_ubo_block_macro_chain(MSLUniformBlock block);
std::string generate_ubo_block_macro_chain(MSLBufferBlock block);
std::string generate_msl_uniform_block_population(ShaderStage stage);
std::string generate_msl_vertex_attribute_input_population();
std::string generate_msl_vertex_output_population();
@ -546,7 +563,9 @@ inline bool is_builtin_type(std::string type)
{
/* Add Types as needed. */
/* TODO(Metal): Consider replacing this with a `constexpr` hash and switch.
* Though most efficient and maintainable approach to be determined. */
* Though most efficient and maintainable approach to be determined.
* NOTE: Some duplicate types exist for Metal and GLSL representations, as generated type-names
* from create-info may use the GLSL signature. */
static std::map<std::string, eMTLDataType> glsl_builtin_types = {
{"float", MTL_DATATYPE_FLOAT},
{"vec2", MTL_DATATYPE_FLOAT2},
@ -556,10 +575,17 @@ inline bool is_builtin_type(std::string type)
{"ivec2", MTL_DATATYPE_INT2},
{"ivec3", MTL_DATATYPE_INT3},
{"ivec4", MTL_DATATYPE_INT4},
{"int2", MTL_DATATYPE_INT2},
{"int3", MTL_DATATYPE_INT3},
{"int4", MTL_DATATYPE_INT4},
{"uint32_t", MTL_DATATYPE_UINT},
{"uvec2", MTL_DATATYPE_UINT2},
{"uvec3", MTL_DATATYPE_UINT3},
{"uvec4", MTL_DATATYPE_UINT4},
{"uint", MTL_DATATYPE_UINT},
{"uint2", MTL_DATATYPE_UINT2},
{"uint3", MTL_DATATYPE_UINT3},
{"uint4", MTL_DATATYPE_UINT4},
{"mat3", MTL_DATATYPE_FLOAT3x3},
{"mat4", MTL_DATATYPE_FLOAT4x4},
{"bool", MTL_DATATYPE_INT},

View File

@ -712,8 +712,30 @@ static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &r
}
break;
}
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER:
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
int64_t array_offset = res.storagebuf.name.find_first_of("[");
bool writeable = (res.storagebuf.qualifiers & shader::Qualifier::WRITE) ==
shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
if (array_offset == -1) {
/* Create local class member as device pointer reference to bound SSBO.
* Given usage within a shader follows ssbo_name.ssbo_element syntax, we can
* dereference the pointer as the compiler will optimize this data fetch.
* To do this, we also give the SSBO name a post-fix of `_local` to avoid
* macro accessor collisions. */
os << memory_scope << res.storagebuf.type_name << " *" << res.storagebuf.name
<< "_local;\n";
os << "#define " << res.storagebuf.name << " (*" << res.storagebuf.name << "_local)\n";
}
else {
/* For arrays, we can directly provide the constant access pointer, as the array
* syntax will de-reference this at the correct fetch index. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
os << memory_scope << res.storagebuf.type_name << " *" << name_no_array << ";\n";
}
break;
}
}
}
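
For reference, a minimal sketch of the MSL this case emits for a hypothetical read-only storage block of type DrawData named drawdata_buf (names are illustrative, not from the patch):

constant DrawData *drawdata_buf_local;
#define drawdata_buf (*drawdata_buf_local)

Translated GLSL can then keep using drawdata_buf.member syntax while the macro transparently dereferences the bound device pointer.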
@ -976,6 +998,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
shd_builder_->glsl_fragment_source_.find("gl_FragDepth") !=
std::string::npos;
msl_iface.depth_write = info->depth_write_;
/* Early fragment tests. */
msl_iface.uses_early_fragment_test = info->early_fragment_test_;
}
/* Generate SSBO vertex fetch mode uniform data hooks. */
@ -1002,7 +1027,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::VERTEX) + 1 << std::endl;
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
@ -1164,7 +1189,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::VERTEX)) {
ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1193,7 +1218,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::FRAGMENT) + 1 << std::endl;
}
/* Inject common Metal header. */
@ -1277,7 +1302,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
/* Add Texture members. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::FRAGMENT)) {
ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1445,7 +1470,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_compute << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_compute << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::COMPUTE) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::COMPUTE) + 1 << std::endl;
}
/* Inject static workgroup sizes. */
@ -1486,7 +1511,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::COMPUTE)) {
ss_compute << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1563,6 +1588,31 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
this->set_compute_function_name(@"compute_function_entry");
#endif
/* DEBUG: Export source to file for manual verification. */
#if MTL_SHADER_DEBUG_EXPORT_SOURCE
NSFileManager *sharedFM = [NSFileManager defaultManager];
NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL];
NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent]
URLByAppendingPathComponent:@"Shaders/"
isDirectory:YES];
[sharedFM createDirectoryAtURL:shader_dir
withIntermediateDirectories:YES
attributes:nil
error:nil];
const char *path_cstr = [shader_dir fileSystemRepresentation];
std::ofstream compute_fs;
compute_fs.open(
(std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl")
.c_str());
compute_fs << ss_compute.str();
compute_fs.close();
shader_debug_printf(
"Compute Shader Saved to: %s\n",
(std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl").c_str());
#endif
NSString *msl_final_compute = [NSString stringWithUTF8String:ss_compute.str().c_str()];
this->shader_compute_source_from_msl(msl_final_compute);
@ -1700,8 +1750,11 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
/* NOTE: Metal requires Samplers and images to share slots. We will re-map these.
* If `auto_resource_location_` is not used, then slot collision could occur and
* this should be resolved in the original create-info. */
* this should be resolved in the original create-info.
* UBOs and SSBOs also share the same bind table. */
int texture_slot_id = 0;
int ubo_buffer_slot_id_ = 0;
int storage_buffer_slot_id_ = 0;
/* Determine max sampler slot for image resource offset, when not using auto resource location,
* as image resources cannot overlap sampler ranges. */
@ -1727,13 +1780,6 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
switch (res.bind_type) {
case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: {
/* Re-map sampler slot to share texture indices with images.
* Only applies if `auto_resource_location_` is enabled. */
BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
unsigned int used_slot = (create_info_->auto_resource_location_) ? (texture_slot_id++) :
res.slot;
BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Samplers to have access::sample by default. */
MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE;
/* TextureBuffers must have read/write/read-write access pattern. */
@ -1744,22 +1790,21 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
}
MSLTextureSampler msl_tex(
ShaderStage::ANY, res.sampler.type, res.sampler.name, access, used_slot);
MSLTextureResource msl_tex;
msl_tex.stage = ShaderStage::ANY;
msl_tex.type = res.sampler.type;
msl_tex.name = res.sampler.name;
msl_tex.access = access;
msl_tex.slot = texture_slot_id++;
msl_tex.location = (create_info_->auto_resource_location_) ? msl_tex.slot : res.slot;
msl_tex.is_texture_sampler = true;
BLI_assert(msl_tex.slot < MTL_MAX_TEXTURE_SLOTS);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(max_tex_bind_index, msl_tex.slot);
} break;
case shader::ShaderCreateInfo::Resource::BindType::IMAGE: {
/* Re-map sampler slot to share texture indices with samplers.
* Automatically applies if `auto_resource_location_` is enabled.
* Otherwise, if not using automatic resource location, offset by max sampler slot. */
BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
unsigned int used_slot = (create_info_->auto_resource_location_) ?
(texture_slot_id++) :
res.slot + max_sampler_slot + 1;
BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Flatten qualifier flags into final access state. */
MSLTextureSamplerAccess access;
if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) {
@ -1771,29 +1816,39 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
else {

I don't think we need vertex shaders to write to image targets, but is that a limitation of Metal? Would be preferable to put asserts in other backends.
access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
}
BLI_assert(used_slot >= 0 && used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Writeable image targets only assigned to Fragment and compute shaders. */
MSLTextureSampler msl_tex(ShaderStage::FRAGMENT | ShaderStage::COMPUTE,
res.image.type,
res.image.name,
access,
used_slot);
texture_samplers.append(msl_tex);
MSLTextureResource msl_image;
msl_image.stage = ShaderStage::FRAGMENT | ShaderStage::COMPUTE;
msl_image.type = res.image.type;
msl_image.name = res.image.name;
msl_image.access = access;
msl_image.slot = texture_slot_id++;
msl_image.location = (create_info_->auto_resource_location_) ? msl_image.slot : res.slot;
msl_image.is_texture_sampler = false;
BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
texture_samplers.append(msl_image);
max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
} break;
case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
MSLUniformBlock ubo;
MSLBufferBlock ubo;
BLI_assert(res.uniformbuf.type_name.size() > 0);
BLI_assert(res.uniformbuf.name.size() > 0);
int64_t array_offset = res.uniformbuf.name.find_first_of("[");
/* UBO should either use an existing declared UBO bind slot, or automatically resolve
* index. */
ubo.slot = (create_info_->auto_resource_location_) ? uniform_blocks.size() : res.slot;
BLI_assert(ubo.slot >= 0 && ubo.slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
max_ubo_slot = max_ii(max_ubo_slot, ubo.slot);
/* We maintain two bind indices. "Slot" refers to the storage index buffer(N) in which
* we will bind the resource. "Location" refers to the explicit bind index specified
* in ShaderCreateInfo.
* NOTE: ubo.slot is offset by one, as first UBO slot is reserved for push constant data.
*/
ubo.slot = 1 + (ubo_buffer_slot_id_++);
ubo.location = (create_info_->auto_resource_location_) ? ubo.slot : res.slot;
BLI_assert(ubo.location >= 0 && ubo.location < MTL_MAX_BUFFER_BINDINGS);
ubo.qualifiers = shader::Qualifier::READ;
ubo.type_name = res.uniformbuf.type_name;
ubo.is_array = (array_offset > -1);
if (ubo.is_array) {
@ -1809,13 +1864,40 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
} break;
case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
/* TODO(Metal): Support shader storage buffer in Metal.
* Pending compute support. */
MSLBufferBlock ssbo;
BLI_assert(res.storagebuf.type_name.size() > 0);
BLI_assert(res.storagebuf.name.size() > 0);
int64_t array_offset = res.storagebuf.name.find_first_of("[");
/* We maintain two bind indices. "Slot" refers to the storage index buffer(N) in which
* we will bind the resource. "Location" refers to the explicit bind index specified
* in ShaderCreateInfo. */
ssbo.slot = storage_buffer_slot_id_++;
ssbo.location = (create_info_->auto_resource_location_) ? ssbo.slot : res.slot;
BLI_assert(ssbo.location >= 0 && ssbo.location < MTL_MAX_BUFFER_BINDINGS);
ssbo.qualifiers = res.storagebuf.qualifiers;
ssbo.type_name = res.storagebuf.type_name;
ssbo.is_array = (array_offset > -1);
if (ssbo.is_array) {
/* If array SSBO, strip out array tag from name. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
ssbo.name = name_no_array;
}
else {
ssbo.name = res.storagebuf.name;
}
ssbo.stage = ShaderStage::ANY;
storage_blocks.append(ssbo);
} break;
}
}
}
/* Assign maximum buffer slot: all UBO and SSBO slots, plus one for the push-constant block. */
max_buffer_slot = storage_buffer_slot_id_ + ubo_buffer_slot_id_ + 1;
/** Vertex Inputs. */
bool all_attr_location_assigned = true;
for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) {
@ -1865,10 +1947,28 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const
{
/* We can only use argument buffers IF sampler count exceeds static limit of 16,
* AND we can support more samplers with an argument buffer.
* NOTE: We reserve one constant sampler within the shader for fast read via point-sampling. */
return texture_samplers.size() >= 15 && GPU_max_samplers() > 16;
/* We can only use argument buffers IF the highest sampler index exceeds the static limit of 16,
* AND we can support more samplers with an argument buffer. */
bool use_argument_buffer = (texture_samplers.size() >= 15 || max_tex_bind_index >= 14) &&
GPU_max_samplers() > 15;
#ifndef NDEBUG
/* Due to explicit bind location support, we may be below the sampler limit, but forced to offset
* bindings due to the range being high. Introduce debug check here to issue warning. In these
* cases, if explicit bind location support is not required, best to use auto_resource_location
* to optimize bind point packing. */
if (use_argument_buffer && texture_samplers.size() < 15) {
MTL_LOG_WARNING(
"Compiled Shader '%s' is falling back to bindless via argument buffers due to having a "
"texture sampler of Index: %u Which exceeds the limit of 15+1. However shader only uses "
"%d textures. Consider optimising bind points with .auto_resource_location(true).\n",
parent_shader_.name_get(),
max_tex_bind_index,
(int)texture_samplers.size());
}
#endif
return use_argument_buffer;
}
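
Concretely (an illustrative case, not from the patch): a shader with only 4 samplers, one of them explicitly bound at location 15, yields max_tex_bind_index = 15 >= 14, so the shader falls back to argument buffers, and since texture_samplers.size() < 15 the debug warning above fires.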
uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
@ -1878,6 +1978,13 @@ uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
return texture_samplers.size();
}
uint32_t MSLGeneratorInterface::max_sampler_index_for_stage(ShaderStage stage) const
{
/* NOTE: Sampler bindings and the argument buffer are shared across stages,
* in case stages share texture/sampler bindings. */
return max_tex_bind_index;
}
uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage)
{
/* Note: Shader stage must be a singular index. Compound shader masks are not valid for this
@ -1889,7 +1996,7 @@ uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderSta
}
/* Sampler argument buffer to follow UBOs and PushConstantBlock. */
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_ubo_slot + 2);
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_buffer_slot + 1);
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
@ -2014,6 +2121,11 @@ std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub()
/* Undefine uniform mappings to avoid name collisions. */

out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT);
/* Early fragment tests. */
if (uses_early_fragment_test) {
out << "[[early_fragment_tests]]" << std::endl;
}
/* Generate function entry point signature w/ resource bindings and inputs. */
#ifndef NDEBUG
out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT)
@ -2138,7 +2250,6 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
out << this->generate_msl_texture_vars(ShaderStage::COMPUTE);
out << this->generate_msl_global_uniform_population(ShaderStage::COMPUTE);
out << this->generate_msl_uniform_block_population(ShaderStage::COMPUTE);
/* TODO(Metal): SSBO Population. */
/* Execute original 'main' function within class scope. */
out << "\t/* Execute Compute main function */\t" << std::endl
@ -2170,10 +2281,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
stage == ShaderStage::COMPUTE);
/* Generate texture signatures for textures used by this stage. */
BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
for (const MSLTextureSampler &tex : this->texture_samplers) {
for (const MSLTextureResource &tex : this->texture_samplers) {
if (bool(tex.stage & stage)) {
out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
<< " [[texture(" << tex.location << ")]]";
<< " [[texture(" << tex.slot << ")]]";
}
}
@ -2191,10 +2302,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
/* Maximum Limit of samplers defined in the function argument table is
* `MTL_MAX_DEFAULT_SAMPLERS=16`. */
BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
for (const MSLTextureSampler &tex : this->texture_samplers) {
for (const MSLTextureResource &tex : this->texture_samplers) {
if (bool(tex.stage & stage)) {
out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
<< "_sampler [[sampler(" << tex.location << ")]]";
<< "_sampler [[sampler(" << tex.slot << ")]]";
}
}
@ -2211,7 +2322,7 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
ShaderStage stage,
bool &is_first_parameter)
{
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
if (bool(ubo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
@ -2223,7 +2334,27 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
* MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs
* bound for the current PSO specialization. */
out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+"
<< (ubo.slot + 1) << ")]]";
<< ubo.slot << ")]]";
}
}
/* Storage buffers. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
if (bool(ssbo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
bool writeable = (ssbo.qualifiers & shader::Qualifier::WRITE) == shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
out << parameter_delimiter(is_first_parameter) << "\n\t" << memory_scope;
if (!is_builtin_type(ssbo.type_name)) {
out << get_stage_class_name(stage) << "::";
}
/* #StorageBuffer bind indices start at `MTL_storage_buffer_base_index`, which follows
* immediately after all uniform blocks, such that MTL_storage_buffer_base_index =
* MTL_uniform_buffer_base_index + uniform_blocks.size() + 1, where the additional
* buffer is reserved for the #PushConstantBlock (push constants). */
out << ssbo.type_name << "* " << ssbo.name << "[[buffer(MTL_storage_buffer_base_index+"
<< (ssbo.slot) << ")]]";
}
}
}
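
Under the same assumptions, the parameter this loop would emit for a hypothetical writable SSBO verts_out of non-builtin type VertexData (where <stage_class> stands for the get_stage_class_name(stage) prefix) looks like:

device <stage_class>::VertexData* verts_out [[buffer(MTL_storage_buffer_base_index+0)]]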
@ -2411,9 +2542,13 @@ std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shade
out << "#undef " << uniform.name << std::endl;
}
/* UBO block undef. */
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
out << "#undef " << ubo.name << std::endl;
}
/* SSBO block undef. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
out << "#undef " << ssbo.name << std::endl;
}
return out.str();
}
@ -2698,7 +2833,7 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
/* Populate Global Uniforms. */
std::stringstream out;
out << "\t/* Copy UBO block references into local class variables */" << std::endl;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ubo.stage & stage)) {
@ -2714,6 +2849,26 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
out << " = " << ubo.name << ";" << std::endl;
}
}
/* Populate storage buffer references. */
out << "\t/* Copy SSBO block references into local class variables */" << std::endl;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ssbo.stage & stage)) {
/* Generate SSBO reference assignment.
* NOTE(Metal): We append the `_local` post-fix onto the class member name
* for the SSBO to avoid name collision with the SSBO accessor macro.
* We only need to add this post-fix for the non-array access variant,
* as the array is indexed directly, rather than requiring a dereference. */
out << "\t" << get_shader_stage_instance_name(stage) << "." << ssbo.name;
if (!ssbo.is_array) {
out << "_local";
}
out << " = " << ssbo.name << ";" << std::endl;
}
}
out << std::endl;
return out.str();
}
@ -3080,7 +3235,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
if (this->use_argument_buffer_for_samplers()) {
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".samp = &samplers.sampler_args["
<< this->texture_samplers[i].location << "];" << std::endl;
<< this->texture_samplers[i].slot << "];" << std::endl;
}
else {
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
@ -3302,20 +3457,36 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
name_buffer_size,
name_buffer_offset),
this->uniform_blocks[uniform_block].slot,
this->uniform_blocks[uniform_block].location,
0,
this->uniform_blocks[uniform_block].stage);
}
/* Prepare Interface Storage Blocks. */
for (int storage_block = 0; storage_block < this->storage_blocks.size(); storage_block++) {
interface->add_storage_block(
name_buffer_copystr(&interface->name_buffer_,
this->storage_blocks[storage_block].name.c_str(),
name_buffer_size,
name_buffer_offset),
this->storage_blocks[storage_block].slot,
this->storage_blocks[storage_block].location,
0,
this->storage_blocks[storage_block].stage);
}
/* Texture/sampler bindings to interface. */
for (const MSLTextureSampler &texture_sampler : this->texture_samplers) {
for (const MSLTextureResource &input_texture : this->texture_samplers) {
interface->add_texture(name_buffer_copystr(&interface->name_buffer_,
texture_sampler.name.c_str(),
input_texture.name.c_str(),
name_buffer_size,
name_buffer_offset),
texture_sampler.location,
texture_sampler.get_texture_binding_type(),
texture_sampler.get_sampler_format(),
texture_sampler.stage);
input_texture.slot,
input_texture.location,
input_texture.get_texture_binding_type(),
input_texture.get_sampler_format(),
input_texture.is_texture_sampler,
input_texture.stage);
}
/* Sampler Parameters. */
@ -3336,7 +3507,7 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
return interface;
}
std::string MSLTextureSampler::get_msl_texture_type_str() const
std::string MSLTextureResource::get_msl_texture_type_str() const
{
/* Add Types as needed. */
switch (this->type) {
@ -3444,7 +3615,7 @@ std::string MSLTextureSampler::get_msl_texture_type_str() const
};
}
std::string MSLTextureSampler::get_msl_wrapper_type_str() const
std::string MSLTextureResource::get_msl_wrapper_type_str() const
{
/* Add Types as needed. */
switch (this->type) {
@ -3552,7 +3723,7 @@ std::string MSLTextureSampler::get_msl_wrapper_type_str() const
};
}
std::string MSLTextureSampler::get_msl_return_type_str() const
std::string MSLTextureResource::get_msl_return_type_str() const
{
/* Add Types as needed */
switch (this->type) {
@ -3607,7 +3778,7 @@ std::string MSLTextureSampler::get_msl_return_type_str() const
};
}
eGPUTextureType MSLTextureSampler::get_texture_binding_type() const
eGPUTextureType MSLTextureResource::get_texture_binding_type() const
{
/* Add Types as needed */
switch (this->type) {
@ -3714,7 +3885,7 @@ eGPUTextureType MSLTextureSampler::get_texture_binding_type() const
};
}
eGPUSamplerFormat MSLTextureSampler::get_sampler_format() const
eGPUSamplerFormat MSLTextureResource::get_sampler_format() const
{
switch (this->type) {
case ImageType::FLOAT_BUFFER:

View File

@ -107,12 +107,13 @@ struct MTLShaderInputAttribute {
uint32_t matrix_element_count;
};
struct MTLShaderUniformBlock {
struct MTLShaderBufferBlock {
uint32_t name_offset;
uint32_t size = 0;
/* Buffer resource bind index in shader `[[buffer(index)]]`. */
uint32_t buffer_index;
/* Explicit bind location for the buffer. */
int location;
/* Tracking for manual uniform addition. */
uint32_t current_offset;
ShaderStage stage_mask;
@ -120,7 +121,7 @@ struct MTLShaderUniformBlock {
struct MTLShaderUniform {
uint32_t name_offset;
/* Index of `MTLShaderUniformBlock` this uniform belongs to. */
/* Index of `MTLShaderBufferBlock` this uniform belongs to. */
uint32_t size_in_bytes;
uint32_t byte_offset;
eMTLDataType type;
@ -132,9 +133,13 @@ struct MTLShaderTexture {
uint32_t name_offset;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot_index;
/* Explicit bind location for texture. */
int location;
eGPUTextureType type;
eGPUSamplerFormat sampler_format;
ShaderStage stage_mask;
/* Whether texture resource is expected to be image or sampler. */
bool is_texture_sampler;
};
struct MTLShaderSampler {
@ -173,8 +178,13 @@ class MTLShaderInterface : public ShaderInterface {
/* Uniform Blocks. */
uint32_t total_uniform_blocks_;
uint32_t max_uniformbuf_index_;
MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderUniformBlock push_constant_block_;
MTLShaderBufferBlock ubos_[MTL_MAX_BUFFER_BINDINGS];
MTLShaderBufferBlock push_constant_block_;
/* Storage blocks. */
uint32_t total_storage_blocks_;
uint32_t max_storagebuf_index_;
MTLShaderBufferBlock ssbos_[MTL_MAX_BUFFER_BINDINGS];
/* Textures. */
/* Textures support explicit binding indices, so some texture slots
@ -207,13 +217,21 @@ class MTLShaderInterface : public ShaderInterface {
int matrix_element_count = 1);
uint32_t add_uniform_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
uint32_t add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1);
void add_texture(uint32_t name_offset,
uint32_t texture_slot,
uint32_t location,
eGPUTextureType tex_binding_type,
eGPUSamplerFormat sampler_format,
bool is_texture_sampler,
ShaderStage stage_mask = ShaderStage::FRAGMENT);
void add_push_constant_block(uint32_t name_offset);
@ -232,14 +250,20 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t get_total_uniforms() const;
/* Fetch Uniform Blocks. */
const MTLShaderUniformBlock &get_uniform_block(uint index) const;
const MTLShaderBufferBlock &get_uniform_block(uint index) const;
uint32_t get_total_uniform_blocks() const;
uint32_t get_max_ubo_index() const;
bool has_uniform_block(uint32_t block_index) const;
uint32_t get_uniform_block_size(uint32_t block_index) const;
/* Fetch Storage Blocks. */
const MTLShaderBufferBlock &get_storage_block(uint index) const;
uint32_t get_total_storage_blocks() const;
bool has_storage_block(uint32_t block_index) const;
uint32_t get_storage_block_size(uint32_t block_index) const;
/* Push constant uniform data block should always be available. */
const MTLShaderUniformBlock &get_push_constant_block() const;
const MTLShaderBufferBlock &get_push_constant_block() const;
uint32_t get_max_buffer_index() const;
/* Fetch textures. */
const MTLShaderTexture &get_texture(uint index) const;

View File

@ -56,6 +56,8 @@ void MTLShaderInterface::init()
total_attributes_ = 0;
total_uniform_blocks_ = 0;
max_uniformbuf_index_ = 0;
total_storage_blocks_ = 0;
max_storagebuf_index_ = 0;
total_uniforms_ = 0;
total_textures_ = 0;
max_texture_index_ = -1;
@ -109,6 +111,7 @@ void MTLShaderInterface::add_input_attribute(uint32_t name_offset,
uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask)
{
@ -117,9 +120,12 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
size += 16 - (size % 16);
}
MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS);
MTLShaderBufferBlock &uni_block = ubos_[total_uniform_blocks_];
uni_block.name_offset = name_offset;
uni_block.buffer_index = buffer_index;
uni_block.location = location;
uni_block.size = size;
uni_block.current_offset = 0;
uni_block.stage_mask = ShaderStage::ANY;
@ -127,6 +133,30 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
return (total_uniform_blocks_++);
}
uint32_t MTLShaderInterface::add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask)
{
/* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */
if ((size % 16) != 0) {
size += 16 - (size % 16);
}
BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS);
MTLShaderBufferBlock &ssbo_block = ssbos_[total_storage_blocks_];
ssbo_block.name_offset = name_offset;
ssbo_block.buffer_index = buffer_index;
ssbo_block.location = location;
ssbo_block.size = size;
ssbo_block.current_offset = 0;
ssbo_block.stage_mask = ShaderStage::ANY;
max_storagebuf_index_ = max_ii(max_storagebuf_index_, buffer_index);
return (total_storage_blocks_++);
}
void MTLShaderInterface::add_push_constant_block(uint32_t name_offset)
{
push_constant_block_.name_offset = name_offset;
@ -189,8 +219,10 @@ void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, in
void MTLShaderInterface::add_texture(uint32_t name_offset,
uint32_t texture_slot,
uint32_t location,
eGPUTextureType tex_binding_type,
eGPUSamplerFormat sampler_format,
bool is_texture_sampler,
ShaderStage stage_mask)
{
BLI_assert(texture_slot >= 0 && texture_slot < GPU_max_textures());
@ -201,8 +233,10 @@ void MTLShaderInterface::add_texture(uint32_t name_offset,
BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding");
tex.name_offset = name_offset;
tex.slot_index = texture_slot;
tex.location = location;
tex.type = tex_binding_type;
tex.sampler_format = sampler_format;
tex.is_texture_sampler = is_texture_sampler;
tex.stage_mask = stage_mask;
tex.used = true;
total_textures_++;
@ -272,9 +306,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
attr_len_ = this->get_total_attributes();
ubo_len_ = this->get_total_uniform_blocks();
uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
/* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */
ssbo_len_ = 0;
ssbo_len_ = this->get_total_storage_blocks();
/* Calculate total inputs and allocate #ShaderInput array. */
/* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */
@ -300,16 +332,17 @@ void MTLShaderInterface::prepare_common_shader_inputs()
BLI_assert(&inputs_[attr_len_] >= current_input);
current_input = &inputs_[attr_len_];
for (const int ubo_index : IndexRange(total_uniform_blocks_)) {
MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index];
MTLShaderBufferBlock &shd_ubo = ubos_[ubo_index];
current_input->name_offset = shd_ubo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
/* Location refers to the index in the ubos_ array. */
current_input->location = ubo_index;
current_input->location = shd_ubo.location;
/* Binding location refers to the UBO bind slot in
* #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock. */
current_input->binding = shd_ubo.buffer_index;
* PushConstantBlock.
* See `mtl_shader_generator.hh` for the buffer binding table breakdown. */
current_input->binding = shd_ubo.location;
current_input++;
}
@ -352,15 +385,24 @@ void MTLShaderInterface::prepare_common_shader_inputs()
current_input->location = texture_index + total_uniforms_;
/* Binding represents texture slot `[[texture(n)]]`. */
current_input->binding = shd_tex.slot_index;
current_input->binding = shd_tex.location;
current_input++;
}
}
/* SSBO bindings.
* TODO(Metal): Support SSBOs. Pending compute support. */
/* SSBO bindings. */
BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input);
current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_];
BLI_assert(ssbo_len_ >= total_storage_blocks_);
for (const int ssbo_index : IndexRange(total_storage_blocks_)) {
MTLShaderBufferBlock &shd_ssbo = ssbos_[ssbo_index];
current_input->name_offset = shd_ssbo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ssbo.name_offset));
/* `Location` is used as the returned explicit bind index for SSBOs. */
current_input->location = shd_ssbo.location;
current_input->binding = shd_ssbo.location;
current_input++;
}
this->sort_inputs();
@ -419,14 +461,14 @@ uint32_t MTLShaderInterface::get_total_uniforms() const
}
/* Uniform Blocks. */
const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const
const MTLShaderBufferBlock &MTLShaderInterface::get_uniform_block(uint index) const
{
BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(index < MTL_MAX_BUFFER_BINDINGS);
BLI_assert(index < get_total_uniform_blocks());
return ubos_[index];
}
const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const
const MTLShaderBufferBlock &MTLShaderInterface::get_push_constant_block() const
{
return push_constant_block_;
}
@ -436,11 +478,6 @@ uint32_t MTLShaderInterface::get_total_uniform_blocks() const
return total_uniform_blocks_;
}
uint32_t MTLShaderInterface::get_max_ubo_index() const
{
return max_uniformbuf_index_;
}
bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const
{
return (block_index < total_uniform_blocks_);
@ -451,6 +488,35 @@ uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const
return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0;
}
/* Storage Blocks. */
const MTLShaderBufferBlock &MTLShaderInterface::get_storage_block(uint index) const
{
BLI_assert(index < MTL_MAX_BUFFER_BINDINGS);
BLI_assert(index < get_total_storage_blocks());
return ssbos_[index];
}
uint32_t MTLShaderInterface::get_total_storage_blocks() const
{
return total_storage_blocks_;
}
bool MTLShaderInterface::has_storage_block(uint32_t block_index) const
{
return (block_index < total_storage_blocks_);
}
uint32_t MTLShaderInterface::get_storage_block_size(uint32_t block_index) const
{
return (block_index < total_storage_blocks_) ? ssbos_[block_index].size : 0;
}
uint32_t MTLShaderInterface::get_max_buffer_index() const
{
/* PushConstantBlock + All uniform blocks + all storage blocks. */
return 1 + get_total_uniform_blocks() + get_total_storage_blocks();
}
/* Textures. */
const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const
{

View File

@ -573,7 +573,7 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits)
eGPUStageBarrierBits before_stages = GPU_BARRIER_STAGE_ANY;
eGPUStageBarrierBits after_stages = GPU_BARRIER_STAGE_ANY;
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->main_command_buffer.insert_memory_barrier(barrier_bits, before_stages, after_stages);
@ -651,7 +651,7 @@ void MTLStateManager::texture_bind(Texture *tex_, GPUSamplerState sampler_type,
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
if (unit >= 0) {
ctx->texture_bind(mtl_tex, unit);
ctx->texture_bind(mtl_tex, unit, false);
/* Fetching textures default sampler configuration and applying
* eGPUSampler State on top. This path exists to support
@ -670,14 +670,14 @@ void MTLStateManager::texture_unbind(Texture *tex_)
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
ctx->texture_unbind(mtl_tex);
ctx->texture_unbind(mtl_tex, false);
}
void MTLStateManager::texture_unbind_all()
{
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->texture_unbind_all();
ctx->texture_unbind_all(false);
}
/** \} */
@ -688,17 +688,30 @@ void MTLStateManager::texture_unbind_all()
void MTLStateManager::image_bind(Texture *tex_, int unit)
{
this->texture_bind(tex_, GPUSamplerState::default_sampler(), unit);
BLI_assert(tex_);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
if (unit >= 0) {
ctx->texture_bind(mtl_tex, unit, true);
}
}
void MTLStateManager::image_unbind(Texture *tex_)
{
this->texture_unbind(tex_);
BLI_assert(tex_);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
ctx->texture_unbind(mtl_tex, true);
}
void MTLStateManager::image_unbind_all()
{
this->texture_unbind_all();
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->texture_unbind_all(true);
}
/** \} */

View File

@ -4,3 +4,82 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_storage_buffer_private.hh"
#include "mtl_context.hh"
namespace blender {
namespace gpu {
class MTLUniformBuf;
class MTLVertBuf;
class MTLIndexBuf;
/**
* Implementation of Storage Buffers using Metal.
*/
class MTLStorageBuf : public StorageBuf {
private:
/** Allocation Handle or indirect wrapped instance.
* MTLStorageBuf can wrap a MTLVertBuf, MTLIndexBuf or MTLUniformBuf for binding as a writeable
* resource. */
enum {
MTL_STORAGE_BUF_TYPE_DEFAULT = 0,
MTL_STORAGE_BUF_TYPE_UNIFORMBUF = 1,
MTL_STORAGE_BUF_TYPE_VERTBUF = 2,
MTL_STORAGE_BUF_TYPE_INDEXBUF = 3,
} storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
union {
/* Own allocation. */
gpu::MTLBuffer *metal_buffer_;
/* Wrapped type. */
MTLUniformBuf *uniform_buffer_;
MTLVertBuf *vertex_buffer_;
MTLIndexBuf *index_buffer_;
};
/* Whether the buffer has contents; if false, no GPU buffer
* has been allocated yet. */
bool has_data_ = false;
/** Bind-state tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/** Usage type. */
GPUUsageType usage_;
public:
MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
~MTLStorageBuf();
MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size);
MTLStorageBuf(MTLVertBuf *vert_buf, size_t size);
MTLStorageBuf(MTLIndexBuf *index_buf, size_t size);
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
void clear(uint32_t clear_value) override;
void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
void read(void *data) override;
void init();
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{
return name_;
}
private:
MEM_CXX_CLASS_ALLOC_FUNCS("MTLStorageBuf");
};
} // namespace gpu
} // namespace blender
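
A hedged creation/usage sketch via Blender's public GPU API, which reaches this class through MTLBackend::storagebuf_alloc (size, contents and usage flag are illustrative):

GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(size, data, GPU_USAGE_STATIC, "my_ssbo");
GPU_storagebuf_bind(ssbo, 0); /* Lands in MTLStorageBuf::bind(0). */
/* ... dispatch work ... */
GPU_storagebuf_free(ssbo);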

View File

@ -4,3 +4,302 @@
/** \file
* \ingroup gpu
*/
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
* \{ */
MTLStorageBuf::MTLStorageBuf(size_t size, GPUUsageType usage, const char *name)
: StorageBuf(size, name)
{
usage_ = usage;
/* Do not create SSBO MTL buffer here to allow allocation from any thread. */
storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
metal_buffer_ = nullptr;
}
MTLStorageBuf::MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size)
: StorageBuf(size, "UniformBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_UNIFORMBUF;
uniform_buffer_ = uniform_buf;
BLI_assert(uniform_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLVertBuf *vert_buf, size_t size)
: StorageBuf(size, "VertexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_VERTBUF;
vertex_buffer_ = vert_buf;
BLI_assert(vertex_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLIndexBuf *index_buf, size_t size)
: StorageBuf(size, "IndexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_INDEXBUF;
index_buffer_ = index_buf;
BLI_assert(index_buffer_ != nullptr);
}
MTLStorageBuf::~MTLStorageBuf()
{
if (storage_source_ == MTL_STORAGE_BUF_TYPE_DEFAULT) {
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
}
/* Ensure SSBO is not bound to active CTX.
* SSBO bindings are reset upon Context-switch so we do not need
* to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
MTLStorageBufferBinding &slot = ctx->pipeline_state.ssbo_bindings[i];
if (slot.bound && slot.ssbo == this) {
slot.bound = false;
slot.ssbo = nullptr;
}
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data upload / update
* \{ */
void MTLStorageBuf::init()
{
/* We only need to initialize the storage buffer for default buffer types. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
metal_buffer_ = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_, true);
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Storage Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
has_data_ = false;
}
void MTLStorageBuf::update(const void *data)
{
/* We only need to update the storage buffer for default buffer types; wrapped resources
* manage their own contents. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
/* Ensure buffer has been allocated. */
if (metal_buffer_ == nullptr) {
init();
}
BLI_assert(data != nullptr);
if (data != nullptr) {
/* Upload data. */
BLI_assert(!(metal_buffer_->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size_in_bytes_ <= metal_buffer_->get_size());
BLI_assert(size_in_bytes_ <= [metal_buffer_->get_metal_buffer() length]);
memcpy(metal_buffer_->get_host_ptr(), data, size_in_bytes_);
metal_buffer_->flush_range(0, size_in_bytes_);
has_data_ = true;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Usage
* \{ */
void MTLStorageBuf::bind(int slot)
{
if (slot >= MTL_MAX_BUFFER_BINDINGS) {
fprintf(
stderr,
"Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n",
name_,
slot,
MTL_MAX_BUFFER_BINDINGS);
BLI_assert(false);
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Bind current SSBO to active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLStorageBufferBinding &ctx_ssbo_bind_slot = ctx->pipeline_state.ssbo_bindings[slot];
ctx_ssbo_bind_slot.ssbo = this;
ctx_ssbo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
}
void MTLStorageBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLStorageBufferBinding &ctx_ssbo_bind_slot =
bound_ctx_->pipeline_state.ssbo_bindings[bind_slot_];
if (ctx_ssbo_bind_slot.bound && ctx_ssbo_bind_slot.ssbo == this) {
ctx_ssbo_bind_slot.bound = false;
ctx_ssbo_bind_slot.ssbo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
void MTLStorageBuf::clear(uint32_t clear_value)
{
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert_msg(ctx, "Clears should always be performed while a valid context exists.");
if (metal_buffer_ == nullptr) {
this->init();
}
if (ctx) {
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder fillBuffer:metal_buffer_->get_metal_buffer()
range:NSMakeRange(0, size_in_bytes_)
value:clear_value];
}
}
void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uint copy_size)
{
/* TODO(Metal): Support Copy sub operation. */
MTL_LOG_WARNING("MTLStorageBuf::copy_sub not yet supported.\n");

"TLStorageBuf" > "TLStorageBuf"

"TLStorageBuf" > "TLStorageBuf"
}
void MTLStorageBuf::read(void *data)
{
if (data == nullptr) {
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
/* Managed buffers need to be explicitly flushed back to host. */
if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/* Ensure GPU updates are flushed back to CPU. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
/* Ensure sync has occurred. */
GPU_finish();
}
/* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
}
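
A hedged host-side readback sketch (shader and dispatch size hypothetical); GPU_storagebuf_read lands in the method above and performs the managed-buffer synchronization before the memcpy:

GPU_compute_dispatch(shader, groups_x, 1, 1);
GPU_storagebuf_read(ssbo, host_data);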
id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
{
gpu::MTLBuffer *source_buffer = nullptr;
switch (storage_source_) {
/* Default SSBO buffer comes from own allocation. */
case MTL_STORAGE_BUF_TYPE_DEFAULT: {
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
source_buffer = metal_buffer_;
} break;
/* SSBO buffer comes from Uniform Buffer. */
case MTL_STORAGE_BUF_TYPE_UNIFORMBUF: {
source_buffer = uniform_buffer_->metal_buffer_;
} break;
/* SSBO buffer comes from Vertex Buffer. */
case MTL_STORAGE_BUF_TYPE_VERTBUF: {
source_buffer = vertex_buffer_->vbo_;
} break;
/* SSBO buffer comes from Index Buffer. */
case MTL_STORAGE_BUF_TYPE_INDEXBUF: {
source_buffer = index_buffer_->ibo_;
} break;
}
/* Return Metal allocation handle and flag as used. */
BLI_assert(source_buffer != nullptr);
source_buffer->debug_ensure_used();
return source_buffer->get_metal_buffer();
}
int MTLStorageBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu

View File

@ -13,10 +13,14 @@
namespace blender::gpu {
class MTLStorageBuf;
/**
* Implementation of Uniform Buffers using Metal.
**/
class MTLUniformBuf : public UniformBuf {
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
@ -29,6 +33,9 @@ class MTLUniformBuf : public UniformBuf {
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();
@ -39,7 +46,7 @@ class MTLUniformBuf : public UniformBuf {
void unbind() override;
void clear_to_zero() override;
id<MTLBuffer> get_metal_buffer(int *r_offset);
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{

View File

@ -14,6 +14,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
@ -33,7 +34,7 @@ MTLUniformBuf::~MTLUniformBuf()
* to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
if (slot.bound && slot.ubo == this) {
slot.bound = false;
@ -41,6 +42,11 @@ MTLUniformBuf::~MTLUniformBuf()
}
}
}
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLUniformBuf::update(const void *data)
@ -67,7 +73,9 @@ void MTLUniformBuf::update(const void *data)
size_in_bytes_, true, data);
has_data_ = true;
metal_buffer_->set_label(@"Uniform Buffer");
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Uniform Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
}
@ -94,7 +102,7 @@ void MTLUniformBuf::bind(int slot)
return;
}
BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(slot < MTL_MAX_BUFFER_BINDINGS);
/* Bind current UBO to active context. */
MTLContext *ctx = MTLContext::get();
@ -126,7 +134,25 @@ void MTLUniformBuf::bind_as_ssbo(int slot)
return;
}
BLI_assert_msg(0, "Not implemented yet");
/* We need to ensure data is actually allocated if used as an SSBO, as the resource may be
* written to. */
if (metal_buffer_ == nullptr) {
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
else {
this->clear_to_zero();
}
}
/* Create a MTLStorageBuf to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, size_in_bytes_);
}
ssbo_wrapper_->bind(slot);
}
void MTLUniformBuf::unbind()
@ -154,19 +180,14 @@ void MTLUniformBuf::unbind()
bound_ctx_ = nullptr;
}
id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
id<MTLBuffer> MTLUniformBuf::get_metal_buffer()
{
BLI_assert(this);
*r_offset = 0;
if (metal_buffer_ != nullptr && has_data_) {
*r_offset = 0;
metal_buffer_->debug_ensure_used();
return metal_buffer_->get_metal_buffer();
}
else {
*r_offset = 0;
return nil;
}
return nil;
}
int MTLUniformBuf::get_size()

View File

@ -22,7 +22,8 @@ class MTLVertBuf : public VertBuf {
friend class gpu::MTLTexture; /* For buffer texture. */
friend class MTLShader; /* For transform feedback. */
friend class MTLBatch;
friend class MTLContext; /* For transform feedback. */
friend class MTLContext; /* For transform feedback. */
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/** Metal buffer allocation. **/
@ -37,6 +38,8 @@ class MTLVertBuf : public VertBuf {
uint64_t alloc_size_ = 0;
/** Whether existing allocation has been submitted for use by the GPU. */
bool contents_in_flight_ = false;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
/* Fetch Metal buffer and offset into allocation if necessary.
* Access limited to friend classes. */

View File

@ -5,6 +5,7 @@
*/
#include "mtl_vertex_buffer.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
namespace blender::gpu {
@ -48,6 +49,11 @@ void MTLVertBuf::release_data()
GPU_TEXTURE_FREE_SAFE(buffer_texture_);
MEM_SAFE_FREE(data);
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLVertBuf::duplicate_data(VertBuf *dst_)
@ -253,7 +259,7 @@ void MTLVertBuf::bind()
void MTLVertBuf::update_sub(uint start, uint len, const void *data)
{
/* Fetch and verify active context. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(unwrap(GPU_context_active_get()));
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
@ -292,10 +298,16 @@ void MTLVertBuf::update_sub(uint start, uint len, const void *data)
void MTLVertBuf::bind_as_ssbo(uint binding)
{
/* TODO(Metal): Support binding of buffers as SSBOs.
* Pending overall compute support for Metal backend. */
MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n");
this->flag_used();
/* Ensure resource is initialized. */
this->bind();
/* Create a MTLStorageBuf to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
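
At the API level this path is reached when a vertex buffer is bound as an SSBO; a hedged usage sketch (format and sizes hypothetical):

GPUVertBuf *vbo = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY);
GPU_vertbuf_data_alloc(vbo, vert_len);
GPU_vertbuf_bind_as_ssbo(vbo, 1); /* Lazily creates the MTLStorageBuf wrapper above. */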
void MTLVertBuf::bind_as_texture(uint binding)

View File

@ -189,13 +189,14 @@ template<typename T> T atomicXor(device T &mem, T data)
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* Unblock texture atomic compilation.
* TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single thread.
/* Unblock texture atomic compilation.
* TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single
* thread.
* We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */
#define imageAtomicMin(tex, coord, data) \
uint val = _texelFetch_internal(tex, coord, 0).r;\
_texture_write_internal(tex, coord, uint4((val < data) ? val : data));\
tex.texture->fence();
uint val = _texelFetch_internal(tex, coord, 0).r; \
_texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \
tex.texture->fence();
/* Used to replace 'out' in function parameters with threadlocal reference
* shortened to avoid expanding the glsl source string. */
@ -1186,17 +1187,16 @@ inline float4 uintBitsToFloat(uint4 f)
template<typename T> T findLSB(T x)
{
/* ctz returns the number of trailing zeroes. To fetch the index of the LSB, we can also use this
* value as index, however need to filter out the case where the input value is zero to match
* value as index, however we need to filter out the case where the input value is zero to match
* GLSL functionality. */
return (x == T(0)) ? T(-1) : T(ctz(x));
}
template<typename T> T findMSB(T x)
{
/* clz returns the number of leading zeroes. To fetch the index of the LSB, we can also use this
* value as index when offset by 1. however need to filter out the case where the input value is
* zero to match GLSL functionality. 000000010*/
return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1));
/* clz returns the number of leading zeroes. To fetch the index of the MSB, we can also use this
* value as index when offset by 1. */
return (sizeof(T) * 8) - T(1) - clz(x);
}
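
Worked example (not from the patch): for a 32-bit uint x = 20 (0b10100), clz(x) is 27, so findMSB returns 32 - 1 - 27 = 4, matching GLSL; correspondingly, findLSB(20) returns ctz(20) = 2.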
/* Texture size functions. Add texture types as needed. */

View File

@ -15,12 +15,6 @@
#define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass by reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)