diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 675d7c22b81..e6c46a23e4e 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -275,6 +275,7 @@ set(METAL_SRC metal/mtl_shader_generator.mm metal/mtl_shader_interface.mm metal/mtl_state.mm + metal/mtl_storage_buffer.mm metal/mtl_texture.mm metal/mtl_texture_util.mm metal/mtl_uniform_buffer.mm @@ -300,6 +301,7 @@ set(METAL_SRC metal/mtl_shader_interface_type.hh metal/mtl_shader_shared.h metal/mtl_state.hh + metal/mtl_storage_buffer.hh metal/mtl_texture.hh metal/mtl_uniform_buffer.hh metal/mtl_vertex_buffer.hh diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index b2ba5ce0fbf..06938cf7a60 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -52,11 +52,7 @@ class MTLBackend : public GPUBackend { void samplers_update() override; void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override; - - void compute_dispatch_indirect(StorageBuf *indirect_buf) override - { - /* Placeholder */ - } + void compute_dispatch_indirect(StorageBuf *indirect_buf) override; /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index c4a3f38b83b..6d2289fcc71 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -16,6 +16,7 @@ #include "mtl_index_buffer.hh" #include "mtl_query.hh" #include "mtl_shader.hh" +#include "mtl_storage_buffer.hh" #include "mtl_uniform_buffer.hh" #include "mtl_vertex_buffer.hh" @@ -100,8 +101,7 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name) StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name) { - /* TODO(Metal): Implement MTLStorageBuf. 
*/ - return nullptr; + return new MTLStorageBuf(size, usage, name); } VertBuf *MTLBackend::vertbuf_alloc() @@ -398,16 +398,15 @@ void MTLBackend::capabilities_init(MTLContext *ctx) GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] || MTLBackend::capabilities.supports_family_mac1 || MTLBackend::capabilities.supports_family_mac2); - /* TODO(Metal): Add support? */ - GCaps.shader_draw_parameters_support = false; GCaps.compute_shader_support = true; + GCaps.shader_storage_buffer_objects_support = true; + GCaps.shader_draw_parameters_support = true; + GCaps.geometry_shader_support = false; - GCaps.shader_storage_buffer_objects_support = - false; /* TODO(Metal): implement Storage Buffer support. */ /* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes. * Can use argument buffers if a higher limit is required. */ - GCaps.max_shader_storage_buffer_bindings = 24; + GCaps.max_shader_storage_buffer_bindings = 14; if (GCaps.compute_shader_support) { GCaps.max_work_group_count[0] = 65535; @@ -460,6 +459,18 @@ void MTLBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups } } +void MTLBackend::compute_dispatch_indirect(StorageBuf *indirect_buf) +{ + /* Fetch Context. + * With Metal, workload submission and resource management occurs within the context. + * Call compute dispatch on valid context. 
*/ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx != nullptr); + if (ctx) { + ctx->compute_dispatch_indirect(indirect_buf); + } +} + /** \} */ } // blender::gpu diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh index 107fbe91c9c..1eed080e27e 100644 --- a/source/blender/gpu/metal/mtl_batch.hh +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -80,10 +80,7 @@ class MTLBatch : public Batch { ~MTLBatch(){}; void draw(int v_first, int v_count, int i_first, int i_count) override; - void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override - { - /* TODO(Metal): Support indirect draw commands. */ - } + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override; void multi_draw_indirect(GPUStorageBuf *indirect_buf, int count, intptr_t offset, @@ -94,7 +91,7 @@ class MTLBatch : public Batch { /* Returns an initialized RenderComandEncoder for drawing if all is good. * Otherwise, nil. */ - id bind(uint v_first, uint v_count, uint i_first, uint i_count); + id bind(uint v_count); void unbind(); /* Convenience getters. 
*/ @@ -118,6 +115,7 @@ class MTLBatch : public Batch { private: void shader_bind(); void draw_advanced(int v_first, int v_count, int i_first, int i_count); + void draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset); int prepare_vertex_binding(MTLVertBuf *verts, MTLRenderPipelineStateDescriptor &desc, const MTLShaderInterface *interface, @@ -126,8 +124,7 @@ class MTLBatch : public Batch { id get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count); - void prepare_vertex_descriptor_and_bindings( - MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count); + void prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers); MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); }; diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm index d93ced7b650..707f997cfb0 100644 --- a/source/blender/gpu/metal/mtl_batch.mm +++ b/source/blender/gpu/metal/mtl_batch.mm @@ -20,6 +20,7 @@ #include "mtl_debug.hh" #include "mtl_index_buffer.hh" #include "mtl_shader.hh" +#include "mtl_storage_buffer.hh" #include "mtl_vertex_buffer.hh" #include @@ -37,6 +38,14 @@ void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count) this->draw_advanced(v_first, v_count, i_first, i_count); } +void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) +{ + if (this->flag & GPU_BATCH_INVALID) { + this->shader_in_use_ = false; + } + this->draw_advanced_indirect(indirect_buf, offset); +} + void MTLBatch::shader_bind() { if (active_shader_ && active_shader_->is_valid()) { @@ -394,7 +403,7 @@ int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts, return -1; } -id MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count) +id MTLBatch::bind(uint v_count) { /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that * MTLDrawList only needs to perform setup a single time. 
*/ @@ -440,7 +449,7 @@ id MTLBatch::bind(uint v_first, uint v_count, uint i_fi * shader's input. * A unique vertex descriptor will result in a new PipelineStateObject * being generated for the currently bound shader. */ - prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count); + prepare_vertex_descriptor_and_bindings(buffers, num_buffers); /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer * data operations are required. */ @@ -585,8 +594,7 @@ id MTLBatch::bind(uint v_first, uint v_count, uint i_fi void MTLBatch::unbind() {} -void MTLBatch::prepare_vertex_descriptor_and_bindings( - MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count) +void MTLBatch::prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers) { /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound. @@ -743,8 +751,8 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count) #endif /* Setup RenderPipelineState for batch. */ - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); - id rec = this->bind(v_first, v_count, i_first, i_count); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + id rec = this->bind(v_count); if (rec == nil) { return; } @@ -880,6 +888,84 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count) this->unbind(); } +void MTLBatch::draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) +{ + /* Setup RenderPipelineState for batch. */ + MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + id rec = this->bind(0); + if (rec == nil) { + printf("Failed to open Render Command encoder for DRAW INDIRECT\n"); + return; + } + + /* Render using SSBO Vertex Fetch not supported by Draw Indirect. + * NOTE: Add support? 
*/ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + printf("Draw indirect for SSBO vertex fetch disabled\n"); + return; + } + + /* Fetch IndexBuffer and resolve primitive type. */ + MTLIndexBuf *mtl_elem = static_cast(reinterpret_cast(this->elem)); + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + + if (mtl_needs_topology_emulation(this->prim_type)) { + BLI_assert_msg(false, "Metal Topology emulation unsupported for draw indirect.\n"); + return; + } + + /* Fetch indirect buffer Metal handle. */ + MTLStorageBuf *mtlssbo = static_cast(unwrap(indirect_buf)); + id mtl_indirect_buf = mtlssbo->get_metal_buffer(); + BLI_assert(mtl_indirect_buf != nil); + if (mtl_indirect_buf == nil) { + MTL_LOG_WARNING("Metal Indirect Draw Storage Buffer is nil.\n"); + return; + } + + if (mtl_elem == NULL) { + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawPrimitives:mtl_prim_type indirectBuffer:mtl_indirect_buf indirectBufferOffset:offset]; + ctx->main_command_buffer.register_draw_counters(1); + } + else { + /* Fetch index buffer. May return an index buffer of a differing format, + * if index buffer optimization is used. In these cases, final_prim_type and + * index_count get updated with the new properties. */ + MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_); + GPUPrimType final_prim_type = this->prim_type; + uint index_count = 0; + + id index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count); + mtl_prim_type = gpu_prim_type_to_metal(final_prim_type); + BLI_assert(index_buffer != nil); + + if (index_buffer != nil) { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. 
*/ + [rec drawIndexedPrimitives:mtl_prim_type + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:0 + indirectBuffer:mtl_indirect_buf + indirectBufferOffset:offset]; + ctx->main_command_buffer.register_draw_counters(1); + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + + /* End of draw. */ + this->unbind(); +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -1010,4 +1096,4 @@ id MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_typ /** \} */ -} // blender::gpu +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh index f236bae4e92..0a71716851c 100644 --- a/source/blender/gpu/metal/mtl_capabilities.hh +++ b/source/blender/gpu/metal/mtl_capabilities.hh @@ -17,17 +17,17 @@ namespace gpu { /* Max limit without using bind-less for samplers. */ #define MTL_MAX_DEFAULT_SAMPLERS 16 /* Total maximum buffers which can be bound to an encoder, for use within a shader. - * MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS must be <= - * than MTL_MAX_BUFFER_BINDINGS. */ + * Uniform buffers and storage buffers share the set of available bind buffers. + * The total number of buffer bindings must be <= MTL_MAX_BUFFER_BINDINGS + * We also require an additional 3 core buffers for: + * - Argument buffer for bindless resources (e.g. samplers) + * - Transform feedback buffer + * - Default push constant block + * Along with up to 6+1 buffers for vertex data, and index data. 
*/ #define MTL_MAX_BUFFER_BINDINGS 31 -#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 16 -#define MTL_MAX_STORAGE_BUFFER_BINDINGS 12 #define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31 #define MTL_MAX_UNIFORMS_PER_BLOCK 64 -static_assert((MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS) <= - MTL_MAX_BUFFER_BINDINGS); - /* Context-specific limits -- populated in 'MTLBackend::platform_init' */ struct MTLCapabilities { diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index 1d2b73a4ade..d4766ade61f 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -46,6 +46,7 @@ namespace blender::gpu { class MTLContext; class MTLCommandBufferManager; class MTLUniformBuf; +class MTLStorageBuf; /* Structs containing information on current binding state for textures and samplers. */ struct MTLTextureBinding { @@ -436,6 +437,11 @@ struct MTLUniformBufferBinding { MTLUniformBuf *ubo; }; +struct MTLStorageBufferBinding { + bool bound; + MTLStorageBuf *ssbo; +}; + struct MTLContextGlobalShaderPipelineState { bool initialised; @@ -455,12 +461,18 @@ struct MTLContextGlobalShaderPipelineState { MTLShader *active_shader; /* Global Uniform Buffers. */ - MTLUniformBufferBinding ubo_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; + MTLUniformBufferBinding ubo_bindings[MTL_MAX_BUFFER_BINDINGS]; + + /* Storage buffer. */ + MTLStorageBufferBinding ssbo_bindings[MTL_MAX_BUFFER_BINDINGS]; /* Context Texture bindings. */ MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS]; MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS]; + /* Image bindings. */ + MTLTextureBinding image_bindings[MTL_MAX_TEXTURE_SLOTS]; + /*** --- Render Pipeline State --- ***/ /* Track global render pipeline state for the current context. The functions in GPU_state.h * modify these parameters. 
Certain values, tagged [PSO], are parameters which are required to be @@ -771,10 +783,10 @@ class MTLContext : public Context { MTLFrameBuffer *get_default_framebuffer(); /* Context Global-State Texture Binding. */ - void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit); + void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image); void sampler_bind(MTLSamplerState, uint sampler_unit); - void texture_unbind(gpu::MTLTexture *mtl_texture); - void texture_unbind_all(); + void texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image); + void texture_unbind_all(bool is_image); void sampler_state_cache_init(); id get_sampler_from_state(MTLSamplerState state); id get_default_sampler_state(); @@ -822,6 +834,7 @@ class MTLContext : public Context { /* Compute. */ bool ensure_compute_pipeline_state(); void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len); + void compute_dispatch_indirect(StorageBuf *indirect_buf); /* State assignment. */ void set_viewport(int origin_x, int origin_y, int width, int height); diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 37e948766db..d0b761ca920 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -12,6 +12,7 @@ #include "mtl_shader.hh" #include "mtl_shader_interface.hh" #include "mtl_state.hh" +#include "mtl_storage_buffer.hh" #include "mtl_uniform_buffer.hh" #include "mtl_vertex_buffer.hh" @@ -20,6 +21,7 @@ #include "GPU_capabilities.h" #include "GPU_matrix.h" #include "GPU_shader.h" +#include "GPU_storage_buffer.h" #include "GPU_texture.h" #include "GPU_uniform_buffer.h" #include "GPU_vertex_buffer.h" @@ -33,6 +35,27 @@ using namespace blender; using namespace blender::gpu; +/* Debug option to bind null buffer for missing UBOs. + * Enabled by default. TODO: Ensure all required UBO bindings are present. 
*/ +#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO 1 + +/* Debug option to bind null buffer for missing SSBOs. NOTE: This is unsafe if replacing a + * write-enabled SSBO and should only be used for debugging to identify binding-related issues. */ +#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO 0 + +/* Error or warning depending on debug flag. */ +#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO == 1 +# define MTL_LOG_UBO_ERROR MTL_LOG_WARNING +#else +# define MTL_LOG_UBO_ERROR MTL_LOG_ERROR +#endif + +#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1 +# define MTL_LOG_SSBO_ERROR MTL_LOG_WARNING +#else +# define MTL_LOG_SSBO_ERROR MTL_LOG_ERROR +#endif + namespace blender::gpu { /* Global memory manager. */ @@ -265,11 +288,11 @@ MTLContext::~MTLContext() /* Release update/blit shaders. */ this->get_texture_utils().cleanup(); - /* Detach resource references */ + /* Detach resource references. */ GPU_texture_unbind_all(); - /* Unbind UBOs */ - for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + /* Unbind UBOs. */ + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { if (this->pipeline_state.ubo_bindings[i].bound && this->pipeline_state.ubo_bindings[i].ubo != nullptr) { @@ -279,7 +302,16 @@ MTLContext::~MTLContext() } } - /* Release Dummy resources */ + /* Unbind SSBOs. */ + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ssbo_bindings[i].bound && + this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) + { + this->pipeline_state.ssbo_bindings[i].ssbo->unbind(); + } + } + + /* Release Dummy resources. */ this->free_dummy_resources(); /* Release Sampler States. */ @@ -371,7 +403,7 @@ void MTLContext::activate() } /* Reset UBO bind state. 
*/ - for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { if (this->pipeline_state.ubo_bindings[i].bound && this->pipeline_state.ubo_bindings[i].ubo != nullptr) { @@ -380,6 +412,16 @@ void MTLContext::activate() } } + /* Reset SSBO bind state. */ + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ssbo_bindings[i].bound && + this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) + { + this->pipeline_state.ssbo_bindings[i].bound = false; + this->pipeline_state.ssbo_bindings[i].ssbo = nullptr; + } + } + /* Ensure imm active. */ immActivate(); } @@ -670,17 +712,27 @@ void MTLContext::pipeline_state_init() /* Clear bindings state. */ for (int t = 0; t < GPU_max_textures(); t++) { + /* Textures. */ this->pipeline_state.texture_bindings[t].used = false; this->pipeline_state.texture_bindings[t].slot_index = -1; this->pipeline_state.texture_bindings[t].texture_resource = nullptr; + + /* Images. */ + this->pipeline_state.image_bindings[t].used = false; + this->pipeline_state.image_bindings[t].slot_index = -1; + this->pipeline_state.image_bindings[t].texture_resource = nullptr; } for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) { this->pipeline_state.sampler_bindings[s].used = false; } - for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) { + for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) { this->pipeline_state.ubo_bindings[u].bound = false; this->pipeline_state.ubo_bindings[u].ubo = nullptr; } + for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) { + this->pipeline_state.ssbo_bindings[u].bound = false; + this->pipeline_state.ssbo_bindings[u].ssbo = nullptr; + } } /*** State defaults -- restored by GPU_state_init. 
***/ @@ -1052,7 +1104,7 @@ bool MTLContext::ensure_uniform_buffer_bindings( rps.last_bound_shader_state.pso_index_ != pipeline_state_instance->shader_pso_index); - const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block(); + const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block(); if (push_constant_block.size > 0) { /* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index @@ -1088,25 +1140,22 @@ bool MTLContext::ensure_uniform_buffer_bindings( * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, * and not owned by the shader instance. */ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { - const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index); - if (ubo.buffer_index >= 0) { - - /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for - * the uniform PushConstantBlock. */ - const uint32_t buffer_index = ubo.buffer_index + 1; - int ubo_offset = 0; + if (ubo.buffer_index >= 0 && ubo.location >= 0) { + /* Explicit lookup location for UBO in bind table. */ + const uint32_t ubo_location = ubo.location; + /* buffer(N) index of where to bind the UBO. */ + const uint32_t buffer_index = ubo.buffer_index; id ubo_buffer = nil; int ubo_size = 0; bool bind_dummy_buffer = false; - if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) { + if (this->pipeline_state.ubo_bindings[ubo_location].bound) { /* Fetch UBO global-binding properties from slot. 
*/ - ubo_offset = 0; - ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer( - &ubo_offset); - ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size(); + ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer(); + ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size(); /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid * allocating zero buffers. */ @@ -1145,13 +1194,15 @@ bool MTLContext::ensure_uniform_buffer_bindings( /* If ubo size is smaller than the size expected by the shader, we need to bind the * dummy buffer, which will be big enough, to avoid an OOB error. */ if (ubo_size < expected_size) { - MTL_LOG_INFO( - "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size " + MTL_LOG_UBO_ERROR( + "[Error][UBO] UBO (UBO Name: %s) bound at location: %d (buffer[[%d]]) with size " + "%d (Expected size " "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an " "over-binding, which is not used, but we need this to avoid validation " "issues\n", shader_interface->get_name_at_offset(ubo.name_offset), - buffer_index, + ubo_location, + pipeline_state_instance->base_uniform_buffer_index + buffer_index, ubo_size, expected_size, shader_interface->get_name()); @@ -1161,18 +1212,19 @@ bool MTLContext::ensure_uniform_buffer_bindings( } } else { - MTL_LOG_INFO( - "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + MTL_LOG_UBO_ERROR( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer slot: %d " + "(buffer[[%d]])-- but " "nothing was bound -- binding dummy buffer\n", shader_interface->get_name(), shader_interface->get_name_at_offset(ubo.name_offset), - buffer_index); + ubo_location, + pipeline_state_instance->base_uniform_buffer_index + buffer_index); bind_dummy_buffer = true; } if (bind_dummy_buffer) { /* Perform Dummy binding. 
*/ - ubo_offset = 0; ubo_buffer = this->get_null_buffer(); ubo_size = [ubo_buffer length]; } @@ -1185,17 +1237,17 @@ bool MTLContext::ensure_uniform_buffer_bindings( /* Bind Vertex UBO. */ if (bool(ubo.stage_mask & ShaderStage::VERTEX)) { BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); - rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + rps.bind_vertex_buffer(ubo_buffer, 0, buffer_bind_index); } /* Bind Fragment UBOs. */ if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) { BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); - rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + rps.bind_fragment_buffer(ubo_buffer, 0, buffer_bind_index); } } else { - MTL_LOG_WARNING( + MTL_LOG_UBO_ERROR( "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " "is NULL!\n", shader_interface->get_name(), @@ -1204,6 +1256,79 @@ bool MTLContext::ensure_uniform_buffer_bindings( } } } + + /* Bind Global GPUStorageBuf's */ + /* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones + * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, + * and not owned by the shader instance. */ + for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) { + const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index); + + if (ssbo.buffer_index >= 0 && ssbo.location >= 0) { + /* Explicit lookup location for SSBO in bind table.*/ + const uint32_t ssbo_location = ssbo.location; + /* buffer(N) index of where to bind the SSBO. */ + const uint32_t buffer_index = ssbo.buffer_index; + id ssbo_buffer = nil; + int ssbo_size = 0; + UNUSED_VARS_NDEBUG(ssbo_size); + + if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) { + + /* Fetch SSBO global-binding properties from slot. 
*/ + ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer(); + ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size(); + + /* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */ + BLI_assert(ssbo_buffer != nil); + BLI_assert(ssbo_size > 0); + } + else { + MTL_LOG_SSBO_ERROR( + "[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at buffer location: %d " + "(buffer[[%d]]) -- " + "but " + "nothing was bound.\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ssbo.name_offset), + ssbo_location, + pipeline_state_instance->base_storage_buffer_index + buffer_index); + +#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1 + ssbo_buffer = this->get_null_buffer(); + ssbo_size = [ssbo_buffer length]; +#endif + } + + if (ssbo_buffer != nil) { + uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index + + buffer_index; + + /* Bind Vertex SSBO. */ + if (bool(ssbo.stage_mask & ShaderStage::VERTEX)) { + BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); + rps.bind_vertex_buffer(ssbo_buffer, 0, buffer_bind_index); + } + + /* Bind Fragment SSBOs. */ + if (bool(ssbo.stage_mask & ShaderStage::FRAGMENT)) { + BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); + rps.bind_fragment_buffer(ssbo_buffer, 0, buffer_bind_index); + } + } + else { + MTL_LOG_SSBO_ERROR( + "[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d " + "(buffer[[" + "%d]]) -- but bound MTLStorageBuf was nil.\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ssbo.name_offset), + ssbo_location, + pipeline_state_instance->base_storage_buffer_index + buffer_index); + } + } + } + return true; } @@ -1218,7 +1343,7 @@ bool MTLContext::ensure_uniform_buffer_bindings( MTLComputeState &cs = this->main_command_buffer.get_compute_state(); /* Fetch push constant block and bind. 
*/ - const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block(); + const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block(); if (push_constant_block.size > 0) { /* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index @@ -1245,25 +1370,22 @@ bool MTLContext::ensure_uniform_buffer_bindings( * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, * and not owned by the shader instance. */ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { - const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index); if (ubo.buffer_index >= 0) { - - /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for - * the uniform PushConstantBlock. */ - const uint32_t buffer_index = ubo.buffer_index + 1; - int ubo_offset = 0; + /* Explicit lookup location for UBO in bind table. */ + const uint32_t ubo_location = ubo.location; + /* buffer(N) index of where to bind the UBO. */ + const uint32_t buffer_index = ubo.buffer_index; id ubo_buffer = nil; int ubo_size = 0; bool bind_dummy_buffer = false; - if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) { + if (this->pipeline_state.ubo_bindings[ubo_location].bound) { /* Fetch UBO global-binding properties from slot. 
*/ - ubo_offset = 0; - ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer( - &ubo_offset); - ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size(); + ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer(); + ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size(); UNUSED_VARS_NDEBUG(ubo_size); /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid @@ -1277,18 +1399,19 @@ bool MTLContext::ensure_uniform_buffer_bindings( } } else { - MTL_LOG_INFO( - "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + MTL_LOG_UBO_ERROR( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer location: %d " + "(buffer[[%d]]) -- but " "nothing was bound -- binding dummy buffer\n", shader_interface->get_name(), shader_interface->get_name_at_offset(ubo.name_offset), - buffer_index); + ubo_location, + pipeline_state_instance.base_uniform_buffer_index + buffer_index); bind_dummy_buffer = true; } if (bind_dummy_buffer) { /* Perform Dummy binding. */ - ubo_offset = 0; ubo_buffer = this->get_null_buffer(); ubo_size = [ubo_buffer length]; } @@ -1297,14 +1420,14 @@ bool MTLContext::ensure_uniform_buffer_bindings( uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index + buffer_index; - /* Bind Vertex UBO. */ + /* Bind Compute UBO. */ if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) { BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); - cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + cs.bind_compute_buffer(ubo_buffer, 0, buffer_bind_index); } } else { - MTL_LOG_WARNING( + MTL_LOG_UBO_ERROR( "[UBO] Compute Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " "is NULL!\n", shader_interface->get_name(), @@ -1313,6 +1436,72 @@ bool MTLContext::ensure_uniform_buffer_bindings( } } } + + /* Bind Global GPUStorageBuffers. 
*/ + /* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones + * match. */ + for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) { + const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index); + + if (ssbo.buffer_index >= 0 && ssbo.location >= 0) { + /* Explicit lookup location for UBO in bind table.*/ + const uint32_t ssbo_location = ssbo.location; + /* buffer(N) index of where to bind the UBO. */ + const uint32_t buffer_index = ssbo.buffer_index; + id ssbo_buffer = nil; + int ssbo_size = 0; + + if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) { + + /* Fetch UBO global-binding properties from slot. */ + ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer(); + ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size(); + UNUSED_VARS_NDEBUG(ssbo_size); + + /* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */ + BLI_assert(ssbo_buffer != nil); + BLI_assert(ssbo_size > 0); + } + else { + MTL_LOG_SSBO_ERROR( + "[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at SSBO location: %d " + "(buffer[[" + "%d]]) -- but " + "nothing was bound.\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ssbo.name_offset), + ssbo_location, + pipeline_state_instance.base_storage_buffer_index + buffer_index); + +#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1 + ssbo_buffer = this->get_null_buffer(); + ssbo_size = [ssbo_buffer length]; +#endif + } + + if (ssbo_buffer != nil) { + uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index + + buffer_index; + + /* Bind Vertex UBO. 
*/ + if (bool(ssbo.stage_mask & ShaderStage::COMPUTE)) { + BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS); + cs.bind_compute_buffer(ssbo_buffer, 0, buffer_bind_index); + } + } + else { + MTL_LOG_SSBO_ERROR( + "[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d " + "(buffer[[" + "%d]]) -- but bound MTLStorageBuf was nil.\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ssbo.name_offset), + ssbo_location, + pipeline_state_instance.base_storage_buffer_index + buffer_index); + } + } + } + return true; } @@ -1352,13 +1541,27 @@ void MTLContext::ensure_texture_bindings( continue; } + /* Determine bind lookup table depending on whether an image binding or texture. + * NOTE: Images and Texture Samplers share a binding table in Metal. */ + bool is_resource_sampler = shader_texture_info.is_texture_sampler; + MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = + (is_resource_sampler) ? this->pipeline_state.texture_bindings : + this->pipeline_state.image_bindings; + + /* Texture resource bind slot in shader `[[texture(n)]]`. */ int slot = shader_texture_info.slot_index; + /* Explicit bind location for texture. */ + int location = shader_texture_info.location; + /* Default sampler. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + if (slot >= 0 && slot < GPU_max_textures()) { bool bind_dummy_texture = true; - if (this->pipeline_state.texture_bindings[slot].used) { - gpu::MTLTexture *bound_texture = - this->pipeline_state.texture_bindings[slot].texture_resource; - MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + if (resource_bind_table[location].used) { + gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource; + MTLSamplerBinding &bound_sampler = (is_resource_sampler) ? 
+ this->pipeline_state.sampler_bindings[location] : + default_binding; BLI_assert(bound_texture); BLI_assert(bound_sampler.used); @@ -1385,10 +1588,11 @@ void MTLContext::ensure_texture_bindings( * expected in the shader interface. This is a problem and we will need to bind * a dummy texture to ensure correct API usage. */ MTL_LOG_WARNING( - "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong " "texture target type. (Expecting type %d, actual type %d) (binding " "name:'%s')(texture name:'%s')\n", shader_interface->get_name(), + is_resource_sampler ? "TextureSampler" : "TextureImage", bound_texture, slot, shader_texture_info.type, @@ -1399,9 +1603,12 @@ void MTLContext::ensure_texture_bindings( } else { MTL_LOG_WARNING( - "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No " + "texture was " "bound. (name:'%s')\n", shader_interface->get_name(), + is_resource_sampler ? "TextureSampler" : "TextureImage", + location, slot, shader_interface->get_name_at_offset(shader_texture_info.name_offset)); } @@ -1417,7 +1624,6 @@ void MTLContext::ensure_texture_bindings( slot); /* Bind default sampler state. */ - MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); } if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { @@ -1427,16 +1633,16 @@ void MTLContext::ensure_texture_bindings( slot); /* Bind default sampler state. 
*/ - MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); } } } else { MTL_LOG_WARNING( - "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the " "hardware/API limit of '%d'. (name:'%s')\n", this->pipeline_state.active_shader, + is_resource_sampler ? "TextureSampler" : "TextureImage", slot, GPU_max_textures(), shader_interface->get_name_at_offset(shader_texture_info.name_offset)); @@ -1569,13 +1775,27 @@ void MTLContext::ensure_texture_bindings( continue; } + /* Determine bind lookup table depending on whether an image binding or texture. + * NOTE: Images and Texture Samplers share a binding table in Metal. */ + bool is_resource_sampler = shader_texture_info.is_texture_sampler; + MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = + (is_resource_sampler) ? this->pipeline_state.texture_bindings : + this->pipeline_state.image_bindings; + + /* Texture resource bind slot in shader `[[texture(n)]]`. */ int slot = shader_texture_info.slot_index; + /* Explicit bind location for texture. */ + int location = shader_texture_info.location; + /* Default sampler. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + if (slot >= 0 && slot < GPU_max_textures()) { bool bind_dummy_texture = true; - if (this->pipeline_state.texture_bindings[slot].used) { - gpu::MTLTexture *bound_texture = - this->pipeline_state.texture_bindings[slot].texture_resource; - MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + if (resource_bind_table[location].used) { + gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource; + MTLSamplerBinding &bound_sampler = (is_resource_sampler) ? 
+ this->pipeline_state.sampler_bindings[location] : + default_binding; BLI_assert(bound_texture); BLI_assert(bound_sampler.used); @@ -1597,10 +1817,11 @@ void MTLContext::ensure_texture_bindings( * expected in the shader interface. This is a problem and we will need to bind * a dummy texture to ensure correct API usage. */ MTL_LOG_WARNING( - "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong " "texture target type. (Expecting type %d, actual type %d) (binding " "name:'%s')(texture name:'%s')\n", shader_interface->get_name(), + is_resource_sampler ? "TextureSampler" : "TextureImage", bound_texture, slot, shader_texture_info.type, @@ -1611,9 +1832,12 @@ void MTLContext::ensure_texture_bindings( } else { MTL_LOG_WARNING( - "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No " + "texture was " "bound. (name:'%s')\n", shader_interface->get_name(), + is_resource_sampler ? "TextureSampler" : "TextureImage", + location, slot, shader_interface->get_name_at_offset(shader_texture_info.name_offset)); } @@ -1636,9 +1860,10 @@ void MTLContext::ensure_texture_bindings( } else { MTL_LOG_WARNING( - "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the " "hardware/API limit of '%d'. (name:'%s')\n", this->pipeline_state.active_shader, + is_resource_sampler ? "TextureSampler" : "TextureImage", slot, GPU_max_textures(), shader_interface->get_name_at_offset(shader_texture_info.name_offset)); @@ -1900,6 +2125,47 @@ bool MTLContext::ensure_compute_pipeline_state() } void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) +{ + /* Ensure all resources required by upcoming compute submission are correctly bound to avoid + * out of bounds reads/writes. 
*/ + if (!this->ensure_compute_pipeline_state()) { + return; + } + + /* Shader instance. */ + MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface(); + const MTLComputePipelineStateInstance &compute_pso_inst = + this->pipeline_state.active_shader->get_compute_pipeline_state(); + + /* Begin compute encoder. */ + id compute_encoder = + this->main_command_buffer.ensure_begin_compute_encoder(); + BLI_assert(compute_encoder != nil); + + /* Bind PSO. */ + MTLComputeState &cs = this->main_command_buffer.get_compute_state(); + cs.bind_pso(compute_pso_inst.pso); + + /* Bind buffers. */ + this->ensure_uniform_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst); + /** Ensure resource bindings. */ + /* Texture Bindings. */ + /* We will iterate through all texture bindings on the context and determine if any of the + * active slots match those in our shader interface. If so, textures will be bound. */ + if (shader_interface->get_total_textures() > 0) { + this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst); + } + + /* Dispatch compute. */ + [compute_encoder dispatchThreadgroups:MTLSizeMake(max_ii(groups_x_len, 1), + max_ii(groups_y_len, 1), + max_ii(groups_z_len, 1)) + threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len, + compute_pso_inst.threadgroup_y_len, + compute_pso_inst.threadgroup_z_len)]; +} + +void MTLContext::compute_dispatch_indirect(StorageBuf *indirect_buf) { /* Ensure all resources required by upcoming compute submission are correctly bound. */ if (this->ensure_compute_pipeline_state()) { @@ -1927,11 +2193,22 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst); } - /* Dispatch compute. 
*/ - [compute_encoder dispatchThreadgroups:MTLSizeMake(groups_x_len, groups_y_len, groups_z_len) - threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len, - compute_pso_inst.threadgroup_y_len, - compute_pso_inst.threadgroup_z_len)]; + /* Indirect Dispatch compute. */ + MTLStorageBuf *mtlssbo = static_cast(indirect_buf); + id mtl_indirect_buf = mtlssbo->get_metal_buffer(); + BLI_assert(mtl_indirect_buf != nil); + if (mtl_indirect_buf == nil) { + MTL_LOG_WARNING("Metal Indirect Compute dispatch storage buffer does not exist.\n"); + return; + } + + /* Indirect Compute dispatch. */ + [compute_encoder + dispatchThreadgroupsWithIndirectBuffer:mtl_indirect_buf + indirectBufferOffset:0 + threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len, + compute_pso_inst.threadgroup_y_len, + compute_pso_inst.threadgroup_z_len)]; } } @@ -1980,7 +2257,7 @@ bool MTLContext::is_visibility_dirty() const /** \name Texture State Management * \{ */ -void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit) +void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image) { BLI_assert(this); BLI_assert(mtl_texture); @@ -1995,9 +2272,14 @@ void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit) return; } + MTLTextureBinding( + &resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ? + this->pipeline_state.image_bindings : + this->pipeline_state.texture_bindings; + /* Bind new texture. 
*/ - this->pipeline_state.texture_bindings[texture_unit].texture_resource = mtl_texture; - this->pipeline_state.texture_bindings[texture_unit].used = true; + resource_bind_table[texture_unit].texture_resource = mtl_texture; + resource_bind_table[texture_unit].used = true; mtl_texture->is_bound_ = true; } @@ -2016,15 +2298,20 @@ void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit) this->pipeline_state.sampler_bindings[sampler_unit] = {true, sampler_state}; } -void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture) +void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image) { BLI_assert(mtl_texture); + MTLTextureBinding( + &resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ? + this->pipeline_state.image_bindings : + this->pipeline_state.texture_bindings; + /* Iterate through textures in state and unbind. */ for (int i = 0; i < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); i++) { - if (this->pipeline_state.texture_bindings[i].texture_resource == mtl_texture) { - this->pipeline_state.texture_bindings[i].texture_resource = nullptr; - this->pipeline_state.texture_bindings[i].used = false; + if (resource_bind_table[i].texture_resource == mtl_texture) { + resource_bind_table[i].texture_resource = nullptr; + resource_bind_table[i].used = false; } } @@ -2032,16 +2319,18 @@ void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture) mtl_texture->is_bound_ = false; } -void MTLContext::texture_unbind_all() +void MTLContext::texture_unbind_all(bool is_image) { + MTLTextureBinding( + &resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ? + this->pipeline_state.image_bindings : + this->pipeline_state.texture_bindings; + /* Iterate through context's bound textures. 
*/ for (int t = 0; t < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); t++) { - if (this->pipeline_state.texture_bindings[t].used && - this->pipeline_state.texture_bindings[t].texture_resource) - { - - this->pipeline_state.texture_bindings[t].used = false; - this->pipeline_state.texture_bindings[t].texture_resource = nullptr; + if (resource_bind_table[t].used && resource_bind_table[t].texture_resource) { + resource_bind_table[t].used = false; + resource_bind_table[t].texture_resource = nullptr; } } } diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm index 03f7f3dbf7f..739a9810874 100644 --- a/source/blender/gpu/metal/mtl_drawlist.mm +++ b/source/blender/gpu/metal/mtl_drawlist.mm @@ -66,7 +66,7 @@ MTLDrawList::~MTLDrawList() void MTLDrawList::init() { - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); BLI_assert(ctx); BLI_assert(MDI_ENABLED); BLI_assert(data_ == nullptr); @@ -186,7 +186,7 @@ void MTLDrawList::submit() /* Bind Batch to setup render pipeline state. */ BLI_assert(batch_ != nullptr); - id rec = batch_->bind(0, 0, 0, 0); + id rec = batch_->bind(0); if (rec == nil) { BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n"); return; diff --git a/source/blender/gpu/metal/mtl_index_buffer.hh b/source/blender/gpu/metal/mtl_index_buffer.hh index dd828598110..182c1d2bf10 100644 --- a/source/blender/gpu/metal/mtl_index_buffer.hh +++ b/source/blender/gpu/metal/mtl_index_buffer.hh @@ -18,12 +18,16 @@ namespace blender::gpu { class MTLIndexBuf : public IndexBuf { friend class MTLBatch; friend class MTLDrawList; + friend class MTLStorageBuf; /* For bind as SSBO resource access. */ private: /* Metal buffer resource. */ gpu::MTLBuffer *ibo_ = nullptr; uint64_t alloc_size_ = 0; + /* SSBO wrapper for bind_as_ssbo support. 
*/ + MTLStorageBuf *ssbo_wrapper_ = nullptr; + #ifndef NDEBUG /* Flags whether point index buffer has been compacted * to remove false restart indices. */ diff --git a/source/blender/gpu/metal/mtl_index_buffer.mm b/source/blender/gpu/metal/mtl_index_buffer.mm index 6a912983492..b59837f4c98 100644 --- a/source/blender/gpu/metal/mtl_index_buffer.mm +++ b/source/blender/gpu/metal/mtl_index_buffer.mm @@ -7,6 +7,7 @@ #include "mtl_index_buffer.hh" #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_storage_buffer.hh" #include "BLI_span.hh" @@ -22,6 +23,11 @@ MTLIndexBuf::~MTLIndexBuf() ibo_->free(); } this->free_optimized_buffer(); + + if (ssbo_wrapper_) { + delete ssbo_wrapper_; + ssbo_wrapper_ = nullptr; + } } void MTLIndexBuf::free_optimized_buffer() @@ -42,8 +48,14 @@ void MTLIndexBuf::bind_as_ssbo(uint32_t binding) /* Ensure we have a valid IBO. */ BLI_assert(this->ibo_); - /* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */ - MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n"); + /* Ensure resource is initialized. */ + this->upload_data(); + + /* Create MTLStorageBuffer to wrap this resource and use conventional binding. */ + if (ssbo_wrapper_ == nullptr) { + ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_); + } + ssbo_wrapper_->bind(binding); } void MTLIndexBuf::read(uint32_t *data) const diff --git a/source/blender/gpu/metal/mtl_query.mm b/source/blender/gpu/metal/mtl_query.mm index 897f9cb202e..0c3af079a10 100644 --- a/source/blender/gpu/metal/mtl_query.mm +++ b/source/blender/gpu/metal/mtl_query.mm @@ -61,7 +61,7 @@ void MTLQueryPool::init(GPUQueryType type) void MTLQueryPool::begin_query() { - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); /* Ensure our allocated buffer pool has enough space for the current queries. 
*/ int query_id = query_issued_; @@ -88,7 +88,7 @@ void MTLQueryPool::begin_query() void MTLQueryPool::end_query() { - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); id rec = ctx->main_command_buffer.get_active_render_command_encoder(); [rec setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0]; @@ -96,7 +96,7 @@ void MTLQueryPool::end_query() void MTLQueryPool::get_occlusion_result(MutableSpan r_values) { - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); /* Create a blit encoder to synchronize the query buffer results between * GPU and CPU when not using shared-memory. */ diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 5903d562ac7..8224cee1268 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -36,7 +36,7 @@ class MTLShaderInterface; class MTLContext; /* Debug control. */ -#define MTL_SHADER_DEBUG_EXPORT_SOURCE 0 +#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1 #define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0 /* Separate print used only during development and debugging. */ @@ -71,7 +71,7 @@ struct MTLRenderPipelineStateInstance { * bound buffers such as vertex buffers, as the count can vary. */ int base_uniform_buffer_index; /* Base bind index for binding storage buffers. */ - int base_ssbo_buffer_index; + int base_storage_buffer_index; /* buffer bind slot used for null attributes (-1 if not needed). */ int null_attribute_buffer_index; /* buffer bind used for transform feedback output buffer. */ @@ -101,7 +101,7 @@ struct MTLComputePipelineStateInstance { * bound buffers such as vertex buffers, as the count can vary. */ int base_uniform_buffer_index = -1; /* Base bind index for binding storage buffers. 
*/ - int base_ssbo_buffer_index = -1; + int base_storage_buffer_index = -1; int threadgroup_x_len = 1; int threadgroup_y_len = 1; diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 5b5af064b25..9229b6e800f 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -386,7 +386,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) valid_ = true; /* Prepare backing data storage for local uniforms. */ - const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block(); + const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block(); if (push_constant_block.size > 0) { push_constant_data_ = MEM_callocN(push_constant_block.size, __func__); this->push_constant_bindstate_mark_dirty(true); @@ -625,7 +625,7 @@ void MTLShader::warm_cache(int limit) { if (parent_shader_ != nullptr) { MTLContext *ctx = MTLContext::get(); - MTLShader *parent_mtl = reinterpret_cast(parent_shader_); + MTLShader *parent_mtl = static_cast(parent_shader_); /* Extract PSO descriptors from parent shader. */ blender::Vector descriptors; @@ -990,6 +990,19 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state( type:MTLDataTypeInt withName:@"MTL_uniform_buffer_base_index"]; + /* Storage buffer bind index. + * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers, + * and an additional space for the push constant block. + * If the shader does not have any uniform blocks, then we can place directly after the push + * constant block. As we do not need an extra spot for the UBO at index '0'. */ + int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 + + ((mtl_interface->get_total_uniform_blocks() > 0) ? 
+ mtl_interface->get_total_uniform_blocks() : + 0); + [values setConstantValue:&MTL_storage_buffer_base_index + type:MTLDataTypeInt + withName:@"MTL_storage_buffer_base_index"]; + /* Transform feedback constant. * Ensure buffer is placed after existing buffers, including default buffers. */ int MTL_transform_feedback_buffer_index = -1; @@ -997,9 +1010,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state( /* If using argument buffers, insert index after argument buffer index. Otherwise, insert * after uniform buffer bindings. */ MTL_transform_feedback_buffer_index = - (mtl_interface->uses_argument_buffer_for_samplers()) ? - (mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) : - (MTL_uniform_buffer_base_index + mtl_interface->get_max_ubo_index() + 2); + MTL_uniform_buffer_base_index + + ((mtl_interface->uses_argument_buffer_for_samplers()) ? + (mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) : + (mtl_interface->get_max_buffer_index() + 2)); } if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) { @@ -1132,10 +1146,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state( * We need to ensure that the PSO will have valid bind-point ranges, or is using the * appropriate bindless fallback path if any bind limits are exceeded. */ #ifdef NDEBUG - /* Ensure UBO and PushConstantBlock bindings are within range. */ + /* Ensure Buffer bindings are within range. */ BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) < MTL_MAX_BUFFER_BINDINGS, - "UBO bindings exceed the fragment bind table limit."); + "UBO and SSBO bindings exceed the fragment bind table limit."); /* Transform feedback buffer. 
*/ if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) { @@ -1179,6 +1193,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state( pso_inst->frag = desc.fragmentFunction; pso_inst->pso = pso; pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index; + pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index; pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1; pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index; pso_inst->prim_type = prim_type; @@ -1283,6 +1298,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx) { /* NOTE(Metal): Bakes and caches a PSO for compute. */ BLI_assert(this); + MTLShaderInterface *mtl_interface = this->get_interface(); + BLI_assert(mtl_interface); BLI_assert(this->is_valid()); BLI_assert(shader_library_compute_ != nil); @@ -1304,7 +1321,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx) type:MTLDataTypeInt withName:@"MTL_uniform_buffer_base_index"]; - /* TODO: SSBO binding base index. */ + /* Storage buffer bind index. + * This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers, + * and an additional space for the push constant block. + * If the shader does not have any uniform blocks, then we can place directly after the push + * constant block. As we do not need an extra spot for the UBO at index '0'. */ + int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 + + ((mtl_interface->get_total_uniform_blocks() > 0) ? + mtl_interface->get_total_uniform_blocks() : + 0); + + [values setConstantValue:&MTL_storage_buffer_base_index + type:MTLDataTypeInt + withName:@"MTL_storage_buffer_base_index"]; /* Compile compute function. 
*/ NSError *error = nullptr; @@ -1312,6 +1341,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx) newFunctionWithName:compute_function_name_ constantValues:values error:&error]; + compute_function.label = [NSString stringWithUTF8String:this->name]; + if (error) { NSLog(@"Compile Error - Metal Shader compute function, error %@", error); @@ -1327,6 +1358,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx) id pso = [ctx->device newComputePipelineStateWithFunction:compute_function error:&error]; + if (error) { NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error); BLI_assert(false); @@ -1350,8 +1382,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx) compute_pso_instance_.compute = [compute_function retain]; compute_pso_instance_.pso = [pso retain]; compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index; - /* TODO: Add SSBO base buffer index support. */ - compute_pso_instance_.base_ssbo_buffer_index = -1; + compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index; } return true; } @@ -1507,7 +1538,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); id null_buf = ctx->get_null_attribute_buffer(); BLI_assert(null_buf); diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index 8b5e438bd44..ac04aa9eb30 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -105,6 +105,22 @@ * } * \endcode * + * -- Metal buffer bindings structure -- + * + * Metal shader contains several different binding types. All buffers are bound using the buffer(N) + * binding attribute tag. However, different ranges serve different purposes. 
The structure of the + * bindings always happen as follows: + * + * Vertex Buffers (N) <-- 0 + * Index buffer + * Default Push constant block for uniforms <-- MTL_uniform_buffer_base_index + * Uniform buffers <-- MTL_uniform_buffer_base_index+1 + * Storage buffers <-- MTL_storage_buffer_base_index + * Samplers/argument buffer table <-- last buffer + 1 + * Transform feedback buffer <-- MTL_transform_feedback_buffer_index ~last_buffer+2 + * + * Up to a maximum of 31 bindings. + * * -- SSBO-vertex-fetchmode -- * * SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly @@ -200,14 +216,17 @@ struct MSLUniform { } }; -struct MSLUniformBlock { +struct MSLBufferBlock { std::string type_name; std::string name; ShaderStage stage; bool is_array; + /* Resource index in buffer*/ uint slot; + uint location; + shader::Qualifier qualifiers; - bool operator==(const MSLUniformBlock &right) const + bool operator==(const MSLBufferBlock &right) const { return (type_name == right.type_name && name == right.name); } @@ -221,11 +240,16 @@ enum MSLTextureSamplerAccess { TEXTURE_ACCESS_READWRITE, }; -struct MSLTextureSampler { +struct MSLTextureResource { ShaderStage stage; shader::ImageType type; std::string name; MSLTextureSamplerAccess access; + /* Whether resource is a texture sampler or an image. */ + bool is_texture_sampler; + /* Index in shader bind table [[texture(N)]].*/ + uint slot; + /* Explicit bind index provided by ShaderCreateInfo. 
*/ uint location; eGPUTextureType get_texture_binding_type() const; @@ -233,20 +257,7 @@ struct MSLTextureSampler { void resolve_binding_indices(); - MSLTextureSampler(ShaderStage in_stage, - shader::ImageType in_sampler_type, - std::string in_sampler_name, - MSLTextureSamplerAccess in_access, - uint in_location) - : stage(in_stage), - type(in_sampler_type), - name(in_sampler_name), - access(in_access), - location(in_location) - { - } - - bool operator==(const MSLTextureSampler &right) const + bool operator==(const MSLTextureResource &right) const { /* We do not compare stage as we want to avoid duplication of resources used across multiple * stages. */ @@ -370,9 +381,10 @@ class MSLGeneratorInterface { public: /** Shader stage input/output binding information. * Derived from shader source reflection or GPUShaderCreateInfo. */ - blender::Vector uniform_blocks; + blender::Vector uniform_blocks; + blender::Vector storage_blocks; blender::Vector uniforms; - blender::Vector texture_samplers; + blender::Vector texture_samplers; blender::Vector vertex_input_attributes; blender::Vector vertex_output_varyings; /* Should match vertex outputs, but defined separately as @@ -386,7 +398,8 @@ class MSLGeneratorInterface { blender::Vector clip_distances; /* Shared Memory Blocks. */ blender::Vector shared_memory_blocks; - + /* Max bind IDs. */ + int max_tex_bind_index = 0; /** GL Global usage. */ /* Whether GL position is used, or an alternative vertex output should be the default. */ bool uses_gl_Position; @@ -415,12 +428,14 @@ class MSLGeneratorInterface { bool uses_gl_NumWorkGroups; bool uses_gl_LocalInvocationIndex; bool uses_gl_LocalInvocationID; + /* Early fragment tests. */ + bool uses_early_fragment_test; /* Parameters. */ shader::DepthWrite depth_write; /* Bind index trackers. */ - int max_ubo_slot = -1; + int max_buffer_slot = 0; /* Shader buffer bind indices for argument buffers per shader stage. * NOTE: Compute stage will re-use index 0. 
*/ @@ -463,8 +478,10 @@ class MSLGeneratorInterface { /* Samplers. */ bool use_argument_buffer_for_samplers() const; uint32_t num_samplers_for_stage(ShaderStage stage) const; + uint32_t max_sampler_index_for_stage(ShaderStage stage) const; - /* Returns the bind index, relative to MTL_uniform_buffer_base_index. */ + /* Returns the bind index, relative to + * MTL_uniform_buffer_base_index+MTL_storage_buffer_base_index. */ uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage); /* Code generation utility functions. */ @@ -480,7 +497,7 @@ class MSLGeneratorInterface { std::string generate_msl_fragment_entry_stub(); std::string generate_msl_compute_entry_stub(); std::string generate_msl_global_uniform_population(ShaderStage stage); - std::string generate_ubo_block_macro_chain(MSLUniformBlock block); + std::string generate_ubo_block_macro_chain(MSLBufferBlock block); std::string generate_msl_uniform_block_population(ShaderStage stage); std::string generate_msl_vertex_attribute_input_population(); std::string generate_msl_vertex_output_population(); @@ -546,7 +563,9 @@ inline bool is_builtin_type(std::string type) { /* Add Types as needed. */ /* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch. - * Though most efficient and maintainable approach to be determined. */ + * Though most efficient and maintainable approach to be determined. + * NOTE: Some duplicate types exit for Metal and GLSL representations, as generated typenames + * from createinfo may use GLSL signature. 
*/ static std::map glsl_builtin_types = { {"float", MTL_DATATYPE_FLOAT}, {"vec2", MTL_DATATYPE_FLOAT2}, @@ -556,10 +575,17 @@ inline bool is_builtin_type(std::string type) {"ivec2", MTL_DATATYPE_INT2}, {"ivec3", MTL_DATATYPE_INT3}, {"ivec4", MTL_DATATYPE_INT4}, + {"int2", MTL_DATATYPE_INT2}, + {"int3", MTL_DATATYPE_INT3}, + {"int4", MTL_DATATYPE_INT4}, {"uint32_t", MTL_DATATYPE_UINT}, {"uvec2", MTL_DATATYPE_UINT2}, {"uvec3", MTL_DATATYPE_UINT3}, {"uvec4", MTL_DATATYPE_UINT4}, + {"uint", MTL_DATATYPE_UINT}, + {"uint2", MTL_DATATYPE_UINT2}, + {"uint3", MTL_DATATYPE_UINT3}, + {"uint4", MTL_DATATYPE_UINT4}, {"mat3", MTL_DATATYPE_FLOAT3x3}, {"mat4", MTL_DATATYPE_FLOAT4x4}, {"bool", MTL_DATATYPE_INT}, diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 8e85d12a98e..9621cf3db69 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -712,8 +712,30 @@ static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &r } break; } - case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: + case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: { + int64_t array_offset = res.storagebuf.name.find_first_of("["); + bool writeable = (res.storagebuf.qualifiers & shader::Qualifier::WRITE) == + shader::Qualifier::WRITE; + const char *memory_scope = ((writeable) ? "device " : "constant "); + if (array_offset == -1) { + /* Create local class member as device pointer reference to bound SSBO. + * Given usage within a shader follows ssbo_name.ssbo_element syntax, we can + * dereference the pointer as the compiler will optimize this data fetch. + * To do this, we also give the UBO name a post-fix of `_local` to avoid + * macro accessor collisions. 
*/ + + os << memory_scope << res.storagebuf.type_name << " *" << res.storagebuf.name + << "_local;\n"; + os << "#define " << res.storagebuf.name << " (*" << res.storagebuf.name << "_local)\n"; + } + else { + /* For arrays, we can directly provide the constant access pointer, as the array + * syntax will de-reference this at the correct fetch index. */ + StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset); + os << memory_scope << res.storagebuf.type_name << " *" << name_no_array << ";\n"; + } break; + } } } @@ -976,6 +998,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) shd_builder_->glsl_fragment_source_.find("gl_FragDepth") != std::string::npos; msl_iface.depth_write = info->depth_write_; + + /* Early fragment tests. */ + msl_iface.uses_early_fragment_test = info->early_fragment_test_; } /* Generate SSBO vertex fetch mode uniform data hooks. */ @@ -1002,7 +1027,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) if (msl_iface.use_argument_buffer_for_samplers()) { ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " - << msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl; + << msl_iface.max_sampler_index_for_stage(ShaderStage::VERTEX) + 1 << std::endl; } if (msl_iface.uses_ssbo_vertex_fetch_mode) { ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; @@ -1164,7 +1189,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) * struct, as both are needed within texture functions. * e.g. `_mtl_combined_image_sampler_2d` * The exact typename is generated inside `get_msl_typestring_wrapper()`. 
*/ - for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + for (const MSLTextureResource &tex : msl_iface.texture_samplers) { if (bool(tex.stage & ShaderStage::VERTEX)) { ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; } @@ -1193,7 +1218,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) if (msl_iface.use_argument_buffer_for_samplers()) { ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " - << msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl; + << msl_iface.max_sampler_index_for_stage(ShaderStage::FRAGMENT) + 1 << std::endl; } /* Inject common Metal header. */ @@ -1277,7 +1302,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } /* Add Texture members. */ - for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + for (const MSLTextureResource &tex : msl_iface.texture_samplers) { if (bool(tex.stage & ShaderStage::FRAGMENT)) { ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; } @@ -1445,7 +1470,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i if (msl_iface.use_argument_buffer_for_samplers()) { ss_compute << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; ss_compute << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " - << msl_iface.num_samplers_for_stage(ShaderStage::COMPUTE) << std::endl; + << msl_iface.max_sampler_index_for_stage(ShaderStage::COMPUTE) + 1 << std::endl; } /* Inject static workgroup sizes. */ @@ -1486,7 +1511,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i * struct, as both are needed within texture functions. * e.g. `_mtl_combined_image_sampler_2d` * The exact typename is generated inside `get_msl_typestring_wrapper()`. 
*/ - for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + for (const MSLTextureResource &tex : msl_iface.texture_samplers) { if (bool(tex.stage & ShaderStage::COMPUTE)) { ss_compute << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; } @@ -1563,6 +1588,31 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i this->set_compute_function_name(@"compute_function_entry"); #endif + /* DEBUG: Export source to file for manual verification. */ +#if MTL_SHADER_DEBUG_EXPORT_SOURCE + NSFileManager *sharedFM = [NSFileManager defaultManager]; + NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL]; + NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent] + URLByAppendingPathComponent:@"Shaders/" + isDirectory:YES]; + [sharedFM createDirectoryAtURL:shader_dir + withIntermediateDirectories:YES + attributes:nil + error:nil]; + const char *path_cstr = [shader_dir fileSystemRepresentation]; + + std::ofstream compute_fs; + compute_fs.open( + (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl") + .c_str()); + compute_fs << ss_compute.str(); + compute_fs.close(); + + shader_debug_printf( + "Compute Shader Saved to: %s\n", + (std::string(path_cstr) + std::string(this->name) + "_GeneratedComputeShader.msl").c_str()); +#endif + NSString *msl_final_compute = [NSString stringWithUTF8String:ss_compute.str().c_str()]; this->shader_compute_source_from_msl(msl_final_compute); @@ -1700,8 +1750,11 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn /* NOTE: Metal requires Samplers and images to share slots. We will re-map these. * If `auto_resource_location_` is not used, then slot collision could occur and - * this should be resolved in the original create-info. */ + * this should be resolved in the original create-info. + * UBOs and SSBOs also share the same bind table. 
*/ int texture_slot_id = 0; + int ubo_buffer_slot_id_ = 0; + int storage_buffer_slot_id_ = 0; /* Determine max sampler slot for image resource offset, when not using auto resource location, * as image resources cannot overlap sampler ranges. */ @@ -1727,13 +1780,6 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn switch (res.bind_type) { case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: { - /* Re-map sampler slot to share texture indices with images. - * Only applies if `auto_resource_location_` is enabled. */ - BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); - unsigned int used_slot = (create_info_->auto_resource_location_) ? (texture_slot_id++) : - res.slot; - BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS); - /* Samplers to have access::sample by default. */ MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE; /* TextureBuffers must have read/write/read-write access pattern. */ @@ -1744,22 +1790,21 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; } - MSLTextureSampler msl_tex( - ShaderStage::ANY, res.sampler.type, res.sampler.name, access, used_slot); + MSLTextureResource msl_tex; + msl_tex.stage = ShaderStage::ANY; + msl_tex.type = res.sampler.type; + msl_tex.name = res.sampler.name; + msl_tex.access = access; + msl_tex.slot = texture_slot_id++; + msl_tex.location = (create_info_->auto_resource_location_) ? msl_tex.slot : res.slot; + msl_tex.is_texture_sampler = true; + BLI_assert(msl_tex.slot < MTL_MAX_TEXTURE_SLOTS); + texture_samplers.append(msl_tex); + max_tex_bind_index = max_ii(max_tex_bind_index, msl_tex.slot); } break; case shader::ShaderCreateInfo::Resource::BindType::IMAGE: { - - /* Re-map sampler slot to share texture indices with samplers. - * Automatically applies if `auto_resource_location_` is enabled. 
- * Otherwise, if not using automatic resource location, offset by max sampler slot. */ - BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); - unsigned int used_slot = (create_info_->auto_resource_location_) ? - (texture_slot_id++) : - res.slot + max_sampler_slot + 1; - BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS); - /* Flatten qualifier flags into final access state. */ MSLTextureSamplerAccess access; if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) { @@ -1771,29 +1816,39 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn else { access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; } - BLI_assert(used_slot >= 0 && used_slot < MTL_MAX_TEXTURE_SLOTS); /* Writeable image targets only assigned to Fragment and compute shaders. */ - MSLTextureSampler msl_tex(ShaderStage::FRAGMENT | ShaderStage::COMPUTE, - res.image.type, - res.image.name, - access, - used_slot); - texture_samplers.append(msl_tex); + MSLTextureResource msl_image; + msl_image.stage = ShaderStage::FRAGMENT | ShaderStage::COMPUTE; + msl_image.type = res.image.type; + msl_image.name = res.image.name; + msl_image.access = access; + msl_image.slot = texture_slot_id++; + msl_image.location = (create_info_->auto_resource_location_) ? msl_image.slot : res.slot; + msl_image.is_texture_sampler = false; + BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS); + + texture_samplers.append(msl_image); + max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot); } break; case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: { - MSLUniformBlock ubo; + MSLBufferBlock ubo; BLI_assert(res.uniformbuf.type_name.size() > 0); BLI_assert(res.uniformbuf.name.size() > 0); int64_t array_offset = res.uniformbuf.name.find_first_of("["); - /* UBO should either use an existing declared UBO bind slot, or automatically resolve - * index. */ - ubo.slot = (create_info_->auto_resource_location_) ? 
uniform_blocks.size() : res.slot; - BLI_assert(ubo.slot >= 0 && ubo.slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS); - max_ubo_slot = max_ii(max_ubo_slot, ubo.slot); + /* We maintain two bind indices. "Slot" refers to the storage index buffer(N) in which + * we will bind the resource. "Location" refers to the explicit bind index specified + * in ShaderCreateInfo. + * NOTE: ubo.slot is offset by one, as first UBO slot is reserved for push constant data. + */ + ubo.slot = 1 + (ubo_buffer_slot_id_++); + ubo.location = (create_info_->auto_resource_location_) ? ubo.slot : res.slot; + BLI_assert(ubo.location >= 0 && ubo.location < MTL_MAX_BUFFER_BINDINGS); + + ubo.qualifiers = shader::Qualifier::READ; ubo.type_name = res.uniformbuf.type_name; ubo.is_array = (array_offset > -1); if (ubo.is_array) { @@ -1809,13 +1864,40 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn } break; case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: { - /* TODO(Metal): Support shader storage buffer in Metal. - * Pending compute support. */ + MSLBufferBlock ssbo; + BLI_assert(res.storagebuf.type_name.size() > 0); + BLI_assert(res.storagebuf.name.size() > 0); + int64_t array_offset = res.storagebuf.name.find_first_of("["); + + /* We maintain two bind indices. "Slot" refers to the storage index buffer(N) in which + * we will bind the resource. "Location" refers to the explicit bind index specified + * in ShaderCreateInfo. */ + ssbo.slot = storage_buffer_slot_id_++; + ssbo.location = (create_info_->auto_resource_location_) ? ssbo.slot : res.slot; + + BLI_assert(ssbo.location >= 0 && ssbo.location < MTL_MAX_BUFFER_BINDINGS); + + ssbo.qualifiers = res.storagebuf.qualifiers; + ssbo.type_name = res.storagebuf.type_name; + ssbo.is_array = (array_offset > -1); + if (ssbo.is_array) { + /* If is array UBO, strip out array tag from name. 
*/ + StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset); + ssbo.name = name_no_array; + } + else { + ssbo.name = res.storagebuf.name; + } + ssbo.stage = ShaderStage::ANY; + storage_blocks.append(ssbo); } break; } } } + /* Assign maximum buffer. */ + max_buffer_slot = storage_buffer_slot_id_ + ubo_buffer_slot_id_ + 1; + /** Vertex Inputs. */ bool all_attr_location_assigned = true; for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) { @@ -1865,10 +1947,28 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const { - /* We can only use argument buffers IF sampler count exceeds static limit of 16, - * AND we can support more samplers with an argument buffer. - * NOTE: We reserve one constant sampler within the shader for fast read via point-sampling. */ - return texture_samplers.size() >= 15 && GPU_max_samplers() > 16; + /* We can only use argument buffers IF highest sampler index exceeds static limit of 16, + * AND we can support more samplers with an argument buffer. */ + bool use_argument_buffer = (texture_samplers.size() >= 15 || max_tex_bind_index >= 14) && + GPU_max_samplers() > 15; + +#ifndef NDEBUG + /* Due to explicit bind location support, we may be below the sampler limit, but forced to offset + * bindings due to the range being high. Introduce debug check here to issue warning. In these + * cases, if explicit bind location support is not required, best to use auto_resource_location + * to optimize bind point packing. */ + if (use_argument_buffer && texture_samplers.size() < 15) { + MTL_LOG_WARNING( + "Compiled Shader '%s' is falling back to bindless via argument buffers due to having a " + "texture sampler of Index: %u Which exceeds the limit of 15+1. However shader only uses " + "%d textures. 
Consider optimising bind points with .auto_resource_location(true).\n", + parent_shader_.name_get(), + max_tex_bind_index, + (int)texture_samplers.size()); + } +#endif + + return use_argument_buffer; } uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const @@ -1878,6 +1978,13 @@ uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const return texture_samplers.size(); } +uint32_t MSLGeneratorInterface::max_sampler_index_for_stage(ShaderStage stage) const +{ + /* NOTE: Sampler bindings and argument buffer shared across stages, + * in case stages share texture/sampler bindings. */ + return max_tex_bind_index; +} + uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage) { /* Note: Shader stage must be a singular index. Compound shader masks are not valid for this @@ -1889,7 +1996,7 @@ uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderSta } /* Sampler argument buffer to follow UBOs and PushConstantBlock. */ - sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_ubo_slot + 2); + sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_buffer_slot + 1); return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)]; } @@ -2014,6 +2121,11 @@ std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub() /* Undefine uniform mappings to avoid name collisions. */ out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT); + /* Early fragment tests. */ + if (uses_early_fragment_test) { + out << "[[early_fragment_tests]]" << std::endl; + } + /* Generate function entry point signature w/ resource bindings and inputs. 
*/ #ifndef NDEBUG out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT) @@ -2138,7 +2250,6 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub() out << this->generate_msl_texture_vars(ShaderStage::COMPUTE); out << this->generate_msl_global_uniform_population(ShaderStage::COMPUTE); out << this->generate_msl_uniform_block_population(ShaderStage::COMPUTE); - /* TODO(Metal): SSBO Population. */ /* Execute original 'main' function within class scope. */ out << "\t/* Execute Compute main function */\t" << std::endl @@ -2170,10 +2281,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream stage == ShaderStage::COMPUTE); /* Generate texture signatures for textures used by this stage. */ BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert()); - for (const MSLTextureSampler &tex : this->texture_samplers) { + for (const MSLTextureResource &tex : this->texture_samplers) { if (bool(tex.stage & stage)) { out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false) - << " [[texture(" << tex.location << ")]]"; + << " [[texture(" << tex.slot << ")]]"; } } @@ -2191,10 +2302,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream /* Maximum Limit of samplers defined in the function argument table is * `MTL_MAX_DEFAULT_SAMPLERS=16`. 
*/ BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS); - for (const MSLTextureSampler &tex : this->texture_samplers) { + for (const MSLTextureResource &tex : this->texture_samplers) { if (bool(tex.stage & stage)) { out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name - << "_sampler [[sampler(" << tex.location << ")]]"; + << "_sampler [[sampler(" << tex.slot << ")]]"; } } @@ -2211,7 +2322,7 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream ShaderStage stage, bool &is_first_parameter) { - for (const MSLUniformBlock &ubo : this->uniform_blocks) { + for (const MSLBufferBlock &ubo : this->uniform_blocks) { if (bool(ubo.stage & stage)) { /* For literal/existing global types, we do not need the class name-space accessor. */ out << parameter_delimiter(is_first_parameter) << "\n\tconstant "; @@ -2223,7 +2334,27 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream * MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs * bound for the current PSO specialization. */ out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+" - << (ubo.slot + 1) << ")]]"; + << ubo.slot << ")]]"; + } + } + + /* Storage buffers. */ + for (const MSLBufferBlock &ssbo : this->storage_blocks) { + if (bool(ssbo.stage & stage)) { + /* For literal/existing global types, we do not need the class name-space accessor. */ + bool writeable = (ssbo.qualifiers & shader::Qualifier::WRITE) == shader::Qualifier::WRITE; + const char *memory_scope = ((writeable) ? "device " : "constant "); + out << parameter_delimiter(is_first_parameter) << "\n\t" << memory_scope; + if (!is_builtin_type(ssbo.type_name)) { + out << get_stage_class_name(stage) << "::"; + } + /* #StorageBuffer bind indices start at `MTL_storage_buffer_base_index`. + * MTL_storage_buffer_base_index follows immediately after all uniform blocks. 
+ * such that MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + + * uniform_blocks.size() + 1. Where the additional buffer is reserved for the + * #PushConstantBlock (push constants). */ + out << ssbo.type_name << "* " << ssbo.name << "[[buffer(MTL_storage_buffer_base_index+" + << (ssbo.slot) << ")]]"; } } } @@ -2411,9 +2542,13 @@ std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shade out << "#undef " << uniform.name << std::endl; } /* UBO block undef. */ - for (const MSLUniformBlock &ubo : this->uniform_blocks) { + for (const MSLBufferBlock &ubo : this->uniform_blocks) { out << "#undef " << ubo.name << std::endl; } + /* SSBO block undef. */ + for (const MSLBufferBlock &ssbo : this->storage_blocks) { + out << "#undef " << ssbo.name << std::endl; + } return out.str(); } @@ -2698,7 +2833,7 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS /* Populate Global Uniforms. */ std::stringstream out; out << "\t/* Copy UBO block references into local class variables */" << std::endl; - for (const MSLUniformBlock &ubo : this->uniform_blocks) { + for (const MSLBufferBlock &ubo : this->uniform_blocks) { /* Only include blocks which are used within this stage. */ if (bool(ubo.stage & stage)) { @@ -2714,6 +2849,26 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS out << " = " << ubo.name << ";" << std::endl; } } + + /* Populate storage buffer references. */ + out << "\t/* Copy SSBO block references into local class variables */" << std::endl; + for (const MSLBufferBlock &ssbo : this->storage_blocks) { + + /* Only include blocks which are used within this stage. */ + if (bool(ssbo.stage & stage)) { + /* Generate UBO reference assignment. + * NOTE(Metal): We append `_local` post-fix onto the class member name + * for the ubo to avoid name collision with the UBO accessor macro. 
+ * We only need to add this post-fix for the non-array access variant, + * as the array is indexed directly, rather than requiring a dereference. */ + out << "\t" << get_shader_stage_instance_name(stage) << "." << ssbo.name; + if (!ssbo.is_array) { + out << "_local"; + } + out << " = " << ssbo.name << ";" << std::endl; + } + } + out << std::endl; return out.str(); } @@ -3080,7 +3235,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_ if (this->use_argument_buffer_for_samplers()) { out << "\t" << get_shader_stage_instance_name(shader_stage) << "." << this->texture_samplers[i].name << ".samp = &samplers.sampler_args[" - << this->texture_samplers[i].location << "];" << std::endl; + << this->texture_samplers[i].slot << "];" << std::endl; } else { out << "\t" << get_shader_stage_instance_name(shader_stage) << "." @@ -3302,20 +3457,36 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam name_buffer_size, name_buffer_offset), this->uniform_blocks[uniform_block].slot, + this->uniform_blocks[uniform_block].location, 0, this->uniform_blocks[uniform_block].stage); } + /* Prepare Interface Storage Blocks. */ + for (int storage_block = 0; storage_block < this->storage_blocks.size(); storage_block++) { + interface->add_storage_block( + name_buffer_copystr(&interface->name_buffer_, + this->storage_blocks[storage_block].name.c_str(), + name_buffer_size, + name_buffer_offset), + this->storage_blocks[storage_block].slot, + this->storage_blocks[storage_block].location, + 0, + this->storage_blocks[storage_block].stage); + } + /* Texture/sampler bindings to interface. 
*/ - for (const MSLTextureSampler &texture_sampler : this->texture_samplers) { + for (const MSLTextureResource &input_texture : this->texture_samplers) { interface->add_texture(name_buffer_copystr(&interface->name_buffer_, - texture_sampler.name.c_str(), + input_texture.name.c_str(), name_buffer_size, name_buffer_offset), - texture_sampler.location, - texture_sampler.get_texture_binding_type(), - texture_sampler.get_sampler_format(), - texture_sampler.stage); + input_texture.slot, + input_texture.location, + input_texture.get_texture_binding_type(), + input_texture.get_sampler_format(), + input_texture.is_texture_sampler, + input_texture.stage); } /* Sampler Parameters. */ @@ -3336,7 +3507,7 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam return interface; } -std::string MSLTextureSampler::get_msl_texture_type_str() const +std::string MSLTextureResource::get_msl_texture_type_str() const { /* Add Types as needed. */ switch (this->type) { @@ -3444,7 +3615,7 @@ std::string MSLTextureSampler::get_msl_texture_type_str() const }; } -std::string MSLTextureSampler::get_msl_wrapper_type_str() const +std::string MSLTextureResource::get_msl_wrapper_type_str() const { /* Add Types as needed. 
*/ switch (this->type) { @@ -3552,7 +3723,7 @@ std::string MSLTextureSampler::get_msl_wrapper_type_str() const }; } -std::string MSLTextureSampler::get_msl_return_type_str() const +std::string MSLTextureResource::get_msl_return_type_str() const { /* Add Types as needed */ switch (this->type) { @@ -3607,7 +3778,7 @@ std::string MSLTextureSampler::get_msl_return_type_str() const }; } -eGPUTextureType MSLTextureSampler::get_texture_binding_type() const +eGPUTextureType MSLTextureResource::get_texture_binding_type() const { /* Add Types as needed */ switch (this->type) { @@ -3714,7 +3885,7 @@ eGPUTextureType MSLTextureSampler::get_texture_binding_type() const }; } -eGPUSamplerFormat MSLTextureSampler::get_sampler_format() const +eGPUSamplerFormat MSLTextureResource::get_sampler_format() const { switch (this->type) { case ImageType::FLOAT_BUFFER: diff --git a/source/blender/gpu/metal/mtl_shader_interface.hh b/source/blender/gpu/metal/mtl_shader_interface.hh index da9489685b8..e43254d262f 100644 --- a/source/blender/gpu/metal/mtl_shader_interface.hh +++ b/source/blender/gpu/metal/mtl_shader_interface.hh @@ -107,12 +107,13 @@ struct MTLShaderInputAttribute { uint32_t matrix_element_count; }; -struct MTLShaderUniformBlock { +struct MTLShaderBufferBlock { uint32_t name_offset; uint32_t size = 0; /* Buffer resource bind index in shader `[[buffer(index)]]`. */ uint32_t buffer_index; - + /* Explicit bind location for texture. */ + int location; /* Tracking for manual uniform addition. */ uint32_t current_offset; ShaderStage stage_mask; @@ -120,7 +121,7 @@ struct MTLShaderUniformBlock { struct MTLShaderUniform { uint32_t name_offset; - /* Index of `MTLShaderUniformBlock` this uniform belongs to. */ + /* Index of `MTLShaderBufferBlock` this uniform belongs to. */ uint32_t size_in_bytes; uint32_t byte_offset; eMTLDataType type; @@ -132,9 +133,13 @@ struct MTLShaderTexture { uint32_t name_offset; /* Texture resource bind slot in shader `[[texture(n)]]`. 
*/ int slot_index; + /* Explicit bind location for texture. */ + int location; eGPUTextureType type; eGPUSamplerFormat sampler_format; ShaderStage stage_mask; + /* Whether texture resource is expected to be image or sampler. */ + bool is_texture_sampler; }; struct MTLShaderSampler { @@ -173,8 +178,13 @@ class MTLShaderInterface : public ShaderInterface { /* Uniform Blocks. */ uint32_t total_uniform_blocks_; uint32_t max_uniformbuf_index_; - MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; - MTLShaderUniformBlock push_constant_block_; + MTLShaderBufferBlock ubos_[MTL_MAX_BUFFER_BINDINGS]; + MTLShaderBufferBlock push_constant_block_; + + /* Storage blocks. */ + uint32_t total_storage_blocks_; + uint32_t max_storagebuf_index_; + MTLShaderBufferBlock ssbos_[MTL_MAX_BUFFER_BINDINGS]; /* Textures. */ /* Textures support explicit binding indices, so some texture slots @@ -207,13 +217,21 @@ class MTLShaderInterface : public ShaderInterface { int matrix_element_count = 1); uint32_t add_uniform_block(uint32_t name_offset, uint32_t buffer_index, + uint32_t location, + uint32_t size, + ShaderStage stage_mask = ShaderStage::ANY); + uint32_t add_storage_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t location, uint32_t size, ShaderStage stage_mask = ShaderStage::ANY); void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1); void add_texture(uint32_t name_offset, uint32_t texture_slot, + uint32_t location, eGPUTextureType tex_binding_type, eGPUSamplerFormat sampler_format, + bool is_texture_sampler, ShaderStage stage_mask = ShaderStage::FRAGMENT); void add_push_constant_block(uint32_t name_offset); @@ -232,14 +250,20 @@ class MTLShaderInterface : public ShaderInterface { uint32_t get_total_uniforms() const; /* Fetch Uniform Blocks. 
*/ - const MTLShaderUniformBlock &get_uniform_block(uint index) const; + const MTLShaderBufferBlock &get_uniform_block(uint index) const; uint32_t get_total_uniform_blocks() const; - uint32_t get_max_ubo_index() const; bool has_uniform_block(uint32_t block_index) const; uint32_t get_uniform_block_size(uint32_t block_index) const; + /* Fetch Storage Blocks. */ + const MTLShaderBufferBlock &get_storage_block(uint index) const; + uint32_t get_total_storage_blocks() const; + bool has_storage_block(uint32_t block_index) const; + uint32_t get_storage_block_size(uint32_t block_index) const; + /* Push constant uniform data block should always be available. */ - const MTLShaderUniformBlock &get_push_constant_block() const; + const MTLShaderBufferBlock &get_push_constant_block() const; + uint32_t get_max_buffer_index() const; /* Fetch textures. */ const MTLShaderTexture &get_texture(uint index) const; diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm index 93312f977a3..4427089d366 100644 --- a/source/blender/gpu/metal/mtl_shader_interface.mm +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -56,6 +56,8 @@ void MTLShaderInterface::init() total_attributes_ = 0; total_uniform_blocks_ = 0; max_uniformbuf_index_ = 0; + total_storage_blocks_ = 0; + max_storagebuf_index_ = 0; total_uniforms_ = 0; total_textures_ = 0; max_texture_index_ = -1; @@ -109,6 +111,7 @@ void MTLShaderInterface::add_input_attribute(uint32_t name_offset, uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, uint32_t buffer_index, + uint32_t location, uint32_t size, ShaderStage stage_mask) { @@ -117,9 +120,12 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, size += 16 - (size % 16); } - MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; + BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS); + + MTLShaderBufferBlock &uni_block = ubos_[total_uniform_blocks_]; uni_block.name_offset = 
name_offset; uni_block.buffer_index = buffer_index; + uni_block.location = location; uni_block.size = size; uni_block.current_offset = 0; uni_block.stage_mask = ShaderStage::ANY; @@ -127,6 +133,30 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, return (total_uniform_blocks_++); } +uint32_t MTLShaderInterface::add_storage_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t location, + uint32_t size, + ShaderStage stage_mask) +{ + /* Ensure Size is 16 byte aligned to guarantees alignment rules are satisfied. */ + if ((size % 16) != 0) { + size += 16 - (size % 16); + } + + BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS); + + MTLShaderBufferBlock &ssbo_block = ssbos_[total_storage_blocks_]; + ssbo_block.name_offset = name_offset; + ssbo_block.buffer_index = buffer_index; + ssbo_block.location = location; + ssbo_block.size = size; + ssbo_block.current_offset = 0; + ssbo_block.stage_mask = ShaderStage::ANY; + max_storagebuf_index_ = max_ii(max_storagebuf_index_, buffer_index); + return (total_storage_blocks_++); +} + void MTLShaderInterface::add_push_constant_block(uint32_t name_offset) { push_constant_block_.name_offset = name_offset; @@ -189,8 +219,10 @@ void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, in void MTLShaderInterface::add_texture(uint32_t name_offset, uint32_t texture_slot, + uint32_t location, eGPUTextureType tex_binding_type, eGPUSamplerFormat sampler_format, + bool is_texture_sampler, ShaderStage stage_mask) { BLI_assert(texture_slot >= 0 && texture_slot < GPU_max_textures()); @@ -201,8 +233,10 @@ void MTLShaderInterface::add_texture(uint32_t name_offset, BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding"); tex.name_offset = name_offset; tex.slot_index = texture_slot; + tex.location = location; tex.type = tex_binding_type; tex.sampler_format = sampler_format; + tex.is_texture_sampler = is_texture_sampler; tex.stage_mask = stage_mask; tex.used = true; 
total_textures_++; @@ -272,9 +306,7 @@ void MTLShaderInterface::prepare_common_shader_inputs() attr_len_ = this->get_total_attributes(); ubo_len_ = this->get_total_uniform_blocks(); uniform_len_ = this->get_total_uniforms() + this->get_total_textures(); - - /* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */ - ssbo_len_ = 0; + ssbo_len_ = this->get_total_storage_blocks(); /* Calculate total inputs and allocate #ShaderInput array. */ /* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */ @@ -300,16 +332,17 @@ void MTLShaderInterface::prepare_common_shader_inputs() BLI_assert(&inputs_[attr_len_] >= current_input); current_input = &inputs_[attr_len_]; for (const int ubo_index : IndexRange(total_uniform_blocks_)) { - MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index]; + MTLShaderBufferBlock &shd_ubo = ubos_[ubo_index]; current_input->name_offset = shd_ubo.name_offset; current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); /* Location refers to the index in the ubos_ array. */ - current_input->location = ubo_index; + current_input->location = shd_ubo.location; /* Binding location refers to the UBO bind slot in * #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]] * within the shader will apply an offset for bound vertex buffers and the default uniform - * PushConstantBlock. */ - current_input->binding = shd_ubo.buffer_index; + * PushConstantBlock. + * see `mtl_shader_generator.hh` for buffer binding table breakdown. */ + current_input->binding = shd_ubo.location; current_input++; } @@ -352,15 +385,24 @@ void MTLShaderInterface::prepare_common_shader_inputs() current_input->location = texture_index + total_uniforms_; /* Binding represents texture slot `[[texture(n)]]`. */ - current_input->binding = shd_tex.slot_index; + current_input->binding = shd_tex.location; current_input++; } } - /* SSBO bindings. - * TODO(Metal): Support SSBOs. 
Pending compute support. */ + /* SSBO bindings. */ BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input); current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_]; + BLI_assert(ssbo_len_ >= total_storage_blocks_); + for (const int ssbo_index : IndexRange(total_storage_blocks_)) { + MTLShaderBufferBlock &shd_ssbo = ssbos_[ssbo_index]; + current_input->name_offset = shd_ssbo.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ssbo.name_offset)); + /* `Location` is used as the returned explicit bind index for SSBOs. */ + current_input->location = shd_ssbo.location; + current_input->binding = shd_ssbo.location; + current_input++; + } this->sort_inputs(); @@ -419,14 +461,14 @@ uint32_t MTLShaderInterface::get_total_uniforms() const } /* Uniform Blocks. */ -const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const +const MTLShaderBufferBlock &MTLShaderInterface::get_uniform_block(uint index) const { - BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(index < MTL_MAX_BUFFER_BINDINGS); BLI_assert(index < get_total_uniform_blocks()); return ubos_[index]; } -const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const +const MTLShaderBufferBlock &MTLShaderInterface::get_push_constant_block() const { return push_constant_block_; } @@ -436,11 +478,6 @@ uint32_t MTLShaderInterface::get_total_uniform_blocks() const return total_uniform_blocks_; } -uint32_t MTLShaderInterface::get_max_ubo_index() const -{ - return max_uniformbuf_index_; -} - bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const { return (block_index < total_uniform_blocks_); @@ -451,6 +488,35 @@ uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0; } +/* Storage Blocks. 
*/ +const MTLShaderBufferBlock &MTLShaderInterface::get_storage_block(uint index) const +{ + BLI_assert(index < MTL_MAX_BUFFER_BINDINGS); + BLI_assert(index < get_total_storage_blocks()); + return ssbos_[index]; +} + +uint32_t MTLShaderInterface::get_total_storage_blocks() const +{ + return total_storage_blocks_; +} + +bool MTLShaderInterface::has_storage_block(uint32_t block_index) const +{ + return (block_index < total_storage_blocks_); +} + +uint32_t MTLShaderInterface::get_storage_block_size(uint32_t block_index) const +{ + return (block_index < total_storage_blocks_) ? ssbos_[block_index].size : 0; +} + +uint32_t MTLShaderInterface::get_max_buffer_index() const +{ + /* PushConstantBlock + All uniform blocks + all storage blocks. */ + return 1 + get_total_uniform_blocks() + get_total_storage_blocks(); +} + /* Textures. */ const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const { diff --git a/source/blender/gpu/metal/mtl_state.mm b/source/blender/gpu/metal/mtl_state.mm index a77d7069b5a..ac0407bc7b0 100644 --- a/source/blender/gpu/metal/mtl_state.mm +++ b/source/blender/gpu/metal/mtl_state.mm @@ -573,7 +573,7 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits) eGPUStageBarrierBits before_stages = GPU_BARRIER_STAGE_ANY; eGPUStageBarrierBits after_stages = GPU_BARRIER_STAGE_ANY; - MTLContext *ctx = reinterpret_cast(GPU_context_active_get()); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); BLI_assert(ctx); ctx->main_command_buffer.insert_memory_barrier(barrier_bits, before_stages, after_stages); @@ -651,7 +651,7 @@ void MTLStateManager::texture_bind(Texture *tex_, GPUSamplerState sampler_type, MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); if (unit >= 0) { - ctx->texture_bind(mtl_tex, unit); + ctx->texture_bind(mtl_tex, unit, false); /* Fetching textures default sampler configuration and applying * eGPUSampler State on top. 
This path exists to support @@ -670,14 +670,14 @@ void MTLStateManager::texture_unbind(Texture *tex_) gpu::MTLTexture *mtl_tex = static_cast(tex_); BLI_assert(mtl_tex); MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); - ctx->texture_unbind(mtl_tex); + ctx->texture_unbind(mtl_tex, false); } void MTLStateManager::texture_unbind_all() { MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); BLI_assert(ctx); - ctx->texture_unbind_all(); + ctx->texture_unbind_all(false); } /** \} */ @@ -688,17 +688,30 @@ void MTLStateManager::texture_unbind_all() void MTLStateManager::image_bind(Texture *tex_, int unit) { - this->texture_bind(tex_, GPUSamplerState::default_sampler(), unit); + BLI_assert(tex_); + gpu::MTLTexture *mtl_tex = static_cast(tex_); + BLI_assert(mtl_tex); + + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + if (unit >= 0) { + ctx->texture_bind(mtl_tex, unit, true); + } } void MTLStateManager::image_unbind(Texture *tex_) { - this->texture_unbind(tex_); + BLI_assert(tex_); + gpu::MTLTexture *mtl_tex = static_cast(tex_); + BLI_assert(mtl_tex); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + ctx->texture_unbind(mtl_tex, true); } void MTLStateManager::image_unbind_all() { - this->texture_unbind_all(); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + ctx->texture_unbind_all(true); } /** \} */ diff --git a/source/blender/gpu/metal/mtl_storage_buffer.hh b/source/blender/gpu/metal/mtl_storage_buffer.hh index 17aabae64f8..175d7b2a26e 100644 --- a/source/blender/gpu/metal/mtl_storage_buffer.hh +++ b/source/blender/gpu/metal/mtl_storage_buffer.hh @@ -4,3 +4,82 @@ /** \file * \ingroup gpu */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_storage_buffer_private.hh" + +#include "mtl_context.hh" + +namespace blender { +namespace gpu { + +class MTLUniformBuf; +class MTLVertBuf; +class MTLIndexBuf; + +/** + * Implementation of Storage Buffers using Metal. 
+ */ +class MTLStorageBuf : public StorageBuf { + private: + /** Allocation Handle or indirect wrapped instance. + * MTLStorageBuf can wrap a MTLVertBuf, MTLIndexBuf or MTLUniformBuf for binding as a writeable + * resource. */ + enum { + MTL_STORAGE_BUF_TYPE_DEFAULT = 0, + MTL_STORAGE_BUF_TYPE_UNIFORMBUF = 1, + MTL_STORAGE_BUF_TYPE_VERTBUF = 2, + MTL_STORAGE_BUF_TYPE_INDEXBUF = 3, + } storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT; + + union { + /* Own allocation. */ + gpu::MTLBuffer *metal_buffer_; + /* Wrapped type. */ + MTLUniformBuf *uniform_buffer_; + MTLVertBuf *vertex_buffer_; + MTLIndexBuf *index_buffer_; + }; + + /* Whether buffer has contents, if false, no GPU buffer will + * have yet been allocated. */ + bool has_data_ = false; + /** Bind-state tracking. */ + int bind_slot_ = -1; + MTLContext *bound_ctx_ = nullptr; + + /** Usage type. */ + GPUUsageType usage_; + + public: + MTLStorageBuf(size_t size, GPUUsageType usage, const char *name); + ~MTLStorageBuf(); + + MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size); + MTLStorageBuf(MTLVertBuf *vert_buf, size_t size); + MTLStorageBuf(MTLIndexBuf *index_buf, size_t size); + + void update(const void *data) override; + void bind(int slot) override; + void unbind() override; + void clear(uint32_t clear_value) override; + void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override; + void read(void *data) override; + + void init(); + + id get_metal_buffer(); + int get_size(); + const char *get_name() + { + return name_; + } + + private: + MEM_CXX_CLASS_ALLOC_FUNCS("MTLStorageBuf"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/metal/mtl_storage_buffer.mm b/source/blender/gpu/metal/mtl_storage_buffer.mm index 17aabae64f8..2dfc94cccd9 100644 --- a/source/blender/gpu/metal/mtl_storage_buffer.mm +++ b/source/blender/gpu/metal/mtl_storage_buffer.mm @@ -4,3 +4,302 @@ /** \file * \ingroup gpu */ + +#include "BLI_string.h" + +#include "gpu_backend.hh" 
+#include "gpu_context_private.hh" + +#include "mtl_backend.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_index_buffer.hh" +#include "mtl_storage_buffer.hh" +#include "mtl_uniform_buffer.hh" +#include "mtl_vertex_buffer.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +MTLStorageBuf::MTLStorageBuf(size_t size, GPUUsageType usage, const char *name) + : StorageBuf(size, name) +{ + usage_ = usage; + /* Do not create SSBO MTL buffer here to allow allocation from any thread. */ + storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT; + metal_buffer_ = nullptr; +} + +MTLStorageBuf::MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size) + : StorageBuf(size, "UniformBuffer_as_SSBO") +{ + usage_ = GPU_USAGE_DYNAMIC; + storage_source_ = MTL_STORAGE_BUF_TYPE_UNIFORMBUF; + uniform_buffer_ = uniform_buf; + BLI_assert(uniform_buffer_ != nullptr); +} + +MTLStorageBuf::MTLStorageBuf(MTLVertBuf *vert_buf, size_t size) + : StorageBuf(size, "VertexBuffer_as_SSBO") +{ + usage_ = GPU_USAGE_DYNAMIC; + storage_source_ = MTL_STORAGE_BUF_TYPE_VERTBUF; + vertex_buffer_ = vert_buf; + BLI_assert(vertex_buffer_ != nullptr); +} + +MTLStorageBuf::MTLStorageBuf(MTLIndexBuf *index_buf, size_t size) + : StorageBuf(size, "IndexBuffer_as_SSBO") +{ + usage_ = GPU_USAGE_DYNAMIC; + storage_source_ = MTL_STORAGE_BUF_TYPE_INDEXBUF; + index_buffer_ = index_buf; + BLI_assert(index_buffer_ != nullptr); +} + +MTLStorageBuf::~MTLStorageBuf() +{ + if (storage_source_ == MTL_STORAGE_BUF_TYPE_DEFAULT) { + if (metal_buffer_ != nullptr) { + metal_buffer_->free(); + metal_buffer_ = nullptr; + } + has_data_ = false; + } + + /* Ensure SSBO is not bound to active CTX. + * SSBO bindings are reset upon Context-switch so we do not need + * to check deactivated context's. 
*/ + MTLContext *ctx = MTLContext::get(); + if (ctx) { + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { + MTLStorageBufferBinding &slot = ctx->pipeline_state.ssbo_bindings[i]; + if (slot.bound && slot.ssbo == this) { + slot.bound = false; + slot.ssbo = nullptr; + } + } + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Data upload / update + * \{ */ + +void MTLStorageBuf::init() +{ + /* We only need to initialize the storage buffer for default buffer types. */ + if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) { + return; + } + BLI_assert(this); + BLI_assert(size_in_bytes_ > 0); + + /* Allocate MTL buffer */ + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + BLI_assert(ctx->device); + UNUSED_VARS_NDEBUG(ctx); + + metal_buffer_ = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_, true); + +#ifndef NDEBUG + metal_buffer_->set_label([NSString stringWithFormat:@"Storage Buffer %s", name_]); +#endif + BLI_assert(metal_buffer_ != nullptr); + BLI_assert(metal_buffer_->get_metal_buffer() != nil); + + has_data_ = false; +} + +void MTLStorageBuf::update(const void *data) +{ + /* We only need to initialize the storage buffer for default buffer types. */ + if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) { + return; + } + + /* Ensure buffer has been allocated. */ + if (metal_buffer_ == nullptr) { + init(); + } + + BLI_assert(data != nullptr); + if (data != nullptr) { + /* Upload data. 
*/ + BLI_assert(data != nullptr); + BLI_assert(!(metal_buffer_->get_resource_options() & MTLResourceStorageModePrivate)); + BLI_assert(size_in_bytes_ <= metal_buffer_->get_size()); + BLI_assert(size_in_bytes_ <= [metal_buffer_->get_metal_buffer() length]); + memcpy(metal_buffer_->get_host_ptr(), data, size_in_bytes_); + metal_buffer_->flush_range(0, size_in_bytes_); + has_data_ = true; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Usage + * \{ */ + +void MTLStorageBuf::bind(int slot) +{ + if (slot >= MTL_MAX_BUFFER_BINDINGS) { + fprintf( + stderr, + "Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n", + name_, + slot, + MTL_MAX_BUFFER_BINDINGS); + BLI_assert(false); + return; + } + + if (metal_buffer_ == nullptr) { + this->init(); + } + + if (data_ != nullptr) { + this->update(data_); + MEM_SAFE_FREE(data_); + } + + /* Bind current SSBO to active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + MTLStorageBufferBinding &ctx_ssbo_bind_slot = ctx->pipeline_state.ssbo_bindings[slot]; + ctx_ssbo_bind_slot.ssbo = this; + ctx_ssbo_bind_slot.bound = true; + + bind_slot_ = slot; + bound_ctx_ = ctx; +} + +void MTLStorageBuf::unbind() +{ + /* Unbind in debug mode to validate missing binds. + * Otherwise, only perform a full unbind upon destruction + * to ensure no lingering references. */ +#ifndef NDEBUG + if (true) { +#else + if (G.debug & G_DEBUG_GPU) { +#endif + if (bound_ctx_ != nullptr && bind_slot_ > -1) { + MTLStorageBufferBinding &ctx_ssbo_bind_slot = + bound_ctx_->pipeline_state.ssbo_bindings[bind_slot_]; + if (ctx_ssbo_bind_slot.bound && ctx_ssbo_bind_slot.ssbo == this) { + ctx_ssbo_bind_slot.bound = false; + ctx_ssbo_bind_slot.ssbo = nullptr; + } + } + } + + /* Reset bind index. */ + bind_slot_ = -1; + bound_ctx_ = nullptr; +} + +void MTLStorageBuf::clear(uint32_t clear_value) +{ + /* Fetch active context. 
*/ + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + BLI_assert_msg(ctx, "Clears should always be performed while a valid context exists."); + + if (metal_buffer_ == nullptr) { + this->init(); + } + + if (ctx) { + id blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [blit_encoder fillBuffer:metal_buffer_->get_metal_buffer() + range:NSMakeRange(0, size_in_bytes_) + value:clear_value]; + } +} + +void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uint copy_size) +{ + /* TODO(Metal): Support Copy sub operation. */ + MTL_LOG_WARNING("MTLStorageBuf::copy_sub not yet supported.\n"); +} + +void MTLStorageBuf::read(void *data) +{ + if (data == nullptr) { + return; + } + + if (metal_buffer_ == nullptr) { + this->init(); + } + + /* Managed buffers need to be explicitly flushed back to host. */ + if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) { + /* Fetch active context. */ + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Ensure GPU updates are flushed back to CPU. */ + id blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()]; + + /* Ensure sync has occurred. */ + GPU_finish(); + } + + /* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */ + memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_); +} + +id MTLStorageBuf::get_metal_buffer() +{ + + gpu::MTLBuffer *source_buffer = nullptr; + switch (storage_source_) { + /* Default SSBO buffer comes from own allocation. */ + case MTL_STORAGE_BUF_TYPE_DEFAULT: { + if (metal_buffer_ == nullptr) { + this->init(); + } + + if (data_ != nullptr) { + this->update(data_); + MEM_SAFE_FREE(data_); + } + source_buffer = metal_buffer_; + } break; + /* SSBO buffer comes from Uniform Buffer. 
*/ + case MTL_STORAGE_BUF_TYPE_UNIFORMBUF: { + source_buffer = uniform_buffer_->metal_buffer_; + } break; + /* SSBO buffer comes from Vertex Buffer. */ + case MTL_STORAGE_BUF_TYPE_VERTBUF: { + source_buffer = vertex_buffer_->vbo_; + } break; + /* SSBO buffer comes from Index Buffer. */ + case MTL_STORAGE_BUF_TYPE_INDEXBUF: { + source_buffer = index_buffer_->ibo_; + } break; + } + + /* Return Metal allocation handle and flag as used. */ + BLI_assert(source_buffer != nullptr); + source_buffer->debug_ensure_used(); + return source_buffer->get_metal_buffer(); +} + +int MTLStorageBuf::get_size() +{ + BLI_assert(this); + return size_in_bytes_; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.hh b/source/blender/gpu/metal/mtl_uniform_buffer.hh index b2250fc28d4..a3766235bbd 100644 --- a/source/blender/gpu/metal/mtl_uniform_buffer.hh +++ b/source/blender/gpu/metal/mtl_uniform_buffer.hh @@ -13,10 +13,14 @@ namespace blender::gpu { +class MTLStorageBuf; + /** * Implementation of Uniform Buffers using Metal. **/ class MTLUniformBuf : public UniformBuf { + friend class MTLStorageBuf; /* For bind as SSBO resource access. */ + private: /* Allocation Handle. */ gpu::MTLBuffer *metal_buffer_ = nullptr; @@ -29,6 +33,9 @@ class MTLUniformBuf : public UniformBuf { int bind_slot_ = -1; MTLContext *bound_ctx_ = nullptr; + /* SSBO wrapper for bind_as_ssbo support. 
*/ + MTLStorageBuf *ssbo_wrapper_ = nullptr; + public: MTLUniformBuf(size_t size, const char *name); ~MTLUniformBuf(); @@ -39,7 +46,7 @@ class MTLUniformBuf : public UniformBuf { void unbind() override; void clear_to_zero() override; - id get_metal_buffer(int *r_offset); + id get_metal_buffer(); int get_size(); const char *get_name() { diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.mm b/source/blender/gpu/metal/mtl_uniform_buffer.mm index 53664581b8c..2082de892dd 100644 --- a/source/blender/gpu/metal/mtl_uniform_buffer.mm +++ b/source/blender/gpu/metal/mtl_uniform_buffer.mm @@ -14,6 +14,7 @@ #include "mtl_backend.hh" #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_storage_buffer.hh" #include "mtl_uniform_buffer.hh" namespace blender::gpu { @@ -33,7 +34,7 @@ MTLUniformBuf::~MTLUniformBuf() * to check deactivated context's. */ MTLContext *ctx = MTLContext::get(); if (ctx) { - for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) { MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i]; if (slot.bound && slot.ubo == this) { slot.bound = false; @@ -41,6 +42,11 @@ MTLUniformBuf::~MTLUniformBuf() } } } + + if (ssbo_wrapper_) { + delete ssbo_wrapper_; + ssbo_wrapper_ = nullptr; + } } void MTLUniformBuf::update(const void *data) @@ -67,7 +73,9 @@ void MTLUniformBuf::update(const void *data) size_in_bytes_, true, data); has_data_ = true; - metal_buffer_->set_label(@"Uniform Buffer"); +#ifndef NDEBUG + metal_buffer_->set_label([NSString stringWithFormat:@"Uniform Buffer %s", name_]); +#endif BLI_assert(metal_buffer_ != nullptr); BLI_assert(metal_buffer_->get_metal_buffer() != nil); } @@ -94,7 +102,7 @@ void MTLUniformBuf::bind(int slot) return; } - BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(slot < MTL_MAX_BUFFER_BINDINGS); /* Bind current UBO to active context. 
*/ MTLContext *ctx = MTLContext::get(); @@ -126,7 +134,25 @@ void MTLUniformBuf::bind_as_ssbo(int slot) return; } - BLI_assert_msg(0, "Not implemented yet"); + /* We need to ensure data is actually allocated if using as an SSBO, as resource may be written + * to. */ + if (metal_buffer_ == nullptr) { + /* Check if we have any deferred data to upload. */ + if (data_ != nullptr) { + this->update(data_); + MEM_SAFE_FREE(data_); + } + else { + this->clear_to_zero(); + } + } + + /* Create MTLStorageBuffer to wrap this resource and use conventional binding. */ + if (ssbo_wrapper_ == nullptr) { + ssbo_wrapper_ = new MTLStorageBuf(this, size_in_bytes_); + } + + ssbo_wrapper_->bind(slot); } void MTLUniformBuf::unbind() @@ -154,19 +180,14 @@ void MTLUniformBuf::unbind() bound_ctx_ = nullptr; } -id MTLUniformBuf::get_metal_buffer(int *r_offset) +id MTLUniformBuf::get_metal_buffer() { BLI_assert(this); - *r_offset = 0; if (metal_buffer_ != nullptr && has_data_) { - *r_offset = 0; metal_buffer_->debug_ensure_used(); return metal_buffer_->get_metal_buffer(); } - else { - *r_offset = 0; - return nil; - } + return nil; } int MTLUniformBuf::get_size() diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.hh b/source/blender/gpu/metal/mtl_vertex_buffer.hh index 056e2062ab1..05a896919b6 100644 --- a/source/blender/gpu/metal/mtl_vertex_buffer.hh +++ b/source/blender/gpu/metal/mtl_vertex_buffer.hh @@ -22,7 +22,8 @@ class MTLVertBuf : public VertBuf { friend class gpu::MTLTexture; /* For buffer texture. */ friend class MTLShader; /* For transform feedback. */ friend class MTLBatch; - friend class MTLContext; /* For transform feedback. */ + friend class MTLContext; /* For transform feedback. */ + friend class MTLStorageBuf; /* For bind as SSBO resource access. */ private: /** Metal buffer allocation. **/ @@ -37,6 +38,8 @@ class MTLVertBuf : public VertBuf { uint64_t alloc_size_ = 0; /** Whether existing allocation has been submitted for use by the GPU. 
*/ bool contents_in_flight_ = false; + /* SSBO wrapper for bind_as_ssbo support. */ + MTLStorageBuf *ssbo_wrapper_ = nullptr; /* Fetch Metal buffer and offset into allocation if necessary. * Access limited to friend classes. */ diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.mm b/source/blender/gpu/metal/mtl_vertex_buffer.mm index 027a8f50281..5ce6909486c 100644 --- a/source/blender/gpu/metal/mtl_vertex_buffer.mm +++ b/source/blender/gpu/metal/mtl_vertex_buffer.mm @@ -5,6 +5,7 @@ */ #include "mtl_vertex_buffer.hh" #include "mtl_debug.hh" +#include "mtl_storage_buffer.hh" namespace blender::gpu { @@ -48,6 +49,11 @@ void MTLVertBuf::release_data() GPU_TEXTURE_FREE_SAFE(buffer_texture_); MEM_SAFE_FREE(data); + + if (ssbo_wrapper_) { + delete ssbo_wrapper_; + ssbo_wrapper_ = nullptr; + } } void MTLVertBuf::duplicate_data(VertBuf *dst_) @@ -253,7 +259,7 @@ void MTLVertBuf::bind() void MTLVertBuf::update_sub(uint start, uint len, const void *data) { /* Fetch and verify active context. */ - MTLContext *ctx = reinterpret_cast(unwrap(GPU_context_active_get())); + MTLContext *ctx = static_cast(unwrap(GPU_context_active_get())); BLI_assert(ctx); BLI_assert(ctx->device); @@ -292,10 +298,16 @@ void MTLVertBuf::update_sub(uint start, uint len, const void *data) void MTLVertBuf::bind_as_ssbo(uint binding) { - /* TODO(Metal): Support binding of buffers as SSBOs. - * Pending overall compute support for Metal backend. */ - MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n"); this->flag_used(); + + /* Ensure resource is initialized. */ + this->bind(); + + /* Create MTLStorageBuffer to wrap this resource and use conventional binding. 
*/ + if (ssbo_wrapper_ == nullptr) { + ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_); + } + ssbo_wrapper_->bind(binding); } void MTLVertBuf::bind_as_texture(uint binding) diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl index 0b96a584701..1c920a32342 100644 --- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -189,13 +189,14 @@ template T atomicXor(device T &mem, T data) return atomic_fetch_xor_explicit((device _atomic *)&mem, data, memory_order_relaxed); } -/* Unblock texture atomic compilation. - * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single thread. +/* Unblock texture atomic compilation. + * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single + * thread. * We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */ #define imageAtomicMin(tex, coord, data) \ - uint val = _texelFetch_internal(tex, coord, 0).r;\ - _texture_write_internal(tex, coord, uint4((val < data) ? val : data));\ - tex.texture->fence(); + uint val = _texelFetch_internal(tex, coord, 0).r; \ + _texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \ + tex.texture->fence(); /* Used to replace 'out' in function parameters with threadlocal reference * shortened to avoid expanding the glsl source string. */ @@ -1186,17 +1187,16 @@ inline float4 uintBitsToFloat(uint4 f) template T findLSB(T x) { /* ctz returns the number of trailing zeroes. To fetch the index of the LSB, we can also use this - * value as index, however need to filter out the case where the input value is zero to match + * value as index, however we need to filter out the case where the input value is zero to match * GLSL functionality. */ return (x == T(0)) ? 
T(-1) : T(ctz(x)); } template T findMSB(T x) { - /* clz returns the number of leading zeroes. To fetch the index of the LSB, we can also use this - * value as index when offset by 1. however need to filter out the case where the input value is - * zero to match GLSL functionality. 000000010*/ - return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1)); + /* clz returns the number of leading zeroes. To fetch the index of the MSB, we can also use this + * value as index when offset by 1. */ + return (sizeof(T) * 8) - T(1) - clz(x); } /* Texture size functions. Add texture types as needed. */ diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl index eb09f580391..f2d972ea574 100644 --- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl +++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl @@ -15,12 +15,6 @@ #define depthCubeArray samplerCubeArray #define depth2DArrayShadow sampler2DArrayShadow -/* Memory scope and pass by reference types. - * NOTE: These are required by Metal, but are not required in all cases by GLSL. */ -#define device -#define threadgroup -#define OUT(type, name, array_len) out type name[array_len] - /* Backend Functions. */ #define select(A, B, mask) mix(A, B, mask)