Geometry Nodes: add simulation support #104924

Closed
Hans Goudey wants to merge 211 commits from geometry-nodes-simulation into main

30 changed files with 1503 additions and 327 deletions
Showing only changes of commit 319f31b8ae

View File

@ -102,3 +102,6 @@ b5d310b569e07a937798a2d38539cfd290149f1c
# Cleanup: clang-format.
40d4a4cb1a6b4c3c2a486e8f2868f547530e0811
# Code Style: format (with BraceWrapping::AfterControlStatement "MultiLine").
6859bb6e67031765e79e525ae62bf2ebf4df2330

View File

@ -302,6 +302,12 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
@ -334,6 +340,12 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
<< " data " << size << " bytes";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
assert(device_queue_);
assert(size != 0);
@ -357,6 +369,12 @@ void OneapiDevice::mem_zero(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")\n";
}
/* After getting runtime errors we need to avoid performing oneAPI runtime operations
* because the associated GPU context may be in an invalid state at this point. */
if (have_error()) {
return;
}
if (!mem.device_pointer) {
mem_alloc(mem);
}
@ -602,33 +620,33 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, dest, true);
OneapiDevice::check_usm(queue_, src, true);
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
# ifdef WITH_CYCLES_DEBUG
try {
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
# ifdef WITH_CYCLES_DEBUG
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/
mem_event.wait_and_throw();
return true;
# else
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
src_type == sycl::usm::alloc::device;
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
src_type == sycl::usm::alloc::unknown;
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
* may not wait until the end of the transfer before using the memory.
*/
if (from_device_to_host || host_or_device_memop_with_offset)
mem_event.wait();
return true;
# endif
}
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
# else
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
src_type == sycl::usm::alloc::device;
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
src_type == sycl::usm::alloc::unknown;
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
* may not wait until the end of the transfer before using the memory.
*/
if (from_device_to_host || host_or_device_memop_with_offset)
mem_event.wait();
return true;
# endif
}
bool OneapiDevice::usm_memset(SyclQueue *queue_,
@ -639,23 +657,22 @@ bool OneapiDevice::usm_memset(SyclQueue *queue_,
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
OneapiDevice::check_usm(queue_, usm_ptr, true);
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
# ifdef WITH_CYCLES_DEBUG
try {
sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
# ifdef WITH_CYCLES_DEBUG
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
*/
mem_event.wait_and_throw();
# else
(void)mem_event;
# endif
return true;
}
catch (sycl::exception const &e) {
oneapi_error_string_ = e.what();
return false;
}
# else
(void)mem_event;
return true;
# endif
}
bool OneapiDevice::queue_synchronize(SyclQueue *queue_)
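The refactor above wraps each SYCL USM operation in a try/catch that caches `e.what()` into `oneapi_error_string_`, while the new `have_error()` early-outs keep later calls from touching a possibly-invalid GPU context. A minimal standalone sketch of the same pattern (assumes a SYCL toolchain; `UsmCopier` and `error_` are illustrative names, not the PR's):

```cpp
#include <sycl/sycl.hpp>
#include <string>

/* Illustrative reduction of the error-capture pattern above. */
struct UsmCopier {
  sycl::queue queue;
  std::string error_; /* Sticky error string, akin to oneapi_error_string_. */

  bool have_error() const { return !error_.empty(); }

  bool copy(void *dest, const void *src, size_t num_bytes) {
    /* Skip runtime operations once the context may be invalid. */
    if (have_error()) {
      return false;
    }
    try {
      queue.memcpy(dest, src, num_bytes).wait_and_throw();
      return true;
    }
    catch (sycl::exception const &e) {
      error_ = e.what();
      return false;
    }
  }
};
```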

View File

@ -1137,7 +1137,7 @@ bool BLI_path_abs_from_cwd(char *path, const size_t maxlen)
#ifdef _WIN32
/**
* Tries appending each of the semicolon-separated extensions in the `PATHEXT`
* environment variable (Windows-only) onto `name` in turn until such a file is found.
* environment variable (Windows-only) onto `program_name` in turn until such a file is found.
* Returns success/failure.
*/
bool BLI_path_program_extensions_add_win32(char *program_name, const size_t maxlen)
@ -1145,19 +1145,19 @@ bool BLI_path_program_extensions_add_win32(char *program_name, const size_t maxl
bool retval = false;
int type;
type = BLI_exists(name);
type = BLI_exists(program_name);
if ((type == 0) || S_ISDIR(type)) {
/* Typically 3-5, ".EXE", ".BAT"... etc. */
const int ext_max = 12;
const char *ext = BLI_getenv("PATHEXT");
if (ext) {
const int name_len = strlen(name);
const int name_len = strlen(program_name);
char *filename = alloca(name_len + ext_max);
char *filename_ext;
const char *ext_next;
/* Null terminated in the loop. */
memcpy(filename, name, name_len);
memcpy(filename, program_name, name_len);
filename_ext = filename + name_len;
do {
@ -1172,7 +1172,7 @@ bool BLI_path_program_extensions_add_win32(char *program_name, const size_t maxl
type = BLI_exists(filename);
if (type && (!S_ISDIR(type))) {
retval = true;
BLI_strncpy(name, filename, maxlen);
BLI_strncpy(program_name, filename, maxlen);
break;
}
}
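For reference, `PATHEXT` holds a semicolon-separated extension list (typically `.COM;.EXE;.BAT;...`). A hedged, POSIX-flavoured sketch of the append-and-test loop this function implements; `file_exists` stands in for `BLI_exists`:

```cpp
#include <cstdlib>
#include <string>
#include <sys/stat.h>

/* Stand-in for BLI_exists: true if `path` names an existing non-directory. */
static bool file_exists(const std::string &path)
{
  struct stat st;
  return (stat(path.c_str(), &st) == 0) && !S_ISDIR(st.st_mode);
}

/* Try `program_name` + each PATHEXT extension in turn; write back on a hit. */
static bool add_win32_extension(std::string &program_name)
{
  const char *ext = std::getenv("PATHEXT");
  if (ext == nullptr) {
    return false;
  }
  const std::string exts(ext);
  size_t start = 0;
  while (start < exts.size()) {
    size_t end = exts.find(';', start);
    if (end == std::string::npos) {
      end = exts.size();
    }
    const std::string candidate = program_name + exts.substr(start, end - start);
    if (file_exists(candidate)) {
      program_name = candidate;
      return true;
    }
    start = end + 1;
  }
  return false;
}
```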

View File

@ -275,6 +275,7 @@ set(METAL_SRC
metal/mtl_shader_generator.mm
metal/mtl_shader_interface.mm
metal/mtl_state.mm
metal/mtl_storage_buffer.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
@ -300,6 +301,7 @@ set(METAL_SRC
metal/mtl_shader_interface_type.hh
metal/mtl_shader_shared.h
metal/mtl_state.hh
metal/mtl_storage_buffer.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
metal/mtl_vertex_buffer.hh

View File

@ -52,11 +52,7 @@ class MTLBackend : public GPUBackend {
void samplers_update() override;
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override;
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
{
/* Placeholder */
}
void compute_dispatch_indirect(StorageBuf *indirect_buf) override;
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */

View File

@ -16,6 +16,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -100,8 +101,7 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
{
/* TODO(Metal): Implement MTLStorageBuf. */
return nullptr;
return new MTLStorageBuf(size, usage, name);
}
VertBuf *MTLBackend::vertbuf_alloc()
@ -398,16 +398,15 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] ||
MTLBackend::capabilities.supports_family_mac1 ||
MTLBackend::capabilities.supports_family_mac2);
/* TODO(Metal): Add support? */
GCaps.shader_draw_parameters_support = false;
GCaps.compute_shader_support = true;
GCaps.shader_storage_buffer_objects_support = true;
GCaps.shader_draw_parameters_support = true;
GCaps.geometry_shader_support = false;
GCaps.shader_storage_buffer_objects_support =
false; /* TODO(Metal): implement Storage Buffer support. */
/* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
* Can use argument buffers if a higher limit is required. */
GCaps.max_shader_storage_buffer_bindings = 24;
GCaps.max_shader_storage_buffer_bindings = 14;
if (GCaps.compute_shader_support) {
GCaps.max_work_group_count[0] = 65535;
@ -460,6 +459,18 @@ void MTLBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups
}
}
void MTLBackend::compute_dispatch_indirect(StorageBuf *indirect_buf)
{
/* Fetch Context.
* With Metal, workload submission and resource management occurs within the context.
* Call compute dispatch on valid context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx != nullptr);
if (ctx) {
ctx->compute_dispatch_indirect(indirect_buf);
}
}
/** \} */
} // blender::gpu
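The new `compute_dispatch_indirect()` expects the storage buffer to hold threadgroup counts in the layout of Metal's documented `MTLDispatchThreadgroupsIndirectArguments`. A plain C++ mirror of that layout and a sketch of filling a mapped buffer CPU-side (the real type lives in the Metal headers):

```cpp
#include <cstdint>
#include <cstring>

/* CPU-side mirror of MTLDispatchThreadgroupsIndirectArguments:
 * three uint32 threadgroup counts, tightly packed. */
struct DispatchIndirectArgs {
  uint32_t threadgroups_per_grid[3];
};

/* Write dispatch arguments into the first 12 bytes of a mapped buffer. */
static void write_dispatch_args(void *mapped_ssbo, uint32_t x, uint32_t y, uint32_t z)
{
  const DispatchIndirectArgs args = {{x, y, z}};
  std::memcpy(mapped_ssbo, &args, sizeof(args));
}
```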

View File

@ -80,10 +80,7 @@ class MTLBatch : public Batch {
~MTLBatch(){};
void draw(int v_first, int v_count, int i_first, int i_count) override;
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override
{
/* TODO(Metal): Support indirect draw commands. */
}
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override;
void multi_draw_indirect(GPUStorageBuf *indirect_buf,
int count,
intptr_t offset,
@ -94,7 +91,7 @@ class MTLBatch : public Batch {
/* Returns an initialized RenderCommandEncoder for drawing if all is good.
* Otherwise, nil. */
id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count);
id<MTLRenderCommandEncoder> bind(uint v_count);
void unbind();
/* Convenience getters. */
@ -118,6 +115,7 @@ class MTLBatch : public Batch {
private:
void shader_bind();
void draw_advanced(int v_first, int v_count, int i_first, int i_count);
void draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset);
int prepare_vertex_binding(MTLVertBuf *verts,
MTLRenderPipelineStateDescriptor &desc,
const MTLShaderInterface *interface,
@ -126,8 +124,7 @@ class MTLBatch : public Batch {
id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count);
void prepare_vertex_descriptor_and_bindings(
MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count);
void prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers);
MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
};

View File

@ -20,6 +20,7 @@
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_vertex_buffer.hh"
#include <string>
@ -37,6 +38,14 @@ void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
this->draw_advanced(v_first, v_count, i_first, i_count);
}
void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
if (this->flag & GPU_BATCH_INVALID) {
this->shader_in_use_ = false;
}
this->draw_advanced_indirect(indirect_buf, offset);
}
void MTLBatch::shader_bind()
{
if (active_shader_ && active_shader_->is_valid()) {
@ -394,7 +403,7 @@ int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
return -1;
}
id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count)
id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_count)
{
/* Setup draw call and render pipeline state here. Called by every draw, but setup here so that
* MTLDrawList only needs to perform setup a single time. */
@ -440,7 +449,7 @@ id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_fi
* shader's input.
* A unique vertex descriptor will result in a new PipelineStateObject
* being generated for the currently bound shader. */
prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count);
prepare_vertex_descriptor_and_bindings(buffers, num_buffers);
/* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer
* data operations are required. */
@ -585,8 +594,7 @@ id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_fi
void MTLBatch::unbind() {}
void MTLBatch::prepare_vertex_descriptor_and_bindings(
MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count)
void MTLBatch::prepare_vertex_descriptor_and_bindings(MTLVertBuf **buffers, int &num_buffers)
{
/* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound.
@ -743,8 +751,8 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
#endif
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLRenderCommandEncoder> rec = this->bind(v_count);
if (rec == nil) {
return;
}
@ -880,6 +888,84 @@ void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
this->unbind();
}
void MTLBatch::draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset)
{
/* Setup RenderPipelineState for batch. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
id<MTLRenderCommandEncoder> rec = this->bind(0);
if (rec == nil) {
printf("Failed to open Render Command encoder for DRAW INDIRECT\n");
return;
}
/* Rendering using SSBO Vertex Fetch is not supported with draw indirect.
* NOTE: Add support? */
if (active_shader_->get_uses_ssbo_vertex_fetch()) {
printf("Draw indirect for SSBO vertex fetch disabled\n");
return;
}
/* Fetch IndexBuffer and resolve primitive type. */
MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
if (mtl_needs_topology_emulation(this->prim_type)) {
BLI_assert_msg(false, "Metal Topology emulation unsupported for draw indirect.\n");
return;
}
/* Fetch indirect buffer Metal handle. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(unwrap(indirect_buf));
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Draw Storage Buffer is nil.\n");
return;
}
if (mtl_elem == NULL) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawPrimitives:mtl_prim_type indirectBuffer:mtl_indirect_buf indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
/* Fetch index buffer. May return an index buffer of a differing format,
* if index buffer optimization is used. In these cases, final_prim_type and
* index_count get updated with the new properties. */
MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
GPUPrimType final_prim_type = this->prim_type;
uint index_count = 0;
id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
BLI_assert(index_buffer != nil);
if (index_buffer != nil) {
/* Set depth stencil state (requires knowledge of primitive type). */
ctx->ensure_depth_stencil_state(mtl_prim_type);
/* Issue draw call. */
[rec drawIndexedPrimitives:mtl_prim_type
indexType:index_type
indexBuffer:index_buffer
indexBufferOffset:0
indirectBuffer:mtl_indirect_buf
indirectBufferOffset:offset];
ctx->main_command_buffer.register_draw_counters(1);
}
else {
BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
}
}
/* End of draw. */
this->unbind();
}
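The two `[rec draw...]` calls above consume Metal's documented indirect-argument layouts from the bound storage buffer. Plain C++ mirrors of both, for reference (the real types are `MTLDrawPrimitivesIndirectArguments` and `MTLDrawIndexedPrimitivesIndirectArguments` from the Metal headers):

```cpp
#include <cstdint>

/* Mirror of MTLDrawPrimitivesIndirectArguments (non-indexed path). */
struct DrawIndirectArgs {
  uint32_t vertex_count;
  uint32_t instance_count;
  uint32_t vertex_start;
  uint32_t base_instance;
};

/* Mirror of MTLDrawIndexedPrimitivesIndirectArguments (indexed path). */
struct DrawIndexedIndirectArgs {
  uint32_t index_count;
  uint32_t instance_count;
  uint32_t index_start;
  int32_t base_vertex;
  uint32_t base_instance;
};
```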
/** \} */
/* -------------------------------------------------------------------- */
@ -1010,4 +1096,4 @@ id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_typ
/** \} */
} // blender::gpu
} // namespace blender::gpu

View File

@ -17,17 +17,17 @@ namespace gpu {
/* Max limit without using bind-less for samplers. */
#define MTL_MAX_DEFAULT_SAMPLERS 16
/* Total maximum buffers which can be bound to an encoder, for use within a shader.
* MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS must be <=
* than MTL_MAX_BUFFER_BINDINGS. */
* Uniform buffers and storage buffers share the set of available bind buffers.
* The total number of buffer bindings must be <= MTL_MAX_BUFFER_BINDINGS.
* We also require an additional 3 core buffers for:
* - Argument buffer for bindless resources (e.g. samplers)
* - Transform feedback buffer
* - Default push constant block
* Along with up to 6+1 buffers for vertex data and index data. */
#define MTL_MAX_BUFFER_BINDINGS 31
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 16
#define MTL_MAX_STORAGE_BUFFER_BINDINGS 12
#define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31
#define MTL_MAX_UNIFORMS_PER_BLOCK 64
static_assert((MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS) <=
MTL_MAX_BUFFER_BINDINGS);
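The budget works out as 16 + 12 + 3 = 31. A standalone restatement of the invariant, for reference: the extra `kCoreBuffers` term tightens the check to include the 3 core buffers named in the comment, whereas the PR's own static_assert checks only the UBO + SSBO sum (constant names are local to this sketch):

```cpp
/* Hedged sketch mirroring the limits above. */
constexpr int kMaxBufferBindings = 31; /* MTL_MAX_BUFFER_BINDINGS */
constexpr int kMaxUboBindings = 16;    /* MTL_MAX_UNIFORM_BUFFER_BINDINGS */
constexpr int kMaxSsboBindings = 12;   /* MTL_MAX_STORAGE_BUFFER_BINDINGS */
constexpr int kCoreBuffers = 3;        /* Argument buffer, XFB, push constants. */

static_assert(kMaxUboBindings + kMaxSsboBindings + kCoreBuffers <= kMaxBufferBindings,
              "UBO + SSBO + core buffer bindings must fit the bind table");
```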
/* Context-specific limits -- populated in 'MTLBackend::platform_init' */
struct MTLCapabilities {

View File

@ -46,6 +46,7 @@ namespace blender::gpu {
class MTLContext;
class MTLCommandBufferManager;
class MTLUniformBuf;
class MTLStorageBuf;
/* Structs containing information on current binding state for textures and samplers. */
struct MTLTextureBinding {
@ -436,6 +437,11 @@ struct MTLUniformBufferBinding {
MTLUniformBuf *ubo;
};
struct MTLStorageBufferBinding {
bool bound;
MTLStorageBuf *ssbo;
};
struct MTLContextGlobalShaderPipelineState {
bool initialised;
@ -455,12 +461,18 @@ struct MTLContextGlobalShaderPipelineState {
MTLShader *active_shader;
/* Global Uniform Buffers. */
MTLUniformBufferBinding ubo_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLUniformBufferBinding ubo_bindings[MTL_MAX_BUFFER_BINDINGS];
/* Storage buffer. */
MTLStorageBufferBinding ssbo_bindings[MTL_MAX_BUFFER_BINDINGS];
/* Context Texture bindings. */
MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS];
MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS];
/* Image bindings. */
MTLTextureBinding image_bindings[MTL_MAX_TEXTURE_SLOTS];
/*** --- Render Pipeline State --- ***/
/* Track global render pipeline state for the current context. The functions in GPU_state.h
* modify these parameters. Certain values, tagged [PSO], are parameters which are required to be
@ -771,10 +783,10 @@ class MTLContext : public Context {
MTLFrameBuffer *get_default_framebuffer();
/* Context Global-State Texture Binding. */
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit);
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image);
void sampler_bind(MTLSamplerState, uint sampler_unit);
void texture_unbind(gpu::MTLTexture *mtl_texture);
void texture_unbind_all();
void texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image);
void texture_unbind_all(bool is_image);
void sampler_state_cache_init();
id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state);
id<MTLSamplerState> get_default_sampler_state();
@ -822,6 +834,7 @@ class MTLContext : public Context {
/* Compute. */
bool ensure_compute_pipeline_state();
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len);
void compute_dispatch_indirect(StorageBuf *indirect_buf);
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);

View File

@ -12,6 +12,7 @@
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -20,6 +21,7 @@
#include "GPU_capabilities.h"
#include "GPU_matrix.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_buffer.h"
@ -33,6 +35,27 @@
using namespace blender;
using namespace blender::gpu;
/* Debug option to bind null buffer for missing UBOs.
* Enabled by default. TODO: Ensure all required UBO bindings are present. */
#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO 1
/* Debug option to bind null buffer for missing SSBOs. NOTE: This is unsafe if replacing a
* write-enabled SSBO and should only be used for debugging to identify binding-related issues. */
#define DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO 0
/* Error or warning depending on debug flag. */
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_UBO == 1
# define MTL_LOG_UBO_ERROR MTL_LOG_WARNING
#else
# define MTL_LOG_UBO_ERROR MTL_LOG_ERROR
#endif
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
# define MTL_LOG_SSBO_ERROR MTL_LOG_WARNING
#else
# define MTL_LOG_SSBO_ERROR MTL_LOG_ERROR
#endif
namespace blender::gpu {
/* Global memory manager. */
@ -265,11 +288,11 @@ MTLContext::~MTLContext()
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
/* Detach resource references */
/* Detach resource references. */
GPU_texture_unbind_all();
/* Unbind UBOs */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
/* Unbind UBOs. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr)
{
@ -279,7 +302,16 @@ MTLContext::~MTLContext()
}
}
/* Release Dummy resources */
/* Unbind SSBOs. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr)
{
this->pipeline_state.ssbo_bindings[i].ssbo->unbind();
}
}
/* Release Dummy resources. */
this->free_dummy_resources();
/* Release Sampler States. */
@ -371,7 +403,7 @@ void MTLContext::activate()
}
/* Reset UBO bind state. */
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ubo_bindings[i].bound &&
this->pipeline_state.ubo_bindings[i].ubo != nullptr)
{
@ -380,6 +412,16 @@ void MTLContext::activate()
}
}
/* Reset SSBO bind state. */
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr)
{
this->pipeline_state.ssbo_bindings[i].bound = false;
this->pipeline_state.ssbo_bindings[i].ssbo = nullptr;
}
}
/* Ensure imm active. */
immActivate();
}
@ -670,17 +712,27 @@ void MTLContext::pipeline_state_init()
/* Clear bindings state. */
for (int t = 0; t < GPU_max_textures(); t++) {
/* Textures. */
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].slot_index = -1;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
/* Images. */
this->pipeline_state.image_bindings[t].used = false;
this->pipeline_state.image_bindings[t].slot_index = -1;
this->pipeline_state.image_bindings[t].texture_resource = nullptr;
}
for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) {
this->pipeline_state.sampler_bindings[s].used = false;
}
for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) {
for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) {
this->pipeline_state.ubo_bindings[u].bound = false;
this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
for (int u = 0; u < MTL_MAX_BUFFER_BINDINGS; u++) {
this->pipeline_state.ssbo_bindings[u].bound = false;
this->pipeline_state.ssbo_bindings[u].ssbo = nullptr;
}
}
/*** State defaults -- restored by GPU_state_init. ***/
@ -1052,7 +1104,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
rps.last_bound_shader_state.pso_index_ !=
pipeline_state_instance->shader_pso_index);
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
@ -1088,25 +1140,22 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
if (ubo.buffer_index >= 0 && ubo.location >= 0) {
/* Explicit lookup location for UBO in bind table. */
const uint32_t ubo_location = ubo.location;
/* buffer(N) index of where to bind the UBO. */
const uint32_t buffer_index = ubo.buffer_index;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
if (this->pipeline_state.ubo_bindings[ubo_location].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer();
ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size();
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
* allocating zero buffers. */
@ -1145,13 +1194,15 @@ bool MTLContext::ensure_uniform_buffer_bindings(
/* If ubo size is smaller than the size expected by the shader, we need to bind the
* dummy buffer, which will be big enough, to avoid an OOB error. */
if (ubo_size < expected_size) {
MTL_LOG_INFO(
"[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
MTL_LOG_UBO_ERROR(
"[Error][UBO] UBO (UBO Name: %s) bound at location: %d (buffer[[%d]]) with size "
"%d (Expected size "
"%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
"over-binding, which is not used, but we need this to avoid validation "
"issues\n",
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index,
ubo_location,
pipeline_state_instance->base_uniform_buffer_index + buffer_index,
ubo_size,
expected_size,
shader_interface->get_name());
@ -1161,18 +1212,19 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
MTL_LOG_UBO_ERROR(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer slot: %d "
"(buffer[[%d]])-- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
ubo_location,
pipeline_state_instance->base_uniform_buffer_index + buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
@ -1185,17 +1237,17 @@ bool MTLContext::ensure_uniform_buffer_bindings(
/* Bind Vertex UBO. */
if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
rps.bind_vertex_buffer(ubo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment UBOs. */
if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
rps.bind_fragment_buffer(ubo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
MTL_LOG_UBO_ERROR(
"[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
@ -1204,6 +1256,79 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBufs. */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_storage_buffer module, where the buffer data is global,
* and not owned by the shader instance. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0 && ssbo.location >= 0) {
/* Explicit lookup location for SSBO in bind table. */
const uint32_t ssbo_location = ssbo.location;
/* buffer(N) index of where to bind the SSBO. */
const uint32_t buffer_index = ssbo.buffer_index;
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
UNUSED_VARS_NDEBUG(ssbo_size);
if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) {
/* Fetch SSBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size();
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at buffer location: %d "
"(buffer[[%d]]) -- "
"but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance->base_storage_buffer_index + buffer_index);
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
ssbo_buffer = this->get_null_buffer();
ssbo_size = [ssbo_buffer length];
#endif
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index +
buffer_index;
/* Bind Vertex SSBO. */
if (bool(ssbo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ssbo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment SSBOs. */
if (bool(ssbo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but bound MTLStorageBuf was nil.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance->base_storage_buffer_index + buffer_index);
}
}
}
return true;
}
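The loop above only consumes `pipeline_state.ssbo_bindings`; population happens when a storage buffer is bound through the public API. A hedged usage sketch, assuming the existing `GPU_storage_buffer.h` entry points (the slot and size are illustrative):

```cpp
#include "GPU_storage_buffer.h"

/* Create a 1 KiB storage buffer and bind it to SSBO slot 0. On Metal,
 * GPU_storagebuf_bind() records the buffer in pipeline_state.ssbo_bindings[0],
 * which the loop above resolves when the next draw is encoded. */
static void bind_ssbo_example()
{
  GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(1024, nullptr, GPU_USAGE_DYNAMIC, "test_ssbo");
  GPU_storagebuf_bind(ssbo, 0);
  /* ... issue a draw or dispatch that reads/writes the SSBO ... */
  GPU_storagebuf_unbind(ssbo);
  GPU_storagebuf_free(ssbo);
}
```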
@ -1218,7 +1343,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
/* Fetch push constant block and bind. */
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
@ -1245,25 +1370,22 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
/* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
* the uniform PushConstantBlock. */
const uint32_t buffer_index = ubo.buffer_index + 1;
int ubo_offset = 0;
/* Explicit lookup location for UBO in bind table. */
const uint32_t ubo_location = ubo.location;
/* buffer(N) index of where to bind the UBO. */
const uint32_t buffer_index = ubo.buffer_index;
id<MTLBuffer> ubo_buffer = nil;
int ubo_size = 0;
bool bind_dummy_buffer = false;
if (this->pipeline_state.ubo_bindings[ubo.buffer_index].bound) {
if (this->pipeline_state.ubo_bindings[ubo_location].bound) {
/* Fetch UBO global-binding properties from slot. */
ubo_offset = 0;
ubo_buffer = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_metal_buffer(
&ubo_offset);
ubo_size = this->pipeline_state.ubo_bindings[ubo.buffer_index].ubo->get_size();
ubo_buffer = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_metal_buffer();
ubo_size = this->pipeline_state.ubo_bindings[ubo_location].ubo->get_size();
UNUSED_VARS_NDEBUG(ubo_size);
/* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
@ -1277,18 +1399,19 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
else {
MTL_LOG_INFO(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
MTL_LOG_UBO_ERROR(
"[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer location: %d "
"(buffer[[%d]]) -- but "
"nothing was bound -- binding dummy buffer\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
buffer_index);
ubo_location,
pipeline_state_instance.base_uniform_buffer_index + buffer_index);
bind_dummy_buffer = true;
}
if (bind_dummy_buffer) {
/* Perform Dummy binding. */
ubo_offset = 0;
ubo_buffer = this->get_null_buffer();
ubo_size = [ubo_buffer length];
}
@ -1297,14 +1420,14 @@ bool MTLContext::ensure_uniform_buffer_bindings(
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
/* Bind Compute UBO. */
if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
cs.bind_compute_buffer(ubo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_WARNING(
MTL_LOG_UBO_ERROR(
"[UBO] Compute Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
"is NULL!\n",
shader_interface->get_name(),
@ -1313,6 +1436,72 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuffers. */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0 && ssbo.location >= 0) {
/* Explicit lookup location for SSBO in bind table. */
const uint32_t ssbo_location = ssbo.location;
/* buffer(N) index of where to bind the SSBO. */
const uint32_t buffer_index = ssbo.buffer_index;
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
if (this->pipeline_state.ssbo_bindings[ssbo_location].bound) {
/* Fetch SSBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_location].ssbo->get_size();
UNUSED_VARS_NDEBUG(ssbo_size);
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
ssbo_buffer = this->get_null_buffer();
ssbo_size = [ssbo_buffer length];
#endif
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index +
buffer_index;
/* Bind Compute SSBO. */
if (bool(ssbo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
else {
MTL_LOG_SSBO_ERROR(
"[Error][SSBO] Shader '%s' had SSBO '%s' bound at SSBO location: %d "
"(buffer[["
"%d]]) -- but bound MTLStorageBuf was nil.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
}
}
}
return true;
}
@ -1352,13 +1541,27 @@ void MTLContext::ensure_texture_bindings(
continue;
}
/* Determine bind lookup table depending on whether this is an image binding or a texture sampler.
* NOTE: Images and Texture Samplers share a binding table in Metal. */
bool is_resource_sampler = shader_texture_info.is_texture_sampler;
MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] =
(is_resource_sampler) ? this->pipeline_state.texture_bindings :
this->pipeline_state.image_bindings;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot = shader_texture_info.slot_index;
/* Explicit bind location for texture. */
int location = shader_texture_info.location;
/* Default sampler. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
if (resource_bind_table[location].used) {
gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource;
MTLSamplerBinding &bound_sampler = (is_resource_sampler) ?
this->pipeline_state.sampler_bindings[location] :
default_binding;
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
@ -1385,10 +1588,11 @@ void MTLContext::ensure_texture_bindings(
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
bound_texture,
slot,
shader_texture_info.type,
@ -1399,9 +1603,12 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No "
"texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
location,
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
@ -1417,7 +1624,6 @@ void MTLContext::ensure_texture_bindings(
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
@ -1427,16 +1633,16 @@ void MTLContext::ensure_texture_bindings(
slot);
/* Bind default sampler state. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
}
}
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
is_resource_sampler ? "TextureSampler" : "TextureImage",
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
@ -1569,13 +1775,27 @@ void MTLContext::ensure_texture_bindings(
continue;
}
/* Determine bind lookup table depending on whether this is an image binding or a texture sampler.
* NOTE: Images and Texture Samplers share a binding table in Metal. */
bool is_resource_sampler = shader_texture_info.is_texture_sampler;
MTLTextureBinding(&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] =
(is_resource_sampler) ? this->pipeline_state.texture_bindings :
this->pipeline_state.image_bindings;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot = shader_texture_info.slot_index;
/* Explicit bind location for texture. */
int location = shader_texture_info.location;
/* Default sampler. */
MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
if (slot >= 0 && slot < GPU_max_textures()) {
bool bind_dummy_texture = true;
if (this->pipeline_state.texture_bindings[slot].used) {
gpu::MTLTexture *bound_texture =
this->pipeline_state.texture_bindings[slot].texture_resource;
MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
if (resource_bind_table[location].used) {
gpu::MTLTexture *bound_texture = resource_bind_table[location].texture_resource;
MTLSamplerBinding &bound_sampler = (is_resource_sampler) ?
this->pipeline_state.sampler_bindings[location] :
default_binding;
BLI_assert(bound_texture);
BLI_assert(bound_sampler.used);
@ -1597,10 +1817,11 @@ void MTLContext::ensure_texture_bindings(
* expected in the shader interface. This is a problem and we will need to bind
* a dummy texture to ensure correct API usage. */
MTL_LOG_WARNING(
"(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
"(Shader '%s') Texture (%s) %p bound to slot %d is incompatible -- Wrong "
"texture target type. (Expecting type %d, actual type %d) (binding "
"name:'%s')(texture name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
bound_texture,
slot,
shader_texture_info.type,
@ -1611,9 +1832,12 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader '%s' expected texture to be bound to slot %d -- No texture was "
"Shader '%s' expected texture (%s) to be bound to location %d (texture[[%d]]) -- No "
"texture was "
"bound. (name:'%s')\n",
shader_interface->get_name(),
is_resource_sampler ? "TextureSampler" : "TextureImage",
location,
slot,
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
}
@ -1636,9 +1860,10 @@ void MTLContext::ensure_texture_bindings(
}
else {
MTL_LOG_WARNING(
"Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
"Shader %p expected texture (%s) to be bound to slot %d -- Slot exceeds the "
"hardware/API limit of '%d'. (name:'%s')\n",
this->pipeline_state.active_shader,
is_resource_sampler ? "TextureSampler" : "TextureImage",
slot,
GPU_max_textures(),
shader_interface->get_name_at_offset(shader_texture_info.name_offset));
@ -1900,6 +2125,47 @@ bool MTLContext::ensure_compute_pipeline_state()
}
void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
{
/* Ensure all resources required by upcoming compute submission are correctly bound to avoid
* out of bounds reads/writes. */
if (!this->ensure_compute_pipeline_state()) {
return;
}
/* Shader instance. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
/* Begin compute encoder. */
id<MTLComputeCommandEncoder> compute_encoder =
this->main_command_buffer.ensure_begin_compute_encoder();
BLI_assert(compute_encoder != nil);
/* Bind PSO. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
cs.bind_pso(compute_pso_inst.pso);
/* Bind buffers. */
this->ensure_uniform_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);
/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}
/* Dispatch compute. */
[compute_encoder dispatchThreadgroups:MTLSizeMake(max_ii(groups_x_len, 1),
max_ii(groups_y_len, 1),
max_ii(groups_z_len, 1))
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
}
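From the public API, this path is reached via `GPU_compute_dispatch()`. A hedged sketch of a caller, assuming an already-created compute shader (the function name is hypothetical):

```cpp
#include "GPU_compute.h"
#include "GPU_shader.h"

/* Dispatch an 8x8x1 grid of threadgroups with a previously created compute
 * shader; on Metal this lands in MTLContext::compute_dispatch() above. */
static void dispatch_example(GPUShader *shader)
{
  GPU_shader_bind(shader);
  GPU_compute_dispatch(shader, 8, 8, 1);
  GPU_shader_unbind();
}
```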
void MTLContext::compute_dispatch_indirect(StorageBuf *indirect_buf)
{
/* Ensure all resources required by upcoming compute submission are correctly bound. */
if (this->ensure_compute_pipeline_state()) {
@ -1927,11 +2193,22 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}
/* Dispatch compute. */
[compute_encoder dispatchThreadgroups:MTLSizeMake(groups_x_len, groups_y_len, groups_z_len)
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
/* Indirect Dispatch compute. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(indirect_buf);
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Compute dispatch storage buffer does not exist.\n");
return;
}
/* Indirect Compute dispatch. */
[compute_encoder
dispatchThreadgroupsWithIndirectBuffer:mtl_indirect_buf
indirectBufferOffset:0
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
}
}
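And the indirect variant, where the threadgroup counts come from a storage buffer whose contents match `MTLDispatchThreadgroupsIndirectArguments` (three `uint32` values). A hedged sketch using the existing public API:

```cpp
#include <cstdint>

#include "GPU_compute.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"

/* Threadgroup counts supplied from a buffer instead of the CPU. */
static void dispatch_indirect_example(GPUShader *shader)
{
  const uint32_t groups[3] = {8, 8, 1}; /* x, y, z threadgroup counts. */
  GPUStorageBuf *args = GPU_storagebuf_create_ex(
      sizeof(groups), groups, GPU_USAGE_STATIC, "dispatch_args");
  GPU_shader_bind(shader);
  GPU_compute_dispatch_indirect(shader, args);
  GPU_shader_unbind();
  GPU_storagebuf_free(args);
}
```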
@ -1980,7 +2257,7 @@ bool MTLContext::is_visibility_dirty() const
/** \name Texture State Management
* \{ */
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit, bool is_image)
{
BLI_assert(this);
BLI_assert(mtl_texture);
@ -1995,9 +2272,14 @@ void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
return;
}
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Bind new texture. */
this->pipeline_state.texture_bindings[texture_unit].texture_resource = mtl_texture;
this->pipeline_state.texture_bindings[texture_unit].used = true;
resource_bind_table[texture_unit].texture_resource = mtl_texture;
resource_bind_table[texture_unit].used = true;
mtl_texture->is_bound_ = true;
}
@ -2016,15 +2298,20 @@ void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit)
this->pipeline_state.sampler_bindings[sampler_unit] = {true, sampler_state};
}
void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture)
void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture, bool is_image)
{
BLI_assert(mtl_texture);
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Iterate through textures in state and unbind. */
for (int i = 0; i < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); i++) {
if (this->pipeline_state.texture_bindings[i].texture_resource == mtl_texture) {
this->pipeline_state.texture_bindings[i].texture_resource = nullptr;
this->pipeline_state.texture_bindings[i].used = false;
if (resource_bind_table[i].texture_resource == mtl_texture) {
resource_bind_table[i].texture_resource = nullptr;
resource_bind_table[i].used = false;
}
}
@ -2032,16 +2319,18 @@ void MTLContext::texture_unbind(gpu::MTLTexture *mtl_texture)
mtl_texture->is_bound_ = false;
}
void MTLContext::texture_unbind_all()
void MTLContext::texture_unbind_all(bool is_image)
{
MTLTextureBinding(
&resource_bind_table)[MTL_MAX_TEXTURE_SLOTS] = (is_image) ?
this->pipeline_state.image_bindings :
this->pipeline_state.texture_bindings;
/* Iterate through context's bound textures. */
for (int t = 0; t < min_uu(GPU_max_textures(), MTL_MAX_TEXTURE_SLOTS); t++) {
if (this->pipeline_state.texture_bindings[t].used &&
this->pipeline_state.texture_bindings[t].texture_resource)
{
this->pipeline_state.texture_bindings[t].used = false;
this->pipeline_state.texture_bindings[t].texture_resource = nullptr;
if (resource_bind_table[t].used && resource_bind_table[t].texture_resource) {
resource_bind_table[t].used = false;
resource_bind_table[t].texture_resource = nullptr;
}
}
}
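The `MTLTextureBinding (&resource_bind_table)[...]` declarations above use a C++ reference-to-array to select the whole bind table once, instead of branching at every access. The idiom in isolation:

```cpp
#include <cstdio>

int main()
{
  int texture_table[4] = {1, 2, 3, 4};
  int image_table[4] = {5, 6, 7, 8};
  const bool is_image = true;

  /* Bind a reference to one entire array; both operands have the same array
   * type, so the conditional yields an lvalue of that array type. */
  int (&table)[4] = is_image ? image_table : texture_table;
  std::printf("%d\n", table[0]); /* Prints 5. */
  return 0;
}
```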

View File

@ -66,7 +66,7 @@ MTLDrawList::~MTLDrawList()
void MTLDrawList::init()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(MDI_ENABLED);
BLI_assert(data_ == nullptr);
@ -186,7 +186,7 @@ void MTLDrawList::submit()
/* Bind Batch to setup render pipeline state. */
BLI_assert(batch_ != nullptr);
id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
id<MTLRenderCommandEncoder> rec = batch_->bind(0);
if (rec == nil) {
BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
return;

View File

@ -18,12 +18,16 @@ namespace blender::gpu {
class MTLIndexBuf : public IndexBuf {
friend class MTLBatch;
friend class MTLDrawList;
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Metal buffer resource. */
gpu::MTLBuffer *ibo_ = nullptr;
uint64_t alloc_size_ = 0;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
#ifndef NDEBUG
/* Flags whether point index buffer has been compacted
* to remove false restart indices. */

View File

@ -7,6 +7,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "BLI_span.hh"
@ -22,6 +23,11 @@ MTLIndexBuf::~MTLIndexBuf()
ibo_->free();
}
this->free_optimized_buffer();
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLIndexBuf::free_optimized_buffer()
@ -42,8 +48,14 @@ void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
/* Ensure we have a valid IBO. */
BLI_assert(this->ibo_);
/* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */
MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
/* Ensure resource is initialized. */
this->upload_data();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
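`bind_as_ssbo()` lazily creates a single `MTLStorageBuf` wrapper on first use and reuses it afterwards, with the destructor earlier in this file deleting it. The same lazy-member idiom, reduced to a sketch (all types here are illustrative):

```cpp
#include <cstddef>
#include <cstdint>

/* Illustrative reduction of the wrapper-on-demand pattern. */
struct Wrapper {
  explicit Wrapper(void * /*resource*/, size_t /*size*/) {}
  void bind(uint32_t /*slot*/) {}
};

struct Resource {
  size_t alloc_size_ = 0;
  Wrapper *ssbo_wrapper_ = nullptr;

  void bind_as_ssbo(uint32_t binding)
  {
    if (ssbo_wrapper_ == nullptr) {
      ssbo_wrapper_ = new Wrapper(this, alloc_size_); /* Created once. */
    }
    ssbo_wrapper_->bind(binding);
  }

  ~Resource()
  {
    delete ssbo_wrapper_; /* delete on nullptr is a no-op. */
  }
};
```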
void MTLIndexBuf::read(uint32_t *data) const

View File

@ -61,7 +61,7 @@ void MTLQueryPool::init(GPUQueryType type)
void MTLQueryPool::begin_query()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
/* Ensure our allocated buffer pool has enough space for the current queries. */
int query_id = query_issued_;
@ -88,7 +88,7 @@ void MTLQueryPool::begin_query()
void MTLQueryPool::end_query()
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder();
[rec setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
@ -96,7 +96,7 @@ void MTLQueryPool::end_query()
void MTLQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
{
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
/* Create a blit encoder to synchronize the query buffer results between
* GPU and CPU when not using shared-memory. */

View File

@ -36,7 +36,7 @@ class MTLShaderInterface;
class MTLContext;
/* Debug control. */
#define MTL_SHADER_DEBUG_EXPORT_SOURCE 0
#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1
#define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0
/* Separate print used only during development and debugging. */
@ -71,7 +71,7 @@ struct MTLRenderPipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index;
int base_storage_buffer_index;
/* buffer bind slot used for null attributes (-1 if not needed). */
int null_attribute_buffer_index;
/* buffer bind used for transform feedback output buffer. */
@ -101,7 +101,7 @@ struct MTLComputePipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index = -1;
int base_storage_buffer_index = -1;
int threadgroup_x_len = 1;
int threadgroup_y_len = 1;

View File

@ -386,7 +386,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
valid_ = true;
/* Prepare backing data storage for local uniforms. */
const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
this->push_constant_bindstate_mark_dirty(true);
@ -625,7 +625,7 @@ void MTLShader::warm_cache(int limit)
{
if (parent_shader_ != nullptr) {
MTLContext *ctx = MTLContext::get();
MTLShader *parent_mtl = reinterpret_cast<MTLShader *>(parent_shader_);
MTLShader *parent_mtl = static_cast<MTLShader *>(parent_shader_);
/* Extract PSO descriptors from parent shader. */
blender::Vector<MTLRenderPipelineStateDescriptor> descriptors;
@ -990,6 +990,19 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, we can place it directly after the push
* constant block, as we do not need an extra slot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
((mtl_interface->get_total_uniform_blocks() > 0) ?
mtl_interface->get_total_uniform_blocks() :
0);
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Transform feedback constant.
* Ensure buffer is placed after existing buffers, including default buffers. */
int MTL_transform_feedback_buffer_index = -1;
@ -997,9 +1010,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
/* If using argument buffers, insert index after argument buffer index. Otherwise, insert
* after uniform buffer bindings. */
MTL_transform_feedback_buffer_index =
(mtl_interface->uses_argument_buffer_for_samplers()) ?
(mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) :
(MTL_uniform_buffer_base_index + mtl_interface->get_max_ubo_index() + 2);
MTL_uniform_buffer_base_index +
((mtl_interface->uses_argument_buffer_for_samplers()) ?
(mtl_interface->get_argument_buffer_bind_index(ShaderStage::VERTEX) + 1) :
(mtl_interface->get_max_buffer_index() + 2));
}
if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1132,10 +1146,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
* We need to ensure that the PSO will have valid bind-point ranges, or is using the
* appropriate bindless fallback path if any bind limits are exceeded. */
#ifdef NDEBUG
/* Ensure UBO and PushConstantBlock bindings are within range. */
/* Ensure Buffer bindings are within range. */
BLI_assert_msg((MTL_uniform_buffer_base_index + get_max_ubo_index() + 2) <
MTL_MAX_BUFFER_BINDINGS,
"UBO bindings exceed the fragment bind table limit.");
"UBO and SSBO bindings exceed the fragment bind table limit.");
/* Transform feedback buffer. */
if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1179,6 +1193,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
pso_inst->frag = desc.fragmentFunction;
pso_inst->pso = pso;
pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
pso_inst->prim_type = prim_type;
@ -1283,6 +1298,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(mtl_interface);
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);
@ -1304,7 +1321,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* TODO: SSBO binding base index. */
/* Storage buffer bind index.
 * This is always relative to MTL_uniform_buffer_base_index, offset by the number of active
 * uniform blocks plus one additional slot for the push constant block.
 * If the shader does not have any uniform blocks, storage buffers are placed directly after
 * the push constant block, as no extra slot is needed for a UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + 1 +
((mtl_interface->get_total_uniform_blocks() > 0) ?
mtl_interface->get_total_uniform_blocks() :
0);
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Compile compute function. */
NSError *error = nullptr;
@ -1312,6 +1341,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
newFunctionWithName:compute_function_name_
constantValues:values
error:&error];
compute_function.label = [NSString stringWithUTF8String:this->name];
if (error) {
NSLog(@"Compile Error - Metal Shader compute function, error %@", error);
@ -1327,6 +1358,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
id<MTLComputePipelineState> pso = [ctx->device
newComputePipelineStateWithFunction:compute_function
error:&error];
if (error) {
NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
BLI_assert(false);
@ -1350,8 +1382,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
compute_pso_instance_.compute = [compute_function retain];
compute_pso_instance_.pso = [pso retain];
compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index;
/* TODO: Add SSBO base buffer index support. */
compute_pso_instance_.base_ssbo_buffer_index = -1;
compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index;
}
return true;
}
@ -1507,7 +1538,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder
}
/* Bind NULL buffer to given VBO slot. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer();
BLI_assert(null_buf);

View File

@ -105,6 +105,22 @@
* }
* \endcode
*
* -- Metal buffer bindings structure --
*
* A Metal shader contains several different binding types. All buffers are bound using the
* buffer(N) binding attribute tag; however, different ranges serve different purposes. The
* structure of the bindings is always as follows:
*
* Vertex Buffers (N) <-- 0
* Index buffer
* Default Push constant block for uniforms <-- MTL_uniform_buffer_base_index
* Uniform buffers <-- MTL_uniform_buffer_base_index+1
* Storage buffers <-- MTL_storage_buffer_base_index
* Samplers/argument buffer table <-- last buffer + 1
* Transform feedback buffer <-- MTL_transform_feedback_buffer_index ~last_buffer+2
*
* Up to a maximum of 31 bindings.
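*
* Illustrative example (indices assumed, not from this patch): a shader with 3 vertex
* buffers, 2 uniform blocks and 1 storage block resolves to:
*
* Vertex buffers <-- buffer(0..2)
* Push constant block <-- buffer(3), MTL_uniform_buffer_base_index = 3
* Uniform buffers <-- buffer(4..5)
* Storage buffer <-- buffer(6), MTL_storage_buffer_base_index = 3 + 1 + 2 = 6
* Sampler argument buffer (if used) <-- buffer(7)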
*
* -- SSBO-vertex-fetchmode --
*
* SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
@ -200,14 +216,17 @@ struct MSLUniform {
}
};
struct MSLUniformBlock {
struct MSLBufferBlock {
std::string type_name;
std::string name;
ShaderStage stage;
bool is_array;
/* Resource index in the buffer bind table [[buffer(N)]]. */
uint slot;
uint location;
shader::Qualifier qualifiers;
bool operator==(const MSLUniformBlock &right) const
bool operator==(const MSLBufferBlock &right) const
{
return (type_name == right.type_name && name == right.name);
}
@ -221,11 +240,16 @@ enum MSLTextureSamplerAccess {
TEXTURE_ACCESS_READWRITE,
};
struct MSLTextureSampler {
struct MSLTextureResource {
ShaderStage stage;
shader::ImageType type;
std::string name;
MSLTextureSamplerAccess access;
/* Whether resource is a texture sampler or an image. */
bool is_texture_sampler;
/* Index in shader bind table [[texture(N)]]. */
uint slot;
/* Explicit bind index provided by ShaderCreateInfo. */
uint location;
eGPUTextureType get_texture_binding_type() const;
@ -233,20 +257,7 @@ struct MSLTextureSampler {
void resolve_binding_indices();
MSLTextureSampler(ShaderStage in_stage,
shader::ImageType in_sampler_type,
std::string in_sampler_name,
MSLTextureSamplerAccess in_access,
uint in_location)
: stage(in_stage),
type(in_sampler_type),
name(in_sampler_name),
access(in_access),
location(in_location)
{
}
bool operator==(const MSLTextureSampler &right) const
bool operator==(const MSLTextureResource &right) const
{
/* We do not compare stage as we want to avoid duplication of resources used across multiple
* stages. */
@ -370,9 +381,10 @@ class MSLGeneratorInterface {
public:
/** Shader stage input/output binding information.
* Derived from shader source reflection or GPUShaderCreateInfo. */
blender::Vector<MSLUniformBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> storage_blocks;
blender::Vector<MSLUniform> uniforms;
blender::Vector<MSLTextureSampler> texture_samplers;
blender::Vector<MSLTextureResource> texture_samplers;
blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings;
/* Should match vertex outputs, but defined separately as
@ -386,7 +398,8 @@ class MSLGeneratorInterface {
blender::Vector<char> clip_distances;
/* Shared Memory Blocks. */
blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
/* Max bind IDs. */
int max_tex_bind_index = 0;
/** GL Global usage. */
/* Whether GL position is used, or an alternative vertex output should be the default. */
bool uses_gl_Position;
@ -415,12 +428,14 @@ class MSLGeneratorInterface {
bool uses_gl_NumWorkGroups;
bool uses_gl_LocalInvocationIndex;
bool uses_gl_LocalInvocationID;
/* Early fragment tests. */
bool uses_early_fragment_test;
/* Parameters. */
shader::DepthWrite depth_write;
/* Bind index trackers. */
int max_ubo_slot = -1;
int max_buffer_slot = 0;
/* Shader buffer bind indices for argument buffers per shader stage.
* NOTE: Compute stage will re-use index 0. */
@ -463,8 +478,10 @@ class MSLGeneratorInterface {
/* Samplers. */
bool use_argument_buffer_for_samplers() const;
uint32_t num_samplers_for_stage(ShaderStage stage) const;
uint32_t max_sampler_index_for_stage(ShaderStage stage) const;
/* Returns the bind index, relative to MTL_uniform_buffer_base_index. */
/* Returns the bind index, relative to
* MTL_uniform_buffer_base_index+MTL_storage_buffer_base_index. */
uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage);
/* Code generation utility functions. */
@ -480,7 +497,7 @@ class MSLGeneratorInterface {
std::string generate_msl_fragment_entry_stub();
std::string generate_msl_compute_entry_stub();
std::string generate_msl_global_uniform_population(ShaderStage stage);
std::string generate_ubo_block_macro_chain(MSLUniformBlock block);
std::string generate_ubo_block_macro_chain(MSLBufferBlock block);
std::string generate_msl_uniform_block_population(ShaderStage stage);
std::string generate_msl_vertex_attribute_input_population();
std::string generate_msl_vertex_output_population();
@ -546,7 +563,9 @@ inline bool is_builtin_type(std::string type)
{
/* Add Types as needed. */
/* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch.
* Though most efficient and maintainable approach to be determined. */
* Though most efficient and maintainable approach to be determined.
* NOTE: Some duplicate types exist for Metal and GLSL representations, as generated type names
* from create-info may use the GLSL signature. */
static std::map<std::string, eMTLDataType> glsl_builtin_types = {
{"float", MTL_DATATYPE_FLOAT},
{"vec2", MTL_DATATYPE_FLOAT2},
@ -556,10 +575,17 @@ inline bool is_builtin_type(std::string type)
{"ivec2", MTL_DATATYPE_INT2},
{"ivec3", MTL_DATATYPE_INT3},
{"ivec4", MTL_DATATYPE_INT4},
{"int2", MTL_DATATYPE_INT2},
{"int3", MTL_DATATYPE_INT3},
{"int4", MTL_DATATYPE_INT4},
{"uint32_t", MTL_DATATYPE_UINT},
{"uvec2", MTL_DATATYPE_UINT2},
{"uvec3", MTL_DATATYPE_UINT3},
{"uvec4", MTL_DATATYPE_UINT4},
{"uint", MTL_DATATYPE_UINT},
{"uint2", MTL_DATATYPE_UINT2},
{"uint3", MTL_DATATYPE_UINT3},
{"uint4", MTL_DATATYPE_UINT4},
{"mat3", MTL_DATATYPE_FLOAT3x3},
{"mat4", MTL_DATATYPE_FLOAT4x4},
{"bool", MTL_DATATYPE_INT},

View File

@ -712,8 +712,30 @@ static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &r
}
break;
}
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER:
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
int64_t array_offset = res.storagebuf.name.find_first_of("[");
bool writeable = (res.storagebuf.qualifiers & shader::Qualifier::WRITE) ==
shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
if (array_offset == -1) {
/* Create local class member as device pointer reference to bound SSBO.
 * Given usage within a shader follows ssbo_name.ssbo_element syntax, we can
 * dereference the pointer as the compiler will optimize this data fetch.
 * To do this, we also give the SSBO name a post-fix of `_local` to avoid
 * macro accessor collisions. */
os << memory_scope << res.storagebuf.type_name << " *" << res.storagebuf.name
<< "_local;\n";
os << "#define " << res.storagebuf.name << " (*" << res.storagebuf.name << "_local)\n";
}
else {
/* For arrays, we can provide the access pointer directly, as the array
 * syntax will dereference this at the correct fetch index. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
os << memory_scope << res.storagebuf.type_name << " *" << name_no_array << ";\n";
}
break;
}
}
}
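As a sketch of the emitted MSL (resource names hypothetical, not from this patch): a read-only array storage block `PointData points[]` generates a direct pointer declaration, while a writeable non-array block `SceneData scene` generates a `_local` pointer plus an accessor macro:

    constant PointData *points;
    device SceneData *scene_local;
    #define scene (*scene_local)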
@ -976,6 +998,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
shd_builder_->glsl_fragment_source_.find("gl_FragDepth") !=
std::string::npos;
msl_iface.depth_write = info->depth_write_;
/* Early fragment tests. */
msl_iface.uses_early_fragment_test = info->early_fragment_test_;
}
/* Generate SSBO vertex fetch mode uniform data hooks. */
@ -1002,7 +1027,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::VERTEX) + 1 << std::endl;
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
@ -1164,7 +1189,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::VERTEX)) {
ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1193,7 +1218,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::FRAGMENT) + 1 << std::endl;
}
/* Inject common Metal header. */
@ -1277,7 +1302,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
/* Add Texture members. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::FRAGMENT)) {
ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1445,7 +1470,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_compute << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_compute << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::COMPUTE) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::COMPUTE) + 1 << std::endl;
}
/* Inject static workgroup sizes. */
@ -1486,7 +1511,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureSampler &tex : msl_iface.texture_samplers) {
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::COMPUTE)) {
ss_compute << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl;
}
@ -1563,6 +1588,31 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
this->set_compute_function_name(@"compute_function_entry");
#endif
/* DEBUG: Export source to file for manual verification. */
#if MTL_SHADER_DEBUG_EXPORT_SOURCE
NSFileManager *sharedFM = [NSFileManager defaultManager];
NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL];
NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent]
URLByAppendingPathComponent:@"Shaders/"
isDirectory:YES];
[sharedFM createDirectoryAtURL:shader_dir
withIntermediateDirectories:YES
attributes:nil
error:nil];
const char *path_cstr = [shader_dir fileSystemRepresentation];
std::ofstream compute_fs;
compute_fs.open(
(std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl")
.c_str());
compute_fs << ss_compute.str();
compute_fs.close();
shader_debug_printf(
"Compute Shader Saved to: %s\n",
(std::string(path_cstr) + std::string(this->name) + "_GeneratedComputeShader.msl").c_str());
#endif
NSString *msl_final_compute = [NSString stringWithUTF8String:ss_compute.str().c_str()];
this->shader_compute_source_from_msl(msl_final_compute);
@ -1700,8 +1750,11 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
/* NOTE: Metal requires Samplers and images to share slots. We will re-map these.
* If `auto_resource_location_` is not used, then slot collision could occur and
* this should be resolved in the original create-info. */
* this should be resolved in the original create-info.
* UBOs and SSBOs also share the same bind table. */
int texture_slot_id = 0;
int ubo_buffer_slot_id_ = 0;
int storage_buffer_slot_id_ = 0;
/* Determine max sampler slot for image resource offset, when not using auto resource location,
* as image resources cannot overlap sampler ranges. */
@ -1727,13 +1780,6 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
switch (res.bind_type) {
case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: {
/* Re-map sampler slot to share texture indices with images.
* Only applies if `auto_resource_location_` is enabled. */
BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
unsigned int used_slot = (create_info_->auto_resource_location_) ? (texture_slot_id++) :
res.slot;
BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Samplers to have access::sample by default. */
MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE;
/* TextureBuffers must have read/write/read-write access pattern. */
@ -1744,22 +1790,21 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
}
MSLTextureSampler msl_tex(
ShaderStage::ANY, res.sampler.type, res.sampler.name, access, used_slot);
MSLTextureResource msl_tex;
msl_tex.stage = ShaderStage::ANY;
msl_tex.type = res.sampler.type;
msl_tex.name = res.sampler.name;
msl_tex.access = access;
msl_tex.slot = texture_slot_id++;
msl_tex.location = (create_info_->auto_resource_location_) ? msl_tex.slot : res.slot;
msl_tex.is_texture_sampler = true;
BLI_assert(msl_tex.slot < MTL_MAX_TEXTURE_SLOTS);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(max_tex_bind_index, msl_tex.slot);
} break;
case shader::ShaderCreateInfo::Resource::BindType::IMAGE: {
/* Re-map sampler slot to share texture indices with samplers.
* Automatically applies if `auto_resource_location_` is enabled.
* Otherwise, if not using automatic resource location, offset by max sampler slot. */
BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS);
unsigned int used_slot = (create_info_->auto_resource_location_) ?
(texture_slot_id++) :
res.slot + max_sampler_slot + 1;
BLI_assert(used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Flatten qualifier flags into final access state. */
MSLTextureSamplerAccess access;
if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) {
@ -1771,29 +1816,39 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
else {
access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
}
BLI_assert(used_slot >= 0 && used_slot < MTL_MAX_TEXTURE_SLOTS);
/* Writeable image targets only assigned to Fragment and compute shaders. */
MSLTextureSampler msl_tex(ShaderStage::FRAGMENT | ShaderStage::COMPUTE,
res.image.type,
res.image.name,
access,
used_slot);
texture_samplers.append(msl_tex);
MSLTextureResource msl_image;
msl_image.stage = ShaderStage::FRAGMENT | ShaderStage::COMPUTE;
msl_image.type = res.image.type;
msl_image.name = res.image.name;
msl_image.access = access;
msl_image.slot = texture_slot_id++;
msl_image.location = (create_info_->auto_resource_location_) ? msl_image.slot : res.slot;
msl_image.is_texture_sampler = false;
BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
texture_samplers.append(msl_image);
max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
} break;
case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
MSLUniformBlock ubo;
MSLBufferBlock ubo;
BLI_assert(res.uniformbuf.type_name.size() > 0);
BLI_assert(res.uniformbuf.name.size() > 0);
int64_t array_offset = res.uniformbuf.name.find_first_of("[");
/* UBO should either use an existing declared UBO bind slot, or automatically resolve
* index. */
ubo.slot = (create_info_->auto_resource_location_) ? uniform_blocks.size() : res.slot;
BLI_assert(ubo.slot >= 0 && ubo.slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
max_ubo_slot = max_ii(max_ubo_slot, ubo.slot);
/* We maintain two bind indices. "Slot" refers to the storage index buffer(N) in which
* we will bind the resource. "Location" refers to the explicit bind index specified
* in ShaderCreateInfo.
* NOTE: ubo.slot is offset by one, as first UBO slot is reserved for push constant data.
*/
ubo.slot = 1 + (ubo_buffer_slot_id_++);
ubo.location = (create_info_->auto_resource_location_) ? ubo.slot : res.slot;
BLI_assert(ubo.location >= 0 && ubo.location < MTL_MAX_BUFFER_BINDINGS);
ubo.qualifiers = shader::Qualifier::READ;
ubo.type_name = res.uniformbuf.type_name;
ubo.is_array = (array_offset > -1);
if (ubo.is_array) {
@ -1809,13 +1864,40 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
} break;
case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
/* TODO(Metal): Support shader storage buffer in Metal.
* Pending compute support. */
MSLBufferBlock ssbo;
BLI_assert(res.storagebuf.type_name.size() > 0);
BLI_assert(res.storagebuf.name.size() > 0);
int64_t array_offset = res.storagebuf.name.find_first_of("[");
/* We maintain two bind indices. "Slot" refers to the buffer(N) index at which the
 * resource will be bound. "Location" refers to the explicit bind index specified
 * in ShaderCreateInfo. */
ssbo.slot = storage_buffer_slot_id_++;
ssbo.location = (create_info_->auto_resource_location_) ? ssbo.slot : res.slot;
BLI_assert(ssbo.location >= 0 && ssbo.location < MTL_MAX_BUFFER_BINDINGS);
ssbo.qualifiers = res.storagebuf.qualifiers;
ssbo.type_name = res.storagebuf.type_name;
ssbo.is_array = (array_offset > -1);
if (ssbo.is_array) {
/* If the SSBO is an array, strip the array tag from the name. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
ssbo.name = name_no_array;
}
else {
ssbo.name = res.storagebuf.name;
}
ssbo.stage = ShaderStage::ANY;
storage_blocks.append(ssbo);
} break;
}
}
}
/* Assign maximum buffer. */
max_buffer_slot = storage_buffer_slot_id_ + ubo_buffer_slot_id_ + 1;
/** Vertex Inputs. */
bool all_attr_location_assigned = true;
for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) {
@ -1865,10 +1947,28 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const
{
/* We can only use argument buffers IF sampler count exceeds static limit of 16,
* AND we can support more samplers with an argument buffer.
* NOTE: We reserve one constant sampler within the shader for fast read via point-sampling. */
return texture_samplers.size() >= 15 && GPU_max_samplers() > 16;
/* We can only use argument buffers IF the highest sampler index exceeds the static limit of 16,
 * AND we can support more samplers with an argument buffer. */
bool use_argument_buffer = (texture_samplers.size() >= 15 || max_tex_bind_index >= 14) &&
GPU_max_samplers() > 15;
#ifndef NDEBUG
/* Due to explicit bind location support, we may be below the sampler limit but still forced to
 * offset bindings because the bind range is high. This debug check issues a warning in such
 * cases; if explicit bind locations are not required, it is best to use auto_resource_location
 * to optimize bind point packing. */
if (use_argument_buffer && texture_samplers.size() < 15) {
MTL_LOG_WARNING(
"Compiled Shader '%s' is falling back to bindless via argument buffers due to having a "
"texture sampler of Index: %u Which exceeds the limit of 15+1. However shader only uses "
"%d textures. Consider optimising bind points with .auto_resource_location(true).\n",
parent_shader_.name_get(),
max_tex_bind_index,
(int)texture_samplers.size());
}
#endif
return use_argument_buffer;
}
uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
@ -1878,6 +1978,13 @@ uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
return texture_samplers.size();
}
uint32_t MSLGeneratorInterface::max_sampler_index_for_stage(ShaderStage stage) const
{
/* NOTE: Sampler bindings and the argument buffer are shared across stages,
 * as stages may share texture/sampler bindings. */
return max_tex_bind_index;
}
uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage)
{
/* Note: Shader stage must be a singular index. Compound shader masks are not valid for this
@ -1889,7 +1996,7 @@ uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderSta
}
/* Sampler argument buffer follows all other buffer bindings: UBOs, SSBOs and the PushConstantBlock. */
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_ubo_slot + 2);
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = (max_buffer_slot + 1);
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
@ -2014,6 +2121,11 @@ std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub()
/* Undefine uniform mappings to avoid name collisions. */
out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT);
/* Early fragment tests. */
if (uses_early_fragment_test) {
out << "[[early_fragment_tests]]" << std::endl;
}
/* Generate function entry point signature w/ resource bindings and inputs. */
#ifndef NDEBUG
out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT)
@ -2138,7 +2250,6 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
out << this->generate_msl_texture_vars(ShaderStage::COMPUTE);
out << this->generate_msl_global_uniform_population(ShaderStage::COMPUTE);
out << this->generate_msl_uniform_block_population(ShaderStage::COMPUTE);
/* TODO(Metal): SSBO Population. */
/* Execute original 'main' function within class scope. */
out << "\t/* Execute Compute main function */\t" << std::endl
@ -2170,10 +2281,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
stage == ShaderStage::COMPUTE);
/* Generate texture signatures for textures used by this stage. */
BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
for (const MSLTextureSampler &tex : this->texture_samplers) {
for (const MSLTextureResource &tex : this->texture_samplers) {
if (bool(tex.stage & stage)) {
out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
<< " [[texture(" << tex.location << ")]]";
<< " [[texture(" << tex.slot << ")]]";
}
}
@ -2191,10 +2302,10 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
/* Maximum Limit of samplers defined in the function argument table is
* `MTL_MAX_DEFAULT_SAMPLERS=16`. */
BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
for (const MSLTextureSampler &tex : this->texture_samplers) {
for (const MSLTextureResource &tex : this->texture_samplers) {
if (bool(tex.stage & stage)) {
out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
<< "_sampler [[sampler(" << tex.location << ")]]";
<< "_sampler [[sampler(" << tex.slot << ")]]";
}
}
@ -2211,7 +2322,7 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
ShaderStage stage,
bool &is_first_parameter)
{
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
if (bool(ubo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
@ -2223,7 +2334,27 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
* MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs
* bound for the current PSO specialization. */
out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+"
<< (ubo.slot + 1) << ")]]";
<< ubo.slot << ")]]";
}
}
/* Storage buffers. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
if (bool(ssbo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
bool writeable = (ssbo.qualifiers & shader::Qualifier::WRITE) == shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
out << parameter_delimiter(is_first_parameter) << "\n\t" << memory_scope;
if (!is_builtin_type(ssbo.type_name)) {
out << get_stage_class_name(stage) << "::";
}
/* #StorageBuffer bind indices start at `MTL_storage_buffer_base_index`,
 * which follows immediately after all uniform blocks, such that
 * MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
 * uniform_blocks.size() + 1, where the additional slot is reserved for the
 * #PushConstantBlock (push constants). */
out << ssbo.type_name << "* " << ssbo.name << "[[buffer(MTL_storage_buffer_base_index+"
<< (ssbo.slot) << ")]]";
}
}
}
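For instance (names illustrative, not from this patch), a read-write storage block `CurveData curves` assigned slot 0 contributes an entry-point parameter of the form below, with `constant` substituted for `device` when the block is read-only:

    device CurveData* curves[[buffer(MTL_storage_buffer_base_index+0)]]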
@ -2411,9 +2542,13 @@ std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shade
out << "#undef " << uniform.name << std::endl;
}
/* UBO block undef. */
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
out << "#undef " << ubo.name << std::endl;
}
/* SSBO block undef. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
out << "#undef " << ssbo.name << std::endl;
}
return out.str();
}
@ -2698,7 +2833,7 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
/* Populate Global Uniforms. */
std::stringstream out;
out << "\t/* Copy UBO block references into local class variables */" << std::endl;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ubo.stage & stage)) {
@ -2714,6 +2849,26 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
out << " = " << ubo.name << ";" << std::endl;
}
}
/* Populate storage buffer references. */
out << "\t/* Copy SSBO block references into local class variables */" << std::endl;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ssbo.stage & stage)) {
/* Generate SSBO reference assignment.
 * NOTE(Metal): We append the `_local` post-fix onto the class member name
 * for the SSBO to avoid name collision with the SSBO accessor macro.
 * We only need to add this post-fix for the non-array access variant,
 * as the array is indexed directly, rather than requiring a dereference. */
out << "\t" << get_shader_stage_instance_name(stage) << "." << ssbo.name;
if (!ssbo.is_array) {
out << "_local";
}
out << " = " << ssbo.name << ";" << std::endl;
}
}
out << std::endl;
return out.str();
}
@ -3080,7 +3235,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
if (this->use_argument_buffer_for_samplers()) {
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".samp = &samplers.sampler_args["
<< this->texture_samplers[i].location << "];" << std::endl;
<< this->texture_samplers[i].slot << "];" << std::endl;
}
else {
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
@ -3302,20 +3457,36 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
name_buffer_size,
name_buffer_offset),
this->uniform_blocks[uniform_block].slot,
this->uniform_blocks[uniform_block].location,
0,
this->uniform_blocks[uniform_block].stage);
}
/* Prepare Interface Storage Blocks. */
for (int storage_block = 0; storage_block < this->storage_blocks.size(); storage_block++) {
interface->add_storage_block(
name_buffer_copystr(&interface->name_buffer_,
this->storage_blocks[storage_block].name.c_str(),
name_buffer_size,
name_buffer_offset),
this->storage_blocks[storage_block].slot,
this->storage_blocks[storage_block].location,
0,
this->storage_blocks[storage_block].stage);
}
/* Texture/sampler bindings to interface. */
for (const MSLTextureSampler &texture_sampler : this->texture_samplers) {
for (const MSLTextureResource &input_texture : this->texture_samplers) {
interface->add_texture(name_buffer_copystr(&interface->name_buffer_,
texture_sampler.name.c_str(),
input_texture.name.c_str(),
name_buffer_size,
name_buffer_offset),
texture_sampler.location,
texture_sampler.get_texture_binding_type(),
texture_sampler.get_sampler_format(),
texture_sampler.stage);
input_texture.slot,
input_texture.location,
input_texture.get_texture_binding_type(),
input_texture.get_sampler_format(),
input_texture.is_texture_sampler,
input_texture.stage);
}
/* Sampler Parameters. */
@ -3336,7 +3507,7 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
return interface;
}
std::string MSLTextureSampler::get_msl_texture_type_str() const
std::string MSLTextureResource::get_msl_texture_type_str() const
{
/* Add Types as needed. */
switch (this->type) {
@ -3444,7 +3615,7 @@ std::string MSLTextureSampler::get_msl_texture_type_str() const
};
}
std::string MSLTextureSampler::get_msl_wrapper_type_str() const
std::string MSLTextureResource::get_msl_wrapper_type_str() const
{
/* Add Types as needed. */
switch (this->type) {
@ -3552,7 +3723,7 @@ std::string MSLTextureSampler::get_msl_wrapper_type_str() const
};
}
std::string MSLTextureSampler::get_msl_return_type_str() const
std::string MSLTextureResource::get_msl_return_type_str() const
{
/* Add Types as needed */
switch (this->type) {
@ -3607,7 +3778,7 @@ std::string MSLTextureSampler::get_msl_return_type_str() const
};
}
eGPUTextureType MSLTextureSampler::get_texture_binding_type() const
eGPUTextureType MSLTextureResource::get_texture_binding_type() const
{
/* Add Types as needed */
switch (this->type) {
@ -3714,7 +3885,7 @@ eGPUTextureType MSLTextureSampler::get_texture_binding_type() const
};
}
eGPUSamplerFormat MSLTextureSampler::get_sampler_format() const
eGPUSamplerFormat MSLTextureResource::get_sampler_format() const
{
switch (this->type) {
case ImageType::FLOAT_BUFFER:

View File

@ -107,12 +107,13 @@ struct MTLShaderInputAttribute {
uint32_t matrix_element_count;
};
struct MTLShaderUniformBlock {
struct MTLShaderBufferBlock {
uint32_t name_offset;
uint32_t size = 0;
/* Buffer resource bind index in shader `[[buffer(index)]]`. */
uint32_t buffer_index;
/* Explicit bind location for the buffer. */
int location;
/* Tracking for manual uniform addition. */
uint32_t current_offset;
ShaderStage stage_mask;
@ -120,7 +121,7 @@ struct MTLShaderUniformBlock {
struct MTLShaderUniform {
uint32_t name_offset;
/* Index of `MTLShaderUniformBlock` this uniform belongs to. */
/* Index of `MTLShaderBufferBlock` this uniform belongs to. */
uint32_t size_in_bytes;
uint32_t byte_offset;
eMTLDataType type;
@ -132,9 +133,13 @@ struct MTLShaderTexture {
uint32_t name_offset;
/* Texture resource bind slot in shader `[[texture(n)]]`. */
int slot_index;
/* Explicit bind location for texture. */
int location;
eGPUTextureType type;
eGPUSamplerFormat sampler_format;
ShaderStage stage_mask;
/* Whether the texture resource is expected to be an image or a sampler. */
bool is_texture_sampler;
};
struct MTLShaderSampler {
@ -173,8 +178,13 @@ class MTLShaderInterface : public ShaderInterface {
/* Uniform Blocks. */
uint32_t total_uniform_blocks_;
uint32_t max_uniformbuf_index_;
MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderUniformBlock push_constant_block_;
MTLShaderBufferBlock ubos_[MTL_MAX_BUFFER_BINDINGS];
MTLShaderBufferBlock push_constant_block_;
/* Storage blocks. */
uint32_t total_storage_blocks_;
uint32_t max_storagebuf_index_;
MTLShaderBufferBlock ssbos_[MTL_MAX_BUFFER_BINDINGS];
/* Textures. */
/* Textures support explicit binding indices, so some texture slots
@ -207,13 +217,21 @@ class MTLShaderInterface : public ShaderInterface {
int matrix_element_count = 1);
uint32_t add_uniform_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
uint32_t add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1);
void add_texture(uint32_t name_offset,
uint32_t texture_slot,
uint32_t location,
eGPUTextureType tex_binding_type,
eGPUSamplerFormat sampler_format,
bool is_texture_sampler,
ShaderStage stage_mask = ShaderStage::FRAGMENT);
void add_push_constant_block(uint32_t name_offset);
@ -232,14 +250,20 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t get_total_uniforms() const;
/* Fetch Uniform Blocks. */
const MTLShaderUniformBlock &get_uniform_block(uint index) const;
const MTLShaderBufferBlock &get_uniform_block(uint index) const;
uint32_t get_total_uniform_blocks() const;
uint32_t get_max_ubo_index() const;
bool has_uniform_block(uint32_t block_index) const;
uint32_t get_uniform_block_size(uint32_t block_index) const;
/* Fetch Storage Blocks. */
const MTLShaderBufferBlock &get_storage_block(uint index) const;
uint32_t get_total_storage_blocks() const;
bool has_storage_block(uint32_t block_index) const;
uint32_t get_storage_block_size(uint32_t block_index) const;
/* Push constant uniform data block should always be available. */
const MTLShaderUniformBlock &get_push_constant_block() const;
const MTLShaderBufferBlock &get_push_constant_block() const;
uint32_t get_max_buffer_index() const;
/* Fetch textures. */
const MTLShaderTexture &get_texture(uint index) const;

View File

@ -56,6 +56,8 @@ void MTLShaderInterface::init()
total_attributes_ = 0;
total_uniform_blocks_ = 0;
max_uniformbuf_index_ = 0;
total_storage_blocks_ = 0;
max_storagebuf_index_ = 0;
total_uniforms_ = 0;
total_textures_ = 0;
max_texture_index_ = -1;
@ -109,6 +111,7 @@ void MTLShaderInterface::add_input_attribute(uint32_t name_offset,
uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask)
{
@ -117,9 +120,12 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
size += 16 - (size % 16);
}
MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS);
MTLShaderBufferBlock &uni_block = ubos_[total_uniform_blocks_];
uni_block.name_offset = name_offset;
uni_block.buffer_index = buffer_index;
uni_block.location = location;
uni_block.size = size;
uni_block.current_offset = 0;
uni_block.stage_mask = ShaderStage::ANY;
@ -127,6 +133,30 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
return (total_uniform_blocks_++);
}
uint32_t MTLShaderInterface::add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t location,
uint32_t size,
ShaderStage stage_mask)
{
/* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */
if ((size % 16) != 0) {
size += 16 - (size % 16);
}
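/* Illustrative: a 20-byte block is padded to 20 + (16 - (20 % 16)) = 32 bytes. */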
BLI_assert(buffer_index < MTL_MAX_BUFFER_BINDINGS);
MTLShaderBufferBlock &ssbo_block = ssbos_[total_storage_blocks_];
ssbo_block.name_offset = name_offset;
ssbo_block.buffer_index = buffer_index;
ssbo_block.location = location;
ssbo_block.size = size;
ssbo_block.current_offset = 0;
ssbo_block.stage_mask = ShaderStage::ANY;
max_storagebuf_index_ = max_ii(max_storagebuf_index_, buffer_index);
return (total_storage_blocks_++);
}
void MTLShaderInterface::add_push_constant_block(uint32_t name_offset)
{
push_constant_block_.name_offset = name_offset;
@ -189,8 +219,10 @@ void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, in
void MTLShaderInterface::add_texture(uint32_t name_offset,
uint32_t texture_slot,
uint32_t location,
eGPUTextureType tex_binding_type,
eGPUSamplerFormat sampler_format,
bool is_texture_sampler,
ShaderStage stage_mask)
{
BLI_assert(texture_slot >= 0 && texture_slot < GPU_max_textures());
@ -201,8 +233,10 @@ void MTLShaderInterface::add_texture(uint32_t name_offset,
BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding");
tex.name_offset = name_offset;
tex.slot_index = texture_slot;
tex.location = location;
tex.type = tex_binding_type;
tex.sampler_format = sampler_format;
tex.is_texture_sampler = is_texture_sampler;
tex.stage_mask = stage_mask;
tex.used = true;
total_textures_++;
@ -272,9 +306,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
attr_len_ = this->get_total_attributes();
ubo_len_ = this->get_total_uniform_blocks();
uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
/* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */
ssbo_len_ = 0;
ssbo_len_ = this->get_total_storage_blocks();
/* Calculate total inputs and allocate #ShaderInput array. */
/* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */
@ -300,16 +332,17 @@ void MTLShaderInterface::prepare_common_shader_inputs()
BLI_assert(&inputs_[attr_len_] >= current_input);
current_input = &inputs_[attr_len_];
for (const int ubo_index : IndexRange(total_uniform_blocks_)) {
MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index];
MTLShaderBufferBlock &shd_ubo = ubos_[ubo_index];
current_input->name_offset = shd_ubo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
/* Location refers to the index in the ubos_ array. */
current_input->location = ubo_index;
current_input->location = shd_ubo.location;
/* Binding location refers to the UBO bind slot in
* #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock. */
current_input->binding = shd_ubo.buffer_index;
* PushConstantBlock.
* See `mtl_shader_generator.hh` for the buffer binding table breakdown. */
current_input->binding = shd_ubo.location;
current_input++;
}
@ -352,15 +385,24 @@ void MTLShaderInterface::prepare_common_shader_inputs()
current_input->location = texture_index + total_uniforms_;
/* Binding represents texture slot `[[texture(n)]]`. */
current_input->binding = shd_tex.slot_index;
current_input->binding = shd_tex.location;
current_input++;
}
}
/* SSBO bindings.
* TODO(Metal): Support SSBOs. Pending compute support. */
/* SSBO bindings. */
BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input);
current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_];
BLI_assert(ssbo_len_ >= total_storage_blocks_);
for (const int ssbo_index : IndexRange(total_storage_blocks_)) {
MTLShaderBufferBlock &shd_ssbo = ssbos_[ssbo_index];
current_input->name_offset = shd_ssbo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ssbo.name_offset));
/* `Location` is used as the returned explicit bind index for SSBOs. */
current_input->location = shd_ssbo.location;
current_input->binding = shd_ssbo.location;
current_input++;
}
this->sort_inputs();
@ -419,14 +461,14 @@ uint32_t MTLShaderInterface::get_total_uniforms() const
}
/* Uniform Blocks. */
const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const
const MTLShaderBufferBlock &MTLShaderInterface::get_uniform_block(uint index) const
{
BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(index < MTL_MAX_BUFFER_BINDINGS);
BLI_assert(index < get_total_uniform_blocks());
return ubos_[index];
}
const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const
const MTLShaderBufferBlock &MTLShaderInterface::get_push_constant_block() const
{
return push_constant_block_;
}
@ -436,11 +478,6 @@ uint32_t MTLShaderInterface::get_total_uniform_blocks() const
return total_uniform_blocks_;
}
uint32_t MTLShaderInterface::get_max_ubo_index() const
{
return max_uniformbuf_index_;
}
bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const
{
return (block_index < total_uniform_blocks_);
@ -451,6 +488,35 @@ uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const
return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0;
}
/* Storage Blocks. */
const MTLShaderBufferBlock &MTLShaderInterface::get_storage_block(uint index) const
{
BLI_assert(index < MTL_MAX_BUFFER_BINDINGS);
BLI_assert(index < get_total_storage_blocks());
return ssbos_[index];
}
uint32_t MTLShaderInterface::get_total_storage_blocks() const
{
return total_storage_blocks_;
}
bool MTLShaderInterface::has_storage_block(uint32_t block_index) const
{
return (block_index < total_storage_blocks_);
}
uint32_t MTLShaderInterface::get_storage_block_size(uint32_t block_index) const
{
return (block_index < total_storage_blocks_) ? ssbos_[block_index].size : 0;
}
uint32_t MTLShaderInterface::get_max_buffer_index() const
{
/* PushConstantBlock + All uniform blocks + all storage blocks. */
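/* Illustrative: with 2 uniform blocks and 1 storage block this returns 1 + 2 + 1 = 4. */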
return 1 + get_total_uniform_blocks() + get_total_storage_blocks();
}
/* Textures. */
const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const
{

View File

@ -573,7 +573,7 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits)
eGPUStageBarrierBits before_stages = GPU_BARRIER_STAGE_ANY;
eGPUStageBarrierBits after_stages = GPU_BARRIER_STAGE_ANY;
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->main_command_buffer.insert_memory_barrier(barrier_bits, before_stages, after_stages);
@ -651,7 +651,7 @@ void MTLStateManager::texture_bind(Texture *tex_, GPUSamplerState sampler_type,
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
if (unit >= 0) {
ctx->texture_bind(mtl_tex, unit);
ctx->texture_bind(mtl_tex, unit, false);
/* Fetching textures default sampler configuration and applying
* eGPUSampler State on top. This path exists to support
@ -670,14 +670,14 @@ void MTLStateManager::texture_unbind(Texture *tex_)
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
ctx->texture_unbind(mtl_tex);
ctx->texture_unbind(mtl_tex, false);
}
void MTLStateManager::texture_unbind_all()
{
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->texture_unbind_all();
ctx->texture_unbind_all(false);
}
/** \} */
@ -688,17 +688,30 @@ void MTLStateManager::texture_unbind_all()
void MTLStateManager::image_bind(Texture *tex_, int unit)
{
this->texture_bind(tex_, GPUSamplerState::default_sampler(), unit);
BLI_assert(tex_);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
if (unit >= 0) {
ctx->texture_bind(mtl_tex, unit, true);
}
}
void MTLStateManager::image_unbind(Texture *tex_)
{
this->texture_unbind(tex_);
BLI_assert(tex_);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(tex_);
BLI_assert(mtl_tex);
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
ctx->texture_unbind(mtl_tex, true);
}
void MTLStateManager::image_unbind_all()
{
this->texture_unbind_all();
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
ctx->texture_unbind_all(true);
}
/** \} */

View File

@ -4,3 +4,82 @@
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_storage_buffer_private.hh"
#include "mtl_context.hh"
namespace blender {
namespace gpu {
class MTLUniformBuf;
class MTLVertBuf;
class MTLIndexBuf;
/**
* Implementation of Storage Buffers using Metal.
*/
class MTLStorageBuf : public StorageBuf {
private:
/** Allocation Handle or indirect wrapped instance.
* MTLStorageBuf can wrap a MTLVertBuf, MTLIndexBuf or MTLUniformBuf for binding as a writeable
* resource. */
enum {
MTL_STORAGE_BUF_TYPE_DEFAULT = 0,
MTL_STORAGE_BUF_TYPE_UNIFORMBUF = 1,
MTL_STORAGE_BUF_TYPE_VERTBUF = 2,
MTL_STORAGE_BUF_TYPE_INDEXBUF = 3,
} storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
union {
/* Own allocation. */
gpu::MTLBuffer *metal_buffer_;
/* Wrapped type. */
MTLUniformBuf *uniform_buffer_;
MTLVertBuf *vertex_buffer_;
MTLIndexBuf *index_buffer_;
};
/* Whether the buffer has contents; if false, no GPU buffer
 * has yet been allocated. */
bool has_data_ = false;
/** Bind-state tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/** Usage type. */
GPUUsageType usage_;
public:
MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
~MTLStorageBuf();
MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size);
MTLStorageBuf(MTLVertBuf *vert_buf, size_t size);
MTLStorageBuf(MTLIndexBuf *index_buf, size_t size);
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
void clear(uint32_t clear_value) override;
void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
void read(void *data) override;
void init();
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{
return name_;
}
private:
MEM_CXX_CLASS_ALLOC_FUNCS("MTLStorageBuf");
};
} // namespace gpu
} // namespace blender
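For context, backend objects such as MTLStorageBuf are reached through the generic GPU module API. A minimal usage sketch, assuming the GPU_storagebuf_* functions from GPU_storage_buffer.h behave as on other backends (slot, sizes and names are hypothetical; shader setup and dispatch omitted):

    /* Allocate an SSBO; the Metal backend defers the MTLBuffer allocation until first use. */
    GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(
        256 * sizeof(float), nullptr, GPU_USAGE_DEVICE_ONLY, "example_ssbo");
    /* Bind to slot 0; on Metal this maps to buffer(MTL_storage_buffer_base_index+0). */
    GPU_storagebuf_bind(ssbo, 0);
    /* ... dispatch a compute shader that writes to the block bound at slot 0 ... */
    float results[256];
    GPU_storagebuf_read(ssbo, results); /* MTLStorageBuf::read flushes managed buffers to host. */
    GPU_storagebuf_free(ssbo);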

View File

@ -4,3 +4,302 @@
/** \file
* \ingroup gpu
*/
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
* \{ */
MTLStorageBuf::MTLStorageBuf(size_t size, GPUUsageType usage, const char *name)
: StorageBuf(size, name)
{
usage_ = usage;
/* Do not create SSBO MTL buffer here to allow allocation from any thread. */
storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
metal_buffer_ = nullptr;
}
MTLStorageBuf::MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size)
: StorageBuf(size, "UniformBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_UNIFORMBUF;
uniform_buffer_ = uniform_buf;
BLI_assert(uniform_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLVertBuf *vert_buf, size_t size)
: StorageBuf(size, "VertexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_VERTBUF;
vertex_buffer_ = vert_buf;
BLI_assert(vertex_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLIndexBuf *index_buf, size_t size)
: StorageBuf(size, "IndexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_INDEXBUF;
index_buffer_ = index_buf;
BLI_assert(index_buffer_ != nullptr);
}
MTLStorageBuf::~MTLStorageBuf()
{
if (storage_source_ == MTL_STORAGE_BUF_TYPE_DEFAULT) {
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
}
/* Ensure SSBO is not bound to the active context.
 * SSBO bindings are reset upon context-switch so we do not need
 * to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
MTLStorageBufferBinding &slot = ctx->pipeline_state.ssbo_bindings[i];
if (slot.bound && slot.ssbo == this) {
slot.bound = false;
slot.ssbo = nullptr;
}
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data upload / update
* \{ */
void MTLStorageBuf::init()
{
/* We only need to initialize the storage buffer for default buffer types. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
metal_buffer_ = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_, true);
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Storage Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
has_data_ = false;
}
void MTLStorageBuf::update(const void *data)
{
/* We only need to update the storage buffer for default buffer types. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
/* Ensure buffer has been allocated. */
if (metal_buffer_ == nullptr) {
init();
}
BLI_assert(data != nullptr);
if (data != nullptr) {
/* Upload data. */
BLI_assert(!(metal_buffer_->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size_in_bytes_ <= metal_buffer_->get_size());
BLI_assert(size_in_bytes_ <= [metal_buffer_->get_metal_buffer() length]);
memcpy(metal_buffer_->get_host_ptr(), data, size_in_bytes_);
metal_buffer_->flush_range(0, size_in_bytes_);
has_data_ = true;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Usage
* \{ */
void MTLStorageBuf::bind(int slot)
{
if (slot >= MTL_MAX_BUFFER_BINDINGS) {
fprintf(
stderr,
"Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n",
name_,
slot,
MTL_MAX_BUFFER_BINDINGS);
BLI_assert(false);
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Bind current SSBO to the active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLStorageBufferBinding &ctx_ssbo_bind_slot = ctx->pipeline_state.ssbo_bindings[slot];
ctx_ssbo_bind_slot.ssbo = this;
ctx_ssbo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
}
void MTLStorageBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLStorageBufferBinding &ctx_ssbo_bind_slot =
bound_ctx_->pipeline_state.ssbo_bindings[bind_slot_];
if (ctx_ssbo_bind_slot.bound && ctx_ssbo_bind_slot.ssbo == this) {
ctx_ssbo_bind_slot.bound = false;
ctx_ssbo_bind_slot.ssbo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
void MTLStorageBuf::clear(uint32_t clear_value)
{
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert_msg(ctx, "Clears should always be performed while a valid context exists.");
if (metal_buffer_ == nullptr) {
this->init();
}
if (ctx) {
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder fillBuffer:metal_buffer_->get_metal_buffer()
range:NSMakeRange(0, size_in_bytes_)
value:clear_value];
}
}
void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uint copy_size)
{
/* TODO(Metal): Support Copy sub operation. */
MTL_LOG_WARNING("MTLStorageBuf::copy_sub not yet supported.\n");
}
void MTLStorageBuf::read(void *data)
{
if (data == nullptr) {
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
/* Managed buffers need to be explicitly flushed back to host. */
if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/* Ensure GPU updates are flushed back to CPU. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
/* Ensure sync has occurred. */
GPU_finish();
}
/* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
}
id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
{
gpu::MTLBuffer *source_buffer = nullptr;
switch (storage_source_) {
/* Default SSBO buffer comes from own allocation. */
case MTL_STORAGE_BUF_TYPE_DEFAULT: {
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
source_buffer = metal_buffer_;
} break;
/* SSBO buffer comes from Uniform Buffer. */
case MTL_STORAGE_BUF_TYPE_UNIFORMBUF: {
source_buffer = uniform_buffer_->metal_buffer_;
} break;
/* SSBO buffer comes from Vertex Buffer. */
case MTL_STORAGE_BUF_TYPE_VERTBUF: {
source_buffer = vertex_buffer_->vbo_;
} break;
/* SSBO buffer comes from Index Buffer. */
case MTL_STORAGE_BUF_TYPE_INDEXBUF: {
source_buffer = index_buffer_->ibo_;
} break;
}
/* Return Metal allocation handle and flag as used. */
BLI_assert(source_buffer != nullptr);
source_buffer->debug_ensure_used();
return source_buffer->get_metal_buffer();
}
int MTLStorageBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu

View File

@ -13,10 +13,14 @@
namespace blender::gpu {
class MTLStorageBuf;
/**
* Implementation of Uniform Buffers using Metal.
**/
class MTLUniformBuf : public UniformBuf {
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
@ -29,6 +33,9 @@ class MTLUniformBuf : public UniformBuf {
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();
@ -39,7 +46,7 @@ class MTLUniformBuf : public UniformBuf {
void unbind() override;
void clear_to_zero() override;
id<MTLBuffer> get_metal_buffer(int *r_offset);
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{

View File

@ -14,6 +14,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
@ -33,7 +34,7 @@ MTLUniformBuf::~MTLUniformBuf()
* to check deactivated context's. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
for (int i = 0; i < MTL_MAX_BUFFER_BINDINGS; i++) {
MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
if (slot.bound && slot.ubo == this) {
slot.bound = false;
@ -41,6 +42,11 @@ MTLUniformBuf::~MTLUniformBuf()
}
}
}
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLUniformBuf::update(const void *data)
@ -67,7 +73,9 @@ void MTLUniformBuf::update(const void *data)
size_in_bytes_, true, data);
has_data_ = true;
metal_buffer_->set_label(@"Uniform Buffer");
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Uniform Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
}
@ -94,7 +102,7 @@ void MTLUniformBuf::bind(int slot)
return;
}
BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(slot < MTL_MAX_BUFFER_BINDINGS);
/* Bind current UBO to active context. */
MTLContext *ctx = MTLContext::get();
@ -126,7 +134,25 @@ void MTLUniformBuf::bind_as_ssbo(int slot)
return;
}
BLI_assert_msg(0, "Not implemented yet");
/* We need to ensure the data is actually allocated when used as an SSBO, as the resource
 * may be written to. */
if (metal_buffer_ == nullptr) {
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
else {
this->clear_to_zero();
}
}
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, size_in_bytes_);
}
ssbo_wrapper_->bind(slot);
}
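Both the uniform- and vertex-buffer paths share this lazy-wrapper idiom: the wrapper is created on first SSBO bind and freed by the owner's destructor. A condensed sketch of the pattern, with illustrative names rather than Blender API:

#include <cstddef>

/* Stand-in for MTLStorageBuf in this sketch. */
class StorageWrapper {
 public:
  StorageWrapper(void * /*wrapped*/, size_t /*size*/) {}
  void bind(int /*slot*/) { /* Conventional SSBO binding would happen here. */ }
};

class OwningBuffer {
  StorageWrapper *ssbo_wrapper_ = nullptr; /* Created on first SSBO bind. */
  size_t size_in_bytes_ = 0;

 public:
  ~OwningBuffer()
  {
    delete ssbo_wrapper_; /* Owner frees its wrapper, as in the destructors above. */
  }

  void bind_as_ssbo(int slot)
  {
    if (ssbo_wrapper_ == nullptr) {
      ssbo_wrapper_ = new StorageWrapper(this, size_in_bytes_);
    }
    ssbo_wrapper_->bind(slot);
  }
};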
void MTLUniformBuf::unbind()
@ -154,19 +180,14 @@ void MTLUniformBuf::unbind()
bound_ctx_ = nullptr;
}
id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
id<MTLBuffer> MTLUniformBuf::get_metal_buffer()
{
BLI_assert(this);
*r_offset = 0;
if (metal_buffer_ != nullptr && has_data_) {
*r_offset = 0;
metal_buffer_->debug_ensure_used();
return metal_buffer_->get_metal_buffer();
}
else {
*r_offset = 0;
return nil;
}
return nil;
}
int MTLUniformBuf::get_size()

View File

@ -22,7 +22,8 @@ class MTLVertBuf : public VertBuf {
friend class gpu::MTLTexture; /* For buffer texture. */
friend class MTLShader; /* For transform feedback. */
friend class MTLBatch;
friend class MTLContext; /* For transform feedback. */
friend class MTLContext; /* For transform feedback. */
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/** Metal buffer allocation. **/
@ -37,6 +38,8 @@ class MTLVertBuf : public VertBuf {
uint64_t alloc_size_ = 0;
/** Whether existing allocation has been submitted for use by the GPU. */
bool contents_in_flight_ = false;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
/* Fetch Metal buffer and offset into allocation if necessary.
* Access limited to friend classes. */

View File

@ -5,6 +5,7 @@
*/
#include "mtl_vertex_buffer.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
namespace blender::gpu {
@ -48,6 +49,11 @@ void MTLVertBuf::release_data()
GPU_TEXTURE_FREE_SAFE(buffer_texture_);
MEM_SAFE_FREE(data);
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLVertBuf::duplicate_data(VertBuf *dst_)
@ -253,7 +259,7 @@ void MTLVertBuf::bind()
void MTLVertBuf::update_sub(uint start, uint len, const void *data)
{
/* Fetch and verify active context. */
MTLContext *ctx = reinterpret_cast<MTLContext *>(unwrap(GPU_context_active_get()));
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
@ -292,10 +298,16 @@ void MTLVertBuf::update_sub(uint start, uint len, const void *data)
void MTLVertBuf::bind_as_ssbo(uint binding)
{
/* TODO(Metal): Support binding of buffers as SSBOs.
* Pending overall compute support for Metal backend. */
MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n");
this->flag_used();
/* Ensure resource is initialized. */
this->bind();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
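A hedged usage sketch of this path from the generic GPU API, filling a vertex buffer from a compute shader; the shader handle and group count are illustrative:

#include "GPU_compute.h"
#include "GPU_shader.h"
#include "GPU_vertex_buffer.h"

static void fill_vbo_on_gpu(GPUVertBuf *vbo, GPUShader *compute_shader)
{
  GPU_shader_bind(compute_shader);
  /* On Metal this now routes through the MTLStorageBuf wrapper above. */
  GPU_vertbuf_bind_as_ssbo(vbo, 0);
  GPU_compute_dispatch(compute_shader, 64, 1, 1);
  GPU_shader_unbind();
}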
void MTLVertBuf::bind_as_texture(uint binding)

View File

@ -189,13 +189,14 @@ template<typename T> T atomicXor(device T &mem, T data)
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* Unblock texture atomic compilation.
* TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single thread.
/* Unblock texture atomic compilation.
* TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single
* thread.
* We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */
#define imageAtomicMin(tex, coord, data) \
uint val = _texelFetch_internal(tex, coord, 0).r;\
_texture_write_internal(tex, coord, uint4((val < data) ? val : data));\
tex.texture->fence();
uint val = _texelFetch_internal(tex, coord, 0).r; \
_texture_write_internal(tex, coord, uint4((val < data) ? val : data)); \
tex.texture->fence();
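Worth spelling out why this emulation is only locally safe: two threads targeting the same texel can both fetch the old value before either write lands, so one thread's minimum is silently lost. The fence() only orders the read-modify-write within a single thread, which is why the TODO points at a proper texture-buffer solution.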
/* Used to replace 'out' in function parameters with threadlocal reference
* shortened to avoid expanding the glsl source string. */
@ -1186,17 +1187,16 @@ inline float4 uintBitsToFloat(uint4 f)
template<typename T> T findLSB(T x)
{
/* ctz returns the number of trailing zeroes. To fetch the index of the LSB, we can also use this
* value as index, however need to filter out the case where the input value is zero to match
* value as index, however we need to filter out the case where the input value is zero to match
* GLSL functionality. */
return (x == T(0)) ? T(-1) : T(ctz(x));
}
template<typename T> T findMSB(T x)
{
/* clz returns the number of leading zeroes. To fetch the index of the LSB, we can also use this
* value as index when offset by 1. however need to filter out the case where the input value is
* zero to match GLSL functionality. 000000010*/
return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1));
/* clz returns the number of leading zeroes. To fetch the index of the MSB, we can also use this
* value as index when offset by 1. */
return (sizeof(T) * 8) - T(1) - clz(x);
}
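For reference, a quick host-side check of the intended GLSL semantics (hypothetical helper names using GCC/Clang builtins, not Blender code). Since MSL's clz(T(0)) returns the type's bit width, the rewritten findMSB already yields T(-1) for a zero input without an explicit branch:

#include <cassert>
#include <cstdint>

/* Host-side mirrors of the GLSL behaviour the Metal shims aim to match. */
static int find_lsb(uint32_t x) { return x == 0 ? -1 : __builtin_ctz(x); }
static int find_msb(uint32_t x) { return x == 0 ? -1 : 31 - __builtin_clz(x); }

int main()
{
  assert(find_lsb(0b01100u) == 2);                  /* Lowest set bit. */
  assert(find_msb(0b01100u) == 3);                  /* Highest set bit. */
  assert(find_lsb(0u) == -1 && find_msb(0u) == -1); /* GLSL zero case. */
  return 0;
}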
/* Texture size functions. Add texture types as needed. */

View File

@ -15,12 +15,6 @@
#define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass by reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)