Metal: Support for Storage Buffers. #104870

Closed
Thomas Dinges wants to merge 1 commit from (deleted):metal-ssbo into main

32 changed files with 1108 additions and 101 deletions

View File

@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx,
* The full pixel neighborhood is gathered.
* \{ */
void dof_slight_focus_gather(sampler2D depth_tx,
void dof_slight_focus_gather(depth2D depth_tx,
sampler2D color_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
float radius,

View File

@ -62,7 +62,7 @@ void main()
int mask_shift = 1;
#define downsample_level(out_mip__, lod_) \
active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \
barrier(); /* Wait for previous writes to finish. */ \
if (active_thread) { \
max_depth = max_v4(load_local_depths(local_px)); \
@ -89,12 +89,12 @@ void main()
}
finished_tile_counter = 0u;
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u));
ivec2 image_border = imageSize(out_mip_5) - 1;
for (int y = 0; y < iter.y; y++) {
for (int x = 0; x < iter.x; x++) {
/* Load result of the other work groups. */
kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
src_px = ivec2(kernel_origin + local_px) * 2;
vec4 samp;
samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;

View File

@ -168,13 +168,14 @@ void main()
}
/* Fallthrough to the hemispheric case. */
case LIGHT_RECT:
case LIGHT_ELLIPSE:
case LIGHT_ELLIPSE: {
vec3 v000 = vP - v_right * radius - v_up * radius;
vec3 v100 = v000 + v_right * (radius * 2.0);
vec3 v010 = v000 + v_up * (radius * 2.0);
vec3 v001 = v000 - v_back * radius;
Box bbox = shape_box(v000, v100, v010, v001);
intersect_tile = intersect_tile && intersect(tile, bbox);
} break;
default:
break;
}

View File

@ -74,8 +74,10 @@ void main()
vec4 max_motion = imageLoad(in_tiles_img, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy,
uvec2(src_tile));
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
if (true) {
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
@ -85,17 +87,20 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
}
}
}
}
if (true) {
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
@ -104,10 +109,12 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
}
}
}

View File

@ -178,10 +178,10 @@ void main()
vec4 max_motion;
/* Load dilation result from the indirection table. */
ivec2 tile_prev;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev);
max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
ivec2 tile_next;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next);
max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
Accumulator accum;

View File

@ -240,13 +240,13 @@ void output_aov(vec4 color, float value, uint hash)
#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
if (aov_buf.hash_color[i] == hash) {
imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color);
imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color);
return;
}
}
for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
if (aov_buf.hash_value[i] == hash) {
imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value));
imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value));
return;
}
}

View File

@ -33,7 +33,12 @@
BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
{
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
/* NOTE: Curve refine is faster using transform feedback via vertex processing pipeline with
* Metal and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
* parallel with fragment work, whereas compute inserts an explicit dependency,
* due to switching of command encoder types. */
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
(GPU_backend_get_type() != GPU_BACKEND_METAL)) {
return PART_REFINE_SHADER_COMPUTE;
}
if (GPU_transform_feedback_support()) {

View File

@ -36,7 +36,12 @@
BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get()
{
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
/* NOTE: Hair refine is faster using transform feedback via vertex processing pipeline with Metal
* and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
* parallel with fragment work, whereas compute inserts an explicit dependency,
* due to switching of command encoder types. */

I just created #105241 to keep track of this.
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
(GPU_backend_get_type() != GPU_BACKEND_METAL)) {
return PART_REFINE_SHADER_COMPUTE;
}
if (GPU_transform_feedback_support()) {

View File

@ -329,6 +329,14 @@ struct DRWDebugVert {
uint pos2;
/* Named vert_color to avoid global namespace collision with uniform color. */
uint vert_color;
#ifdef GPU_METAL
inline DRWDebugVert() = default;
inline DRWDebugVert(uint in_pos0, uint in_pos1, uint in_pos2, uint in_vert_color)
: pos0(in_pos0), pos1(in_pos1), pos2(in_pos2), vert_color(in_vert_color)
{
}
#endif
};
BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16)

View File

@ -14,6 +14,13 @@
struct Circle {
vec2 center;
float radius;
#ifdef GPU_METAL
inline Circle() = default;
inline Circle(vec2 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Circle shape_circle(vec2 center, float radius)
@ -30,6 +37,13 @@ Circle shape_circle(vec2 center, float radius)
struct Sphere {
vec3 center;
float radius;
#ifdef GPU_METAL
inline Sphere() = default;
inline Sphere(vec3 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Sphere shape_sphere(vec3 center, float radius)
@ -192,6 +206,14 @@ Frustum shape_frustum(vec3 corners[8])
struct Cone {
vec3 direction;
float angle_cos;
#ifdef GPU_METAL
inline Cone() = default;
inline Cone(vec3 in_direction, float in_angle_cos)
: direction(in_direction), angle_cos(in_angle_cos)
{
}
#endif
};
Cone shape_cone(vec3 direction, float angle_cosine)
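
For context on the `GPU_METAL` constructor shims added above (and the `DRWDebugVert` one earlier): GLSL allows `Type(args)` construction of plain structs implicitly, but when the same source is compiled as MSL these calls need an explicit constructor. A minimal sketch of the call sites they keep valid, with illustrative values:

```
/* Hypothetical GLSL-style construction that still compiles under MSL thanks to the shims: */
Circle c = Circle(vec2(0.5, 0.5), 0.25);
Sphere s = Sphere(vec3(0.0, 0.0, 0.0), 1.0);
Cone cone = Cone(vec3(0.0, 0.0, -1.0), 0.5);
```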

View File

@ -33,7 +33,7 @@ void projmat_dimensions(mat4 winmat,
}
}
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, corners, 8))
{
float left, right, bottom, top, near, far;
bool is_persp = winmat[3][3] == 0.0;
@ -68,12 +68,12 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
}
void planes_from_projmat(mat4 mat,
out vec4 left,
out vec4 right,
out vec4 bottom,
out vec4 top,
out vec4 near,
out vec4 far)
DEVICE_OUT(vec4, left),

I would prefer to have a return struct instead of this (maybe `ProjectionPlanes`). This is more of a refactor, but I would very much like to avoid these types of defines.
DEVICE_OUT(vec4, right),
DEVICE_OUT(vec4, bottom),
DEVICE_OUT(vec4, top),
DEVICE_OUT(vec4, near),
DEVICE_OUT(vec4, far))
{
/* References:
*
@ -89,7 +89,7 @@ void planes_from_projmat(mat4 mat,
far = mat[3] - mat[2];
}
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))

Same here
{
mat4 persmat = winmat * viewmat;
planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]);
@ -100,7 +100,7 @@ void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
}
}
vec4 frustum_culling_sphere_calc(vec4 corners[8])
vec4 frustum_culling_sphere_calc(device vec4 corners[8])

Same here. You can use `struct Box` for that matter.
{
/* Extract Bounding Sphere */
/* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */

View File

@ -240,6 +240,7 @@ set(METAL_SRC
metal/mtl_shader_generator.mm
metal/mtl_shader_interface.mm
metal/mtl_state.mm
metal/mtl_storage_buffer.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
@ -265,6 +266,7 @@ set(METAL_SRC
metal/mtl_shader_interface_type.hh
metal/mtl_shader_shared.h
metal/mtl_state.hh
metal/mtl_storage_buffer.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
metal/mtl_vertex_buffer.hh

View File

@ -16,6 +16,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -100,8 +101,7 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
{
/* TODO(Metal): Implement MTLStorageBuf. */
return nullptr;
return new MTLStorageBuf(size, usage, name);
}
VertBuf *MTLBackend::vertbuf_alloc()
@ -398,16 +398,16 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] ||
MTLBackend::capabilities.supports_family_mac1 ||
MTLBackend::capabilities.supports_family_mac2);
GCaps.compute_shader_support = true;
GCaps.shader_storage_buffer_objects_support = false;
/* TODO(Metal): Add support? */
GCaps.shader_draw_parameters_support = false;
GCaps.compute_shader_support = true;
GCaps.geometry_shader_support = false;
GCaps.shader_storage_buffer_objects_support =
false; /* TODO(Metal): implement Storage Buffer support. */
/* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
* Can use argument buffers if a higher limit is required. */
GCaps.max_shader_storage_buffer_bindings = 24;
GCaps.max_shader_storage_buffer_bindings = 14;
if (GCaps.compute_shader_support) {
GCaps.max_work_group_count[0] = 65535;

View File

@ -18,7 +18,12 @@ namespace gpu {
#define MTL_MAX_DEFAULT_SAMPLERS 16
/* Total maximum buffers which can be bound to an encoder, for use within a shader.
* MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS must be <=
* than MTL_MAX_BUFFER_BINDINGS. */
* than MTL_MAX_BUFFER_BINDINGS.
* We also require an additional 3 core buffers for:
* - Argument buffer for bindless resources (e.g. samplers)
* - Transform feedback buffer
* - Default push constant block
* Along with up to 6+1 buffers for vertex data, and index data. */

> We also require an additional 3 core buffers for:

Does this mean they are also counting towards the limit of `MTL_MAX_BUFFER_BINDINGS`?
#define MTL_MAX_BUFFER_BINDINGS 31
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 16
#define MTL_MAX_STORAGE_BUFFER_BINDINGS 12
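
As a quick check of the budget described above (a sketch, not part of the patch): 16 UBO slots plus 12 SSBO slots is 28, which fits inside the 31-binding limit and leaves 3 slots for the core buffers listed in the comment; a compile-time assert could express the stated constraint:

```
/* Hypothetical compile-time check of the constraint stated in the comment above. */
static_assert(MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS <=
                  MTL_MAX_BUFFER_BINDINGS,
              "UBO and SSBO binding ranges must fit within MTL_MAX_BUFFER_BINDINGS");
```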

View File

@ -46,6 +46,7 @@ namespace blender::gpu {
class MTLContext;
class MTLCommandBufferManager;
class MTLUniformBuf;
class MTLStorageBuf;
/* Structs containing information on current binding state for textures and samplers. */
struct MTLTextureBinding {
@ -436,6 +437,11 @@ struct MTLUniformBufferBinding {
MTLUniformBuf *ubo;
};
struct MTLStorageBufferBinding {
bool bound;
MTLStorageBuf *ssbo;
};
struct MTLContextGlobalShaderPipelineState {
bool initialised;
@ -457,6 +463,9 @@ struct MTLContextGlobalShaderPipelineState {
/* Global Uniform Buffers. */
MTLUniformBufferBinding ubo_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
/* Storage buffer. */
MTLStorageBufferBinding ssbo_bindings[MTL_MAX_STORAGE_BUFFER_BINDINGS];
/* Context Texture bindings. */
MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS];
MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS];

View File

@ -12,6 +12,7 @@
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -20,6 +21,7 @@
#include "GPU_capabilities.h"
#include "GPU_matrix.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_buffer.h"
@ -272,6 +274,16 @@ MTLContext::~MTLContext()
}
}
/* Unbind SSBOs. */
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) {
GPUStorageBuf *ssbo = wrap(
static_cast<StorageBuf *>(this->pipeline_state.ssbo_bindings[i].ssbo));
GPU_storagebuf_unbind(ssbo);
}
}
/* Release Dummy resources */
this->free_dummy_resources();
@ -360,6 +372,15 @@ void MTLContext::activate()
}
}
/* Reset SSBO bind state. */
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) {
this->pipeline_state.ssbo_bindings[i].bound = false;
this->pipeline_state.ssbo_bindings[i].ssbo = nullptr;
}
}
/* Ensure imm active. */
immActivate();
}
@ -658,6 +679,10 @@ void MTLContext::pipeline_state_init()
this->pipeline_state.ubo_bindings[u].bound = false;
this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
for (int u = 0; u < MTL_MAX_STORAGE_BUFFER_BINDINGS; u++) {
this->pipeline_state.ssbo_bindings[u].bound = false;
this->pipeline_state.ssbo_bindings[u].ssbo = nullptr;
}
}
/*** State defaults -- restored by GPU_state_init. ***/
@ -1026,7 +1051,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
rps.last_bound_shader_state.pso_index_ !=
pipeline_state_instance->shader_pso_index);
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1061,7 +1086,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
@ -1177,6 +1202,58 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuf's */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0) {
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
UNUSED_VARS_NDEBUG(ssbo_size);
if (this->pipeline_state.ssbo_bindings[ssbo_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_size();
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_INFO(

Should be `MTL_LOG_ERROR`. This should be mandatory. Same for UBOs.
"[Warning][SSBO] Shader '%s' expected SSBO '%s' to be bound at buffer index: %d -- "
"but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo.buffer_index);
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index +
ssbo.buffer_index;
/* Bind Vertex UBO. */
if (bool(ssbo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ssbo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment UBOs. */
if (bool(ssbo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
}
}
return true;
}
@ -1191,7 +1268,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
/* Fetch push constant block and bind. */
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1218,7 +1295,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
@ -1270,7 +1347,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
/* Bind Compute UBO. */
if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
@ -1286,6 +1363,52 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuffers */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0) {
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
if (this->pipeline_state.ssbo_bindings[ssbo_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_size();
UNUSED_VARS_NDEBUG(ssbo_size);
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at SSBO index: %d (buffer "
"%d) -- but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo.buffer_index,
pipeline_state_instance.base_storage_buffer_index + ssbo.buffer_index);
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index +
ssbo.buffer_index;
/* Bind Vertex UBO. */
if (bool(ssbo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
}
}
return true;
}
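
For reference, a minimal sketch of the caller-side flow these binding loops service, using the public GPU API (buffer size, slot and shader are hypothetical; the compute dispatch is assumed to bind the shader internally):

```
GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(1024, nullptr, GPU_USAGE_DEVICE_ONLY, "test_ssbo");
GPU_storagebuf_bind(ssbo, 0); /* Recorded into pipeline_state.ssbo_bindings[0]. */
GPU_compute_dispatch(shader, 64, 1, 1); /* ensure_uniform_buffer_bindings() resolves slot 0 to
                                         * buffer(base_storage_buffer_index + buffer_index). */
GPU_storagebuf_free(ssbo);
```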

View File

@ -18,12 +18,16 @@ namespace blender::gpu {
class MTLIndexBuf : public IndexBuf {
friend class MTLBatch;
friend class MTLDrawList;
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Metal buffer resource. */
gpu::MTLBuffer *ibo_ = nullptr;
uint64_t alloc_size_ = 0;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
#ifndef NDEBUG
/* Flags whether point index buffer has been compacted
* to remove false restart indices. */

View File

@ -7,6 +7,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "BLI_span.hh"
@ -22,6 +23,11 @@ MTLIndexBuf::~MTLIndexBuf()
ibo_->free();
}
this->free_optimized_buffer();
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLIndexBuf::free_optimized_buffer()
@ -42,8 +48,14 @@ void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
/* Ensure we have a valid IBO. */
BLI_assert(this->ibo_);
/* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */
MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
/* Ensure resource is initialized. */
this->upload_data();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
void MTLIndexBuf::read(uint32_t *data) const

View File

@ -71,7 +71,7 @@ struct MTLRenderPipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index;
int base_storage_buffer_index;
/* buffer bind slot used for null attributes (-1 if not needed). */
int null_attribute_buffer_index;
/* buffer bind used for transform feedback output buffer. */
@ -101,7 +101,7 @@ struct MTLComputePipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index = -1;
int base_storage_buffer_index = -1;
int threadgroup_x_len = 1;
int threadgroup_y_len = 1;

View File

@ -386,7 +386,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
valid_ = true;
/* Prepare backing data storage for local uniforms. */
const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
this->push_constant_bindstate_mark_dirty(true);
@ -987,12 +987,26 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
((mtl_interface->get_total_uniform_blocks() > 0) ?
(mtl_interface->get_max_ubo_index() + 2) :
(MTL_uniform_buffer_base_index + 1));
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Transform feedback constant.
* Ensure buffer is placed after existing buffers, including default buffers. */
* Ensure buffer is placed after existing buffers, including default buffers, UBOs and SSBOs.
*/
int MTL_transform_feedback_buffer_index = (this->transform_feedback_type_ !=
GPU_SHADER_TFB_NONE) ?
MTL_uniform_buffer_base_index +
mtl_interface->get_max_ubo_index() + 2 :
MTL_storage_buffer_base_index +
mtl_interface->get_max_ssbo_index() + 2 :
-1;
if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1150,6 +1164,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
pso_inst->frag = desc.fragmentFunction;
pso_inst->pso = pso;
pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
pso_inst->prim_type = prim_type;
@ -1254,6 +1269,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(mtl_interface);
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);
@ -1275,7 +1292,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* TODO: SSBO binding base index. */
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
((mtl_interface->get_total_uniform_blocks() > 0) ?
(mtl_interface->get_max_ubo_index() + 2) :
(MTL_uniform_buffer_base_index + 1));
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Compile compute function. */
NSError *error = nullptr;
@ -1321,8 +1350,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
compute_pso_instance_.compute = [compute_function retain];
compute_pso_instance_.pso = [pso retain];
compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index;
/* TODO: Add SSBO base buffer index support. */
compute_pso_instance_.base_ssbo_buffer_index = -1;
compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index;
}
return true;
}
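
A worked example of the base-index formula above, with hypothetical counts: two vertex buffers give `MTL_uniform_buffer_base_index = 2`, the push constant block sits at `buffer(2)`, three UBOs (highest index 2) occupy `buffer(3..5)`, and the storage buffer base then resolves to `2 + (2 + 2) = 6`:

```
/* Hypothetical values, following the formula used for both render and compute PSOs above. */
int MTL_uniform_buffer_base_index = 2; /* Two vertex buffers occupy buffer(0..1). */
int max_ubo_index = 2;                 /* Three UBOs -> buffer(3..5); push constants at buffer(2). */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + (max_ubo_index + 2); /* == 6 */
/* An SSBO with buffer_index N then binds at buffer(MTL_storage_buffer_base_index + N). */
```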

View File

@ -105,6 +105,22 @@
* }
* \endcode
*
* -- Metal buffer bindings structure --
*
* Metal shaders contain several different binding types. All buffers are bound using the buffer(N)
* binding attribute tag. However, different ranges serve different purposes. The structure of the
* bindings is always as follows:
*
* Vertex Buffers (N) <-- 0
* Index buffer
* Default Push constant block for uniforms <-- MTL_uniform_buffer_base_index
* Uniform buffers <-- MTL_uniform_buffer_base_index+1
* Storage buffers <-- MTL_storage_buffer_base_index
* Samplers/argument buffer table <-- last buffer + 1

Why not put all reserved ones first? This would simplify this mapping.
* Transform feedback buffer <-- last_buffer + 2
*
* Up to a maximum of 31 bindings.
*
* -- SSBO-vertex-fetchmode --
*
* SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
@ -200,13 +216,14 @@ struct MSLUniform {
}
};
struct MSLUniformBlock {
struct MSLBufferBlock {
std::string type_name;
std::string name;
ShaderStage stage;
bool is_array;
shader::Qualifier qualifiers;
bool operator==(const MSLUniformBlock &right) const
bool operator==(const MSLBufferBlock &right) const
{
return (type_name == right.type_name && name == right.name);
}
@ -369,7 +386,8 @@ class MSLGeneratorInterface {
public:
/** Shader stage input/output binding information.
* Derived from shader source reflection or GPUShaderCreateInfo. */
blender::Vector<MSLUniformBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> storage_blocks;
blender::Vector<MSLUniform> uniforms;
blender::Vector<MSLTextureSampler> texture_samplers;
blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
@ -385,7 +403,8 @@ class MSLGeneratorInterface {
blender::Vector<char> clip_distances;
/* Shared Memory Blocks. */
blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
/* Max bind IDs. */
int max_tex_bind_index = 0;
/** GL Global usage. */
/* Whether GL position is used, or an alternative vertex output should be the default. */
bool uses_gl_Position;
@ -459,8 +478,10 @@ class MSLGeneratorInterface {
/* Samplers. */
bool use_argument_buffer_for_samplers() const;
uint32_t num_samplers_for_stage(ShaderStage stage) const;
uint32_t max_sampler_index_for_stage(ShaderStage stage) const;
/* Returns the bind index, relative to MTL_uniform_buffer_base_index. */
/* Returns the bind index, relative to
* MTL_uniform_buffer_base_index+MTL_storage_buffer_base_index. */
uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage);
/* Code generation utility functions. */
@ -476,7 +497,7 @@ class MSLGeneratorInterface {
std::string generate_msl_fragment_entry_stub();
std::string generate_msl_compute_entry_stub();
std::string generate_msl_global_uniform_population(ShaderStage stage);
std::string generate_ubo_block_macro_chain(MSLUniformBlock block);
std::string generate_ubo_block_macro_chain(MSLBufferBlock block);
std::string generate_msl_uniform_block_population(ShaderStage stage);
std::string generate_msl_vertex_attribute_input_population();
std::string generate_msl_vertex_output_population();
@ -538,7 +559,9 @@ inline bool is_builtin_type(std::string type)
{
/* Add Types as needed. */
/* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch.
* Though most efficient and maintainable approach to be determined. */
* Though most efficient and maintainable approach to be determined.
* NOTE: Some duplicate types exist for Metal and GLSL representations, as generated typenames
* from createinfo may use GLSL signature. */
static std::map<std::string, eMTLDataType> glsl_builtin_types = {
{"float", MTL_DATATYPE_FLOAT},
{"vec2", MTL_DATATYPE_FLOAT2},
@ -548,10 +571,17 @@ inline bool is_builtin_type(std::string type)
{"ivec2", MTL_DATATYPE_INT2},
{"ivec3", MTL_DATATYPE_INT3},
{"ivec4", MTL_DATATYPE_INT4},
{"int2", MTL_DATATYPE_INT2},
{"int3", MTL_DATATYPE_INT3},
{"int4", MTL_DATATYPE_INT4},
{"uint32_t", MTL_DATATYPE_UINT},
{"uvec2", MTL_DATATYPE_UINT2},
{"uvec3", MTL_DATATYPE_UINT3},
{"uvec4", MTL_DATATYPE_UINT4},
{"uint", MTL_DATATYPE_UINT},
{"uint2", MTL_DATATYPE_UINT2},
{"uint3", MTL_DATATYPE_UINT3},
{"uint4", MTL_DATATYPE_UINT4},
{"mat3", MTL_DATATYPE_FLOAT3x3},
{"mat4", MTL_DATATYPE_FLOAT4x4},
{"bool", MTL_DATATYPE_INT},

View File

@ -709,8 +709,30 @@ static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &r
}
break;
}
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER:
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
int64_t array_offset = res.storagebuf.name.find_first_of("[");
bool writeable = (res.storagebuf.qualifiers & shader::Qualifier::WRITE) ==
shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
if (array_offset == -1) {
/* Create local class member as device pointer reference to bound SSBO.
* Given usage within a shader follows ssbo_name.ubo_element syntax, we can
* dereference the pointer as the compiler will optimize this data fetch.
* To do this, we also give the UBO name a post-fix of `_local` to avoid
* macro accessor collisions. */
os << memory_scope << res.storagebuf.type_name << " *" << res.storagebuf.name
<< "_local;\n";
os << "#define " << res.storagebuf.name << " (*" << res.storagebuf.name << "_local)\n";
}
else {
/* For arrays, we can directly provide the constant access pointer, as the array
* syntax will de-reference this at the correct fetch index. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
os << memory_scope << res.storagebuf.type_name << " *" << name_no_array << ";\n";
}
break;
}
}
}
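
To illustrate what this case emits (resource names and types are hypothetical): a non-array storage buffer declared as `.storage_buf(0, Qualifier::READ_WRITE, "DrawData", "drw_data")` and an array one declared as `.storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]")` would produce roughly:

```
/* Non-array case: pointer member plus accessor macro. */
device DrawData *drw_data_local;
#define drw_data (*drw_data_local)

/* Array case: direct pointer, indexed at the call site. */
constant ObjectBounds *bounds_buf;
```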
@ -999,7 +1021,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::VERTEX) + 1 << std::endl;
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
@ -1190,7 +1212,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::FRAGMENT) + 1 << std::endl;
}
/* Inject common Metal header. */
@ -1437,7 +1459,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_compute << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_compute << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::COMPUTE) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::COMPUTE) + 1 << std::endl;
}
/* Inject static workgroup sizes. */
@ -1555,6 +1577,31 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
this->set_compute_function_name(@"compute_function_entry");
#endif
/* DEBUG: Export source to file for manual verification. */
#if MTL_SHADER_DEBUG_EXPORT_SOURCE
NSFileManager *sharedFM = [NSFileManager defaultManager];
NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL];
NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent]
URLByAppendingPathComponent:@"Shaders/"
isDirectory:YES];
[sharedFM createDirectoryAtURL:shader_dir
withIntermediateDirectories:YES
attributes:nil
error:nil];
const char *path_cstr = [shader_dir fileSystemRepresentation];
std::ofstream compute_fs;
compute_fs.open(
(std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl")
.c_str());
compute_fs << ss_compute.str();
compute_fs.close();
shader_debug_printf(
"Compute Shader Saved to: %s\n",
(std::string(path_cstr) + std::string(this->name) + "_GeneratedComputeShader.msl").c_str());
#endif
NSString *msl_final_compute = [NSString stringWithUTF8String:ss_compute.str().c_str()];
this->shader_compute_source_from_msl(msl_final_compute);
@ -1738,6 +1785,7 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
MSLTextureSampler msl_tex(
ShaderStage::ANY, res.sampler.type, res.sampler.name, access, used_slot);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(used_slot, max_tex_bind_index);
} break;
case shader::ShaderCreateInfo::Resource::BindType::IMAGE: {
@ -1771,14 +1819,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
access,
used_slot);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(used_slot, max_tex_bind_index);
} break;
case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
MSLUniformBlock ubo;
MSLBufferBlock ubo;
BLI_assert(res.uniformbuf.type_name.size() > 0);
BLI_assert(res.uniformbuf.name.size() > 0);
int64_t array_offset = res.uniformbuf.name.find_first_of("[");
ubo.qualifiers = shader::Qualifier::READ;
ubo.type_name = res.uniformbuf.type_name;
ubo.is_array = (array_offset > -1);
if (ubo.is_array) {
@ -1794,8 +1844,24 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
} break;
case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
/* TODO(Metal): Support shader storage buffer in Metal.
* Pending compute support. */
MSLBufferBlock ssbo;
BLI_assert(res.storagebuf.type_name.size() > 0);
BLI_assert(res.storagebuf.name.size() > 0);
int64_t array_offset = res.storagebuf.name.find_first_of("[");
ssbo.qualifiers = res.storagebuf.qualifiers;
ssbo.type_name = res.storagebuf.type_name;
ssbo.is_array = (array_offset > -1);
if (ssbo.is_array) {
/* If is array UBO, strip out array tag from name. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
ssbo.name = name_no_array;
}
else {
ssbo.name = res.storagebuf.name;
}
ssbo.stage = ShaderStage::FRAGMENT | ShaderStage::COMPUTE;
storage_blocks.append(ssbo);
} break;
}
}
@ -1850,10 +1916,28 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const
{
/* We can only use argument buffers IF sampler count exceeds static limit of 16,
* AND we can support more samplers with an argument buffer.
* NOTE: We reserve one constant sampler within the shader for fast read via point-sampling. */
return texture_samplers.size() >= 15 && GPU_max_samplers() > 16;
/* We can only use argument buffers IF highest sampler index exceeds static limit of 16,
* AND we can support more samplers with an argument buffer. */
bool use_argument_buffer = (texture_samplers.size() >= 15 || max_tex_bind_index >= 14) &&
GPU_max_samplers() > 15;
#ifndef NDEBUG
/* Due to explicit bind location support, we may be below the sampler limit, but forced to offset
* bindings due to the range being high. Introduce debug check here to issue warning. In these
* cases, if explicit bind location support is not required, best to use auto_resource_location
* to optimize bind point packing. */
if (use_argument_buffer && texture_samplers.size() < 15) {
MTL_LOG_WARNING(
"Compiled Shader '%s' is falling back to bindless via argument buffers due to having a "
"texture sampler of Index: %u Which exceeds the limit of 15+1. However shader only uses "
"%d textures. Consider optimising bind points with .auto_resource_location(true).\n",
parent_shader_.name_get(),
max_tex_bind_index,
(int)texture_samplers.size());
}
#endif
return use_argument_buffer;
}
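
As a concrete case of the new warning (shader and sampler names are hypothetical): a create-info with only two samplers, one pinned to slot 14, now takes the argument-buffer path on hardware reporting more than 15 samplers, even though the sampler count itself is small:

```
GPU_SHADER_CREATE_INFO(example_shader)
    .sampler(0, ImageType::FLOAT_2D, "tex_a")
    .sampler(14, ImageType::FLOAT_2D, "tex_b"); /* max_tex_bind_index = 14 -> argument buffer path;
                                                 * the warning suggests .auto_resource_location(true)
                                                 * so slots get repacked instead. */
```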
uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
@ -1863,6 +1947,13 @@ uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
return texture_samplers.size();
}
uint32_t MSLGeneratorInterface::max_sampler_index_for_stage(ShaderStage stage) const
{
/* NOTE: Sampler bindings and argument buffer shared across stages,
* in case stages share texture/sampler bindings. */
return max_tex_bind_index;
}
uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage)
{
/* Note: Shader stage must be a singular index. Compound shader masks are not valid for this
@ -1873,7 +1964,7 @@ uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderSta
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] =
(this->uniform_blocks.size() + 1);
(this->uniform_blocks.size() + this->storage_blocks.size() + 1);
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
@ -2148,7 +2239,6 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
out << this->generate_msl_texture_vars(ShaderStage::COMPUTE);
out << this->generate_msl_global_uniform_population(ShaderStage::COMPUTE);
out << this->generate_msl_uniform_block_population(ShaderStage::COMPUTE);
/* TODO(Metal): SSBO Population. */
/* Execute original 'main' function within class scope. */
out << "\t/* Execute Compute main function */\t" << std::endl
@ -2205,8 +2295,9 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
ShaderStage stage)
{
/* Uniform buffers. */
int ubo_index = 0;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
if (bool(ubo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
out << ",\n\tconstant ";
@ -2222,6 +2313,28 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
}
ubo_index++;
}
/* Storage buffers. */
int ssbo_index = 0;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
if (bool(ssbo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
bool writeable = (ssbo.qualifiers & shader::Qualifier::WRITE) == shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
out << ",\n\t" << memory_scope;
if (!is_builtin_type(ssbo.type_name)) {
out << get_stage_class_name(stage) << "::";
}
/* #StorageBuffer bind indices start at `MTL_storage_buffer_base_index`.
* MTL_storage_buffer_base_index follows immediately after all uniform blocks.
* such that MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
* uniform_blocks.size() + 1. Where the additional buffer is reserved for the
* #PushConstantBlock (push constants). */
out << ssbo.type_name << "* " << ssbo.name << "[[buffer(MTL_storage_buffer_base_index+"
<< (ssbo_index) << ")]]";
}
ssbo_index++;
}
}
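
The per-SSBO string appended here then shows up in the generated entry-point signature roughly as follows (type and name hypothetical; the stage class prefix is added for non-builtin types, and read-only blocks use `constant` instead of `device`):

```
, device DrawData* drw_data [[buffer(MTL_storage_buffer_base_index+0)]]
```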
std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
@ -2372,9 +2485,13 @@ std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shade
out << "#undef " << uniform.name << std::endl;
}
/* UBO block undef. */
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
out << "#undef " << ubo.name << std::endl;
}
/* SSBO block undef. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
out << "#undef " << ssbo.name << std::endl;
}
return out.str();
}
@ -2656,7 +2773,7 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
/* Populate Global Uniforms. */
std::stringstream out;
out << "\t/* Copy UBO block references into local class variables */" << std::endl;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ubo.stage & stage)) {
@ -2672,6 +2789,26 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
out << " = " << ubo.name << ";" << std::endl;
}
}
/* Populate storage buffer references. */
out << "\t/* Copy SSBO block references into local class variables */" << std::endl;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ssbo.stage & stage)) {
/* Generate UBO reference assignment.
* NOTE(Metal): We append `_local` post-fix onto the class member name
* for the ubo to avoid name collision with the UBO accessor macro.
* We only need to add this post-fix for the non-array access variant,
* as the array is indexed directly, rather than requiring a dereference. */
out << "\t" << get_shader_stage_instance_name(stage) << "." << ssbo.name;
if (!ssbo.is_array) {
out << "_local";
}
out << " = " << ssbo.name << ";" << std::endl;
}
}
out << std::endl;
return out.str();
}
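
The population code emitted per storage block is then a single pointer assignment into the stage instance; a sketch with hypothetical names (the instance name depends on `get_shader_stage_instance_name()`):

```
/* Non-array SSBO: assign into the `_local` member that the accessor macro dereferences. */
compute_shader_instance.drw_data_local = drw_data;
/* Array SSBO: assigned directly, without the `_local` suffix. */
compute_shader_instance.bounds_buf = bounds_buf;
```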
@ -3261,6 +3398,18 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
this->uniform_blocks[uniform_block].stage);
}
/* Prepare Interface Storage Blocks. */
for (int storage_block = 0; storage_block < this->storage_blocks.size(); storage_block++) {
interface->add_storage_block(
name_buffer_copystr(&interface->name_buffer_,
this->storage_blocks[storage_block].name.c_str(),
name_buffer_size,
name_buffer_offset),
storage_block,
0,
this->storage_blocks[storage_block].stage);
}
/* Texture/sampler bindings to interface. */
for (const MSLTextureSampler &texture_sampler : this->texture_samplers) {
interface->add_texture(name_buffer_copystr(&interface->name_buffer_,

View File

@ -107,7 +107,7 @@ struct MTLShaderInputAttribute {
uint32_t matrix_element_count;
};
struct MTLShaderUniformBlock {
struct MTLShaderBufferBlock {
uint32_t name_offset;
uint32_t size = 0;
/* Buffer resource bind index in shader `[[buffer(index)]]`. */
@ -120,7 +120,7 @@ struct MTLShaderUniformBlock {
struct MTLShaderUniform {
uint32_t name_offset;
/* Index of `MTLShaderUniformBlock` this uniform belongs to. */
/* Index of `MTLShaderBufferBlock` this uniform belongs to. */
uint32_t size_in_bytes;
uint32_t byte_offset;
eMTLDataType type;
@ -173,8 +173,13 @@ class MTLShaderInterface : public ShaderInterface {
/* Uniform Blocks. */
uint32_t total_uniform_blocks_;
uint32_t max_uniformbuf_index_;
MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderUniformBlock push_constant_block_;
MTLShaderBufferBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderBufferBlock push_constant_block_;
/* Storage blocks. */
uint32_t total_storage_blocks_;
uint32_t max_storagebuf_index_;
MTLShaderBufferBlock ssbos_[MTL_MAX_STORAGE_BUFFER_BINDINGS];
/* Textures. */
/* Textures support explicit binding indices, so some texture slots
@ -209,6 +214,10 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
uint32_t add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1);
void add_texture(uint32_t name_offset,
uint32_t texture_slot,
@ -232,14 +241,21 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t get_total_uniforms() const;
/* Fetch Uniform Blocks. */
const MTLShaderUniformBlock &get_uniform_block(uint index) const;
const MTLShaderBufferBlock &get_uniform_block(uint index) const;
uint32_t get_total_uniform_blocks() const;
uint32_t get_max_ubo_index() const;
bool has_uniform_block(uint32_t block_index) const;
uint32_t get_uniform_block_size(uint32_t block_index) const;
/* Fetch Storage Blocks. */
const MTLShaderBufferBlock &get_storage_block(uint index) const;
uint32_t get_total_storage_blocks() const;
uint32_t get_max_ssbo_index() const;
bool has_storage_block(uint32_t block_index) const;
uint32_t get_storage_block_size(uint32_t block_index) const;
/* Push constant uniform data block should always be available. */
const MTLShaderUniformBlock &get_push_constant_block() const;
const MTLShaderBufferBlock &get_push_constant_block() const;
/* Fetch textures. */
const MTLShaderTexture &get_texture(uint index) const;

View File

@ -56,6 +56,8 @@ void MTLShaderInterface::init()
total_attributes_ = 0;
total_uniform_blocks_ = 0;
max_uniformbuf_index_ = 0;
total_storage_blocks_ = 0;
max_storagebuf_index_ = 0;
total_uniforms_ = 0;
total_textures_ = 0;
max_texture_index_ = -1;
@ -73,6 +75,9 @@ void MTLShaderInterface::init()
for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
builtin_blocks_[ubo] = -1;
}
for (const int ssbo : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
builtin_buffers_[ssbo] = -1;
}
for (const int tex : IndexRange(MTL_MAX_TEXTURE_SLOTS)) {
textures_[tex].used = false;
textures_[tex].slot_index = -1;
@ -117,7 +122,10 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
size += 16 - (size % 16);
}
MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
BLI_assert(total_uniform_blocks_ < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(buffer_index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
MTLShaderBufferBlock &uni_block = ubos_[total_uniform_blocks_];
uni_block.name_offset = name_offset;
uni_block.buffer_index = buffer_index;
uni_block.size = size;
@ -127,6 +135,29 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
return (total_uniform_blocks_++);
}
uint32_t MTLShaderInterface::add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask)
{
/* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */
if ((size % 16) != 0) {
size += 16 - (size % 16);
}
BLI_assert(total_storage_blocks_ < MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(buffer_index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
MTLShaderBufferBlock &ssbo_block = ssbos_[total_storage_blocks_];
ssbo_block.name_offset = name_offset;
ssbo_block.buffer_index = buffer_index;
ssbo_block.size = size;
ssbo_block.current_offset = 0;
ssbo_block.stage_mask = ShaderStage::ANY;
max_storagebuf_index_ = max_ii(max_storagebuf_index_, buffer_index);
return (total_storage_blocks_++);
}
void MTLShaderInterface::add_push_constant_block(uint32_t name_offset)
{
push_constant_block_.name_offset = name_offset;
@ -227,6 +258,9 @@ void MTLShaderInterface::map_builtins()
for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
builtin_blocks_[ubo] = -1;
}
for (const int ssbo : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
builtin_buffers_[ssbo] = -1;
}
/* Resolve and cache uniform locations for builtin uniforms. */
for (const int u : IndexRange(GPU_NUM_UNIFORMS)) {
@ -257,6 +291,22 @@ void MTLShaderInterface::map_builtins()
}
}
}
/* Resolve and cache uniform locations for builtin storage buffers. */
for (const int u : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
const ShaderInput *uni = this->ssbo_get(
builtin_storage_block_name((GPUStorageBufferBuiltin)u));
if (uni != nullptr) {
BLI_assert(uni->location >= 0);
if (uni->location >= 0) {
builtin_buffers_[u] = uni->binding;
MTL_LOG_INFO("Mapped builtin storage buffer '%s' to location %d\n",
builtin_storage_block_name((GPUStorageBufferBuiltin)u),
uni->location);
}
}
}
}
/* Populate #ShaderInput struct based on interface. */
@ -272,9 +322,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
attr_len_ = this->get_total_attributes();
ubo_len_ = this->get_total_uniform_blocks();
uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
/* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */
ssbo_len_ = 0;
ssbo_len_ = this->get_total_storage_blocks();
/* Calculate total inputs and allocate #ShaderInput array. */
/* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */
@ -300,7 +348,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
BLI_assert(&inputs_[attr_len_] >= current_input);
current_input = &inputs_[attr_len_];
for (const int ubo_index : IndexRange(total_uniform_blocks_)) {
MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index];
MTLShaderBufferBlock &shd_ubo = ubos_[ubo_index];
current_input->name_offset = shd_ubo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
/* Location refers to the index in the ubos_ array. */
@ -308,7 +356,8 @@ void MTLShaderInterface::prepare_common_shader_inputs()
/* Binding location refers to the UBO bind slot in
* #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock. */
* PushConstantBlock.
* see `mtl_shader_generator.hh` for buffer binding table breakdown. */
current_input->binding = shd_ubo.buffer_index;
current_input++;
}
@ -357,10 +406,24 @@ void MTLShaderInterface::prepare_common_shader_inputs()
}
}
/* SSBO bindings.
* TODO(Metal): Support SSBOs. Pending compute support. */
/* SSBO bindings. */
BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input);
current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_];
BLI_assert(ssbo_len_ >= total_storage_blocks_);
for (const int ssbo_index : IndexRange(total_storage_blocks_)) {
MTLShaderBufferBlock &shd_ssbo = ssbos_[ssbo_index];
current_input->name_offset = shd_ssbo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ssbo.name_offset));
/* Location refers to the index in the ssbos_ array. */
current_input->location = ssbo_index;
/* Binding location refers to the SSBO bind slot in
* #MTLContextGlobalShaderPipelineState::ssbo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock after other uniform blocks
* see `mtl_shader_generator.hh` for buffer binding table breakdown. */
current_input->binding = shd_ssbo.buffer_index;
current_input++;
}
/* Map builtin uniform indices to uniform binding locations. */
this->map_builtins();
@ -417,14 +480,14 @@ uint32_t MTLShaderInterface::get_total_uniforms() const
}
/* Uniform Blocks. */
const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const
const MTLShaderBufferBlock &MTLShaderInterface::get_uniform_block(uint index) const
{
BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(index < get_total_uniform_blocks());
return ubos_[index];
}
const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const
const MTLShaderBufferBlock &MTLShaderInterface::get_push_constant_block() const
{
return push_constant_block_;
}
@ -449,6 +512,33 @@ uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const
return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0;
}
/* Storage Blocks. */
const MTLShaderBufferBlock &MTLShaderInterface::get_storage_block(uint index) const
{
BLI_assert(index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(index < get_total_storage_blocks());
return ssbos_[index];
}
uint32_t MTLShaderInterface::get_total_storage_blocks() const
{
return total_storage_blocks_;
}
uint32_t MTLShaderInterface::get_max_ssbo_index() const
{
return max_storagebuf_index_;
}
bool MTLShaderInterface::has_storage_block(uint32_t block_index) const
{
return (block_index < total_storage_blocks_);
}
uint32_t MTLShaderInterface::get_storage_block_size(uint32_t block_index) const
{
return (block_index < total_storage_blocks_) ? ssbos_[block_index].size : 0;
}
/* Textures. */
const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const
{

View File

@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_storage_buffer_private.hh"
#include "mtl_context.hh"
namespace blender {
namespace gpu {
class MTLUniformBuf;
class MTLVertBuf;
class MTLIndexBuf;
/**
* Implementation of Storage Buffers using Metal.
*/
class MTLStorageBuf : public StorageBuf {
private:
/** Allocation Handle or indirect wrapped instance.
* MTLStorageBuf can wrap a MTLVertBuf, MTLIndexBuf or MTLUniformBuf for binding as a writeable
* resource. */
enum {
MTL_STORAGE_BUF_TYPE_DEFAULT = 0,
MTL_STORAGE_BUF_TYPE_UNIFORMBUF = 1,
MTL_STORAGE_BUF_TYPE_VERTBUF = 2,
MTL_STORAGE_BUF_TYPE_INDEXBUF = 3,
} storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
union {
/* Own allocation. */
gpu::MTLBuffer *metal_buffer_;
/* Wrapped type. */
MTLUniformBuf *uniform_buffer_;
MTLVertBuf *vertex_buffer_;
MTLIndexBuf *index_buffer_;
};
/* Whether buffer has contents, if false, no GPU buffer will
* have yet been allocated. */
bool has_data_ = false;
/** Bind-state tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/** Usage type. */
GPUUsageType usage_;
public:
MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
~MTLStorageBuf();
MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size);
MTLStorageBuf(MTLVertBuf *vert_buf, size_t size);
MTLStorageBuf(MTLIndexBuf *index_buf, size_t size);
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
void clear(eGPUTextureFormat internal_format, eGPUDataFormat data_format, void *data) override;
void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
void read(void *data) override;
void init();
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{
return name_;
}
private:
MEM_CXX_CLASS_ALLOC_FUNCS("MTLStorageBuf");
};
} // namespace gpu
} // namespace blender
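For orientation, a minimal usage sketch from the generic GPU API side, assuming the existing GPU_storagebuf_* entry points route to this class on the Metal backend; the name and size are illustrative.
#include "GPU_storage_buffer.h"
/* Create a 1 KiB buffer; the Metal allocation itself is deferred until first use. */
GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(1024, nullptr, GPU_USAGE_DYNAMIC, "my_ssbo");
GPU_storagebuf_bind(ssbo, 0); /* Slot must stay below MTL_MAX_STORAGE_BUFFER_BINDINGS. */
/* ... dispatch compute work that reads/writes the buffer ... */
GPU_storagebuf_free(ssbo);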

View File

@ -0,0 +1,313 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
* \{ */
MTLStorageBuf::MTLStorageBuf(size_t size, GPUUsageType usage, const char *name)
: StorageBuf(size, name)
{
usage_ = usage;
/* Do not create SSBO MTL buffer here to allow allocation from any thread. */
storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
metal_buffer_ = nullptr;
}
MTLStorageBuf::MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size)
: StorageBuf(size, "UniformBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_UNIFORMBUF;
uniform_buffer_ = uniform_buf;
BLI_assert(uniform_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLVertBuf *vert_buf, size_t size)
: StorageBuf(size, "VertexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_VERTBUF;
vertex_buffer_ = vert_buf;
BLI_assert(vertex_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLIndexBuf *index_buf, size_t size)
: StorageBuf(size, "IndexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_INDEXBUF;
index_buffer_ = index_buf;
BLI_assert(index_buffer_ != nullptr);
}
MTLStorageBuf::~MTLStorageBuf()
{
if (storage_source_ == MTL_STORAGE_BUF_TYPE_DEFAULT) {
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
}
/* Ensure SSBO is not bound to active CTX.
* SSBO bindings are reset upon Context-switch so we do not need
* to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
MTLStorageBufferBinding &slot = ctx->pipeline_state.ssbo_bindings[i];
if (slot.bound && slot.ssbo == this) {
slot.bound = false;
slot.ssbo = nullptr;
}
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data upload / update
* \{ */
void MTLStorageBuf::init()
{
/* We only need to initialize the storage buffer for default buffer types. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
metal_buffer_ = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_, true);
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Storage Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
has_data_ = false;
}
void MTLStorageBuf::update(const void *data)
{
/* Only default storage buffer types own their allocation; wrapped resources need no update here. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
/* Ensure buffer has been allocated. */
if (metal_buffer_ == nullptr) {
init();
}
BLI_assert(data != nullptr);
if (data != nullptr) {
/* Upload data. */
BLI_assert(!(metal_buffer_->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size_in_bytes_ <= metal_buffer_->get_size());
BLI_assert(size_in_bytes_ <= [metal_buffer_->get_metal_buffer() length]);
memcpy(metal_buffer_->get_host_ptr(), data, size_in_bytes_);
metal_buffer_->flush_range(0, size_in_bytes_);
has_data_ = true;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Usage
* \{ */
void MTLStorageBuf::bind(int slot)
{
if (slot >= MTL_MAX_STORAGE_BUFFER_BINDINGS) {
fprintf(
stderr,
"Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n",
name_,
slot,
MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(false);
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Bind current SSBO to active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLStorageBufferBinding &ctx_ssbo_bind_slot = ctx->pipeline_state.ssbo_bindings[slot];
ctx_ssbo_bind_slot.ssbo = this;
ctx_ssbo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
}
void MTLStorageBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLStorageBufferBinding &ctx_ssbo_bind_slot =
bound_ctx_->pipeline_state.ssbo_bindings[bind_slot_];
if (ctx_ssbo_bind_slot.bound && ctx_ssbo_bind_slot.ssbo == this) {
ctx_ssbo_bind_slot.bound = false;
ctx_ssbo_bind_slot.ssbo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
void MTLStorageBuf::clear(eGPUTextureFormat internal_format,
eGPUDataFormat data_format,
void *data)
{
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
if (metal_buffer_ == nullptr) {
this->init();
}
if (ctx) {
/* Fast clear. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder fillBuffer:metal_buffer_->get_metal_buffer()
range:NSMakeRange(0, size_in_bytes_)
value:0];
}
else {
/* Fallback inefficient clear if outside of render context. */
void *clear_data = calloc(1, size_in_bytes_);
this->update(clear_data);
free(clear_data);
}
}
void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uint copy_size)
{
/* TODO(Metal): Support Copy sub operation. */
MTL_LOG_WARNING("TLStorageBuf::copy_sub not yet supported.\n");
}
void MTLStorageBuf::read(void *data)
{
if (data == nullptr) {
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
/* Managed buffers need to be explicitly flushed back to host. */
if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/* Ensure GPU updates are flushed back to CPU. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
/* Ensure sync has occurred. */
GPU_finish();
}
/* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
}
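As the note above says, a read is only meaningful once the producing GPU work has finished; a hedged caller-side sketch, assuming the usual compute entry points, with `shader`, `groups_x`, `ssbo` and `host_data` as placeholders.
GPU_compute_dispatch(shader, groups_x, 1, 1);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
/* Managed buffers are synchronized back to host inside MTLStorageBuf::read(). */
GPU_storagebuf_read(ssbo, host_data);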
id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
{
gpu::MTLBuffer *source_buffer = nullptr;
switch (storage_source_) {
/* Default SSBO buffer comes from own allocation. */
case MTL_STORAGE_BUF_TYPE_DEFAULT: {
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
source_buffer = metal_buffer_;
} break;
/* SSBO buffer comes from Uniform Buffer. */
case MTL_STORAGE_BUF_TYPE_UNIFORMBUF: {
source_buffer = uniform_buffer_->metal_buffer_;
} break;
/* SSBO buffer comes from Vertex Buffer. */
case MTL_STORAGE_BUF_TYPE_VERTBUF: {
source_buffer = vertex_buffer_->vbo_;
} break;
/* SSBO buffer comes from Index Buffer. */
case MTL_STORAGE_BUF_TYPE_INDEXBUF: {
source_buffer = index_buffer_->ibo_;
} break;
}
/* Return Metal allocation handle and flag as used. */
BLI_assert(source_buffer != nullptr);
source_buffer->debug_ensure_used();
return source_buffer->get_metal_buffer();
}
int MTLStorageBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu

View File

@ -13,10 +13,14 @@
namespace blender::gpu {
class MTLStorageBuf;
/**
* Implementation of Uniform Buffers using Metal.
**/
class MTLUniformBuf : public UniformBuf {
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
@ -29,6 +33,9 @@ class MTLUniformBuf : public UniformBuf {
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();

View File

@ -14,6 +14,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
@ -43,6 +44,11 @@ MTLUniformBuf::~MTLUniformBuf()
}
}
}
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLUniformBuf::update(const void *data)
@ -128,7 +134,25 @@ void MTLUniformBuf::bind_as_ssbo(int slot)
return;
}
BLI_assert_msg(0, "Not implemented yet");
/* Ensure data is actually allocated when used as an SSBO, as the resource may be written to. */
if (metal_buffer_ == nullptr) {
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
else {
this->clear_to_zero();
}
}
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, size_in_bytes_);
}
ssbo_wrapper_->bind(slot);
}
void MTLUniformBuf::unbind()

View File

@ -22,7 +22,8 @@ class MTLVertBuf : public VertBuf {
friend class gpu::MTLTexture; /* For buffer texture. */
friend class MTLShader; /* For transform feedback. */
friend class MTLBatch;
friend class MTLContext; /* For transform feedback. */
friend class MTLContext; /* For transform feedback. */
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/** Metal buffer allocation. **/
@ -37,6 +38,8 @@ class MTLVertBuf : public VertBuf {
uint64_t alloc_size_ = 0;
/** Whether existing allocation has been submitted for use by the GPU. */
bool contents_in_flight_ = false;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
/* Fetch Metal buffer and offset into allocation if necessary.
* Access limited to friend classes. */

View File

@ -5,6 +5,7 @@
*/
#include "mtl_vertex_buffer.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
namespace blender::gpu {
@ -50,6 +51,11 @@ void MTLVertBuf::release_data()
GPU_TEXTURE_FREE_SAFE(buffer_texture_);
MEM_SAFE_FREE(data);
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLVertBuf::duplicate_data(VertBuf *dst_)
@ -294,10 +300,16 @@ void MTLVertBuf::update_sub(uint start, uint len, const void *data)
void MTLVertBuf::bind_as_ssbo(uint binding)
{
/* TODO(Metal): Support binding of buffers as SSBOs.
* Pending overall compute support for Metal backend. */
MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n");
this->flag_used();
/* Ensure resource is initialized. */
this->bind();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
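For illustration, the caller path this enables, assuming the existing GPU_vertbuf/compute API; `verts`, `compute_shader` and `groups` are placeholders.
GPU_vertbuf_bind_as_ssbo(verts, 1); /* Wraps the VBO in an MTLStorageBuf on first use. */
GPU_compute_dispatch(compute_shader, groups, 1, 1);
GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);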
void MTLVertBuf::bind_as_texture(uint binding)

View File

@ -97,10 +97,18 @@ struct constexp_uvec3 {
return 0;
}
}
inline operator uint3() const
constexpr inline operator uint3() const
{
return xyz;
}
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
};
constexpr constexp_uvec3 __internal_workgroupsize_get()
@ -136,6 +144,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
@ -148,33 +160,40 @@ template<typename T> T atomicXor(threadgroup T &mem, T data)
/* Device memory. */
template<typename T> T atomicMax(device T &mem, T data)
{
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(device T &mem, T data)
{
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(device T &mem, T data)
{
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(device T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(device T &mem, T data)
{
return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(device T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(device T &mem, T data)
{
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
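With the casts now targeting the `device` address space, shader code can, for example, compact results into an SSBO through an atomic counter; `keep_element`, `out_count` and `out_indices` are illustrative names, not from this patch.
if (keep_element) {
  uint write_index = atomicAdd(out_count, 1u);
  out_indices[write_index] = gl_GlobalInvocationID.x;
}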
/* Used to replace 'out' in function parameters with a thread-local reference.
* Macro names are kept short to avoid expanding the GLSL source string. */
#define THD thread
#define OUT(type, name, array) thread type(&name)[array]
#define THREADGROUP_OUT_ARRAY(type, name, array) threadgroup type(&name)[array]
#define DEVICE_OUT_ARRAY(type, name, array) device type(&name)[array]
#define DEVICE_OUT(type, name) device type &name
/* Generate wrapper structs for combined texture and sampler type. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
@ -1122,6 +1141,27 @@ inline float4 uintBitsToFloat(uint4 f)
return as_type<float4>(f);
}
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
#define bitfieldInsert insert_bits
#define bitCount popcount
template<typename T> T findLSB(T x)
{
/* ctz returns the number of trailing zeroes, which is also the index of the LSB. However, the
* zero-input case needs to be filtered out to match GLSL functionality. */
return (x == T(0)) ? T(-1) : T(ctz(x));
}
template<typename T> T findMSB(T x)
{
/* clz returns the number of leading zeroes. To fetch the index of the LSB, we can also use this

The comment needs to be reworked. It mentions LSB.
* value as index when offset by 1. however need to filter out the case where the input value is
* zero to match GLSL functionality. 000000010*/

Nice `000000010` you got here.
return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1));

I think the 0 case here is already handled by the logic: `clz(0) - clz(0) - 1 = -1`. Also, can we make sure that `clz(T(0)) - T(1)` is compile-time constant folded? Or it could be replaced by `sizeof(T) * -8 - 1`.
}
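A small worked example of both helpers, assuming a 32-bit unsigned input.
uint x = 18u;          /* 0b10010 */
uint msb = findMSB(x); /* clz(0u) - clz(18u) - 1u = 32 - 27 - 1 = 4 */
uint lsb = findLSB(x); /* ctz(18u) = 1 */
/* A zero input is special-cased to T(-1) to match GLSL. */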
/* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0)

View File

@ -15,6 +15,14 @@
#define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass-by-reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */

Not sure why this is here. Any GLSL code targeted at Metal should be using `#ifdef GPU_METAL`. As mentioned in the GLSL files, I would really like to avoid these.
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
#define DEVICE_OUT_ARRAY(type, name, array_len) out type name[array_len]
#define DEVICE_OUT(type, name) out type name
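To illustrate the intent, assuming `DEVICE_OUT` expands to a named parameter on both backends, a shared helper can be written once and compile as a `device` reference under Metal and an `out` parameter under GLSL; the function name is illustrative.
void write_doubled(float value, DEVICE_OUT(float, result))
{
  result = value * 2.0f;
}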
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)