Metal: Add AMD support for subpass transition #119784

Merged
Jeroen Bakker merged 5 commits from Jason-Fielder/blender:MetalAMDSubpassTransitionSupport into main 2024-04-11 15:24:05 +02:00
6 changed files with 159 additions and 61 deletions

View File

@@ -74,6 +74,62 @@ enum class Type {
SHORT4
};
/**
 * Return the number of scalar components making up \a type
 * (e.g. VEC2 -> 2, MAT3 -> 9, MAT4 -> 16).
 * Returns -1 (after a debug assert) for unhandled types.
 *
 * NOTE(review, Jeroen): use `BLI_INLINE` here — a plain `static` definition in a
 * header generates many "unused function" compilation warnings on backends that
 * don't use this function.
 */
BLI_INLINE int to_component_count(const Type &type)
{
  switch (type) {
    case Type::FLOAT:
    case Type::UINT:
    case Type::INT:
    case Type::BOOL:
      return 1;
    case Type::VEC2:
    case Type::UVEC2:
    case Type::IVEC2:
      return 2;
    case Type::VEC3:
    case Type::UVEC3:
    case Type::IVEC3:
      return 3;
    case Type::VEC4:
    case Type::UVEC4:
    case Type::IVEC4:
      return 4;
    case Type::MAT3:
      return 9;
    case Type::MAT4:
      return 16;
    /* Alias special types. */
    case Type::UCHAR:
    case Type::USHORT:
      return 1;
    case Type::UCHAR2:
    case Type::USHORT2:
      return 2;
    case Type::UCHAR3:
    case Type::USHORT3:
      return 3;
    case Type::UCHAR4:
    case Type::USHORT4:
      return 4;
    case Type::CHAR:
    case Type::SHORT:
      return 1;
    case Type::CHAR2:
    case Type::SHORT2:
      return 2;
    case Type::CHAR3:
    case Type::SHORT3:
      return 3;
    case Type::CHAR4:
    case Type::SHORT4:
      return 4;
    case Type::VEC3_101010I2:
      return 3;
  }
  BLI_assert_unreachable();
  return -1;
}
/* All of these functions are a bit out of place. */
static inline Type to_type(const eGPUType type)
{

View File

@@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer {
protected:
void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
Span<GPUAttachmentState> color_attachment_states) override;
public:
void apply_state();

View File

@@ -472,6 +472,27 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type,
this->force_clear();
}
}
/* Perform a sub-pass transition on this framebuffer.
 * The depth attachment state is currently unused by the Metal backend.
 * On tile-based GPUs (Apple Silicon, `GPU_ARCHITECTURE_TBDR`) this is a no-op:
 * sub-pass inputs are presumably read natively from tile memory — confirm against
 * the shader generator's native tile-input path. */
void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
                                             Span<GPUAttachmentState> color_attachment_states)
{
  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
  if (!is_tile_based_arch) {
    /* Break renderpass if tile memory is unsupported to ensure current framebuffer results are
     * stored. */
    context_->main_command_buffer.end_active_command_encoder();
    /* Bind framebuffer attachments as textures.
     * NOTE: Follows behaviour of gl_framebuffer. However, shaders utilising subpass_in will
     * need to avoid bindpoint collisions for image/texture resources. */
    for (int i : color_attachment_states.index_range()) {
      /* Color attachment slots are contiguous from GPU_FB_COLOR_ATTACHMENT0. */
      GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
      /* NOTE(review): assumes attachments_[type].tex is non-null for every state
       * flagged as READ — TODO confirm callers guarantee this. */
      GPUTexture *attach_tex = this->attachments_[type].tex;
      /* Only attachments the shader will read need an image binding; the binding
       * slot mirrors the attachment index `i`. */
      if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
        GPU_texture_image_bind(attach_tex, i);
      }
    }
  }
}
void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
eGPUDataFormat format,

View File

@@ -414,6 +414,7 @@ class MSLGeneratorInterface {
blender::Vector<MSLConstant> constants;
/* Fragment tile inputs. */
blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
bool supports_native_tile_inputs;
/* Should match vertex outputs, but defined separately as
* some shader permutations will not utilize all inputs/outputs.
* Final shader uses the intersection between the two sets. */

View File

@@ -2089,6 +2089,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
fragment_outputs.append(mtl_frag_out);
}
/** Identify support for tile inputs. */
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch) {
supports_native_tile_inputs = true;
}
else {
/* NOTE: If emulating tile input reads, we must ensure we also expose position data. */
supports_native_tile_inputs = false;
}
/* Fragment tile inputs. */
for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
@@ -2107,6 +2117,51 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
fragment_tile_inputs.append(mtl_frag_in);
/* If we do not support native tile inputs, generate an image-binding per input. */
if (!supports_native_tile_inputs) {
/* Determine type: */
bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
/* Start with invalid value to detect failure cases. */
ImageType image_type = ImageType::FLOAT_BUFFER;
switch (frag_tile_in.type) {
case Type::FLOAT:
image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
break;
case Type::INT:
image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
break;
case Type::UINT:
image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
break;
default:
break;
}
BLI_assert(image_type != ImageType::FLOAT_BUFFER);
/* Generate texture binding resource. */
MSLTextureResource msl_image;
msl_image.stage = ShaderStage::FRAGMENT;
msl_image.type = image_type;
msl_image.name = frag_tile_in.name + "_subpass_img";
msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
msl_image.slot = texture_slot_id++;
/* WATCH: We don't have a great place to generate the image bindings.
* So we will use the subpass binding index and check if it collides with an existing
* binding. */
msl_image.location = frag_tile_in.index;
msl_image.is_texture_sampler = false;
BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS);
/* Check existing samplers. */
for (const auto &tex : texture_samplers) {
BLI_assert(tex.location != msl_image.location);
}
texture_samplers.append(msl_image);
max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
}
}
/* Transform feedback. */
@@ -3043,10 +3098,31 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader
std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
{
std::stringstream out;
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
<< " = "
<< "fragment_tile_in." << tile_input.name << ";" << std::endl;
/* Native tile read is supported on tile-based architectures (Apple Silicon). */
if (supports_native_tile_inputs) {
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
<< tile_input.name << " = "
<< "fragment_tile_in." << tile_input.name << ";" << std::endl;
}
}
else {
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
fclem marked this conversation as resolved

What does this TODO refers to?

What does this TODO refers to?

Apologies, left over, already addressed, will remove.

Apologies, left over, already addressed, will remove.
/* Get read swizzle mask. */
char swizzle[] = "xyzw";
swizzle[to_component_count(tile_input.type)] = '\0';
bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
std::string texel_co = (is_layered_fb) ?
"ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" :
"ivec2(v_in._default_position_.xy)";
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
<< tile_input.name << " = texelFetch("
<< get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
<< "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n";
}
}
return out.str();
}

View File

@@ -144,62 +144,6 @@ static const char *to_string(const Type &type)
return "unknown";
}
/* Number of scalar components of `type` (e.g. VEC4 -> 4, MAT3 -> 9).
 * Returns -1 (after a debug assert) for unhandled types. */
static int to_component_count(const Type &type)
{
  switch (type) {
    /* Scalars, including aliased 8/16-bit special types. */
    case Type::FLOAT:
    case Type::UINT:
    case Type::INT:
    case Type::BOOL:
    case Type::UCHAR:
    case Type::USHORT:
    case Type::CHAR:
    case Type::SHORT:
      return 1;
    /* Two-component vectors. */
    case Type::VEC2:
    case Type::UVEC2:
    case Type::IVEC2:
    case Type::UCHAR2:
    case Type::USHORT2:
    case Type::CHAR2:
    case Type::SHORT2:
      return 2;
    /* Three-component vectors, including the packed 10_10_10_2 format. */
    case Type::VEC3:
    case Type::UVEC3:
    case Type::IVEC3:
    case Type::UCHAR3:
    case Type::USHORT3:
    case Type::CHAR3:
    case Type::SHORT3:
    case Type::VEC3_101010I2:
      return 3;
    /* Four-component vectors. */
    case Type::VEC4:
    case Type::UVEC4:
    case Type::IVEC4:
    case Type::UCHAR4:
    case Type::USHORT4:
    case Type::CHAR4:
    case Type::SHORT4:
      return 4;
    /* Matrices. */
    case Type::MAT3:
      return 9;
    case Type::MAT4:
      return 16;
  }
  BLI_assert_unreachable();
  return -1;
}
static Type to_component_type(const Type &type)
{
switch (type) {