Metal: Add AMD support for subpass transition #119784
@@ -74,6 +74,62 @@ enum class Type {
   SHORT4
 };
 
+static int to_component_count(const Type &type)
+{
+  switch (type) {
+    case Type::FLOAT:
+    case Type::UINT:
+    case Type::INT:
+    case Type::BOOL:
+      return 1;
+    case Type::VEC2:
+    case Type::UVEC2:
+    case Type::IVEC2:
+      return 2;
+    case Type::VEC3:
+    case Type::UVEC3:
+    case Type::IVEC3:
+      return 3;
+    case Type::VEC4:
+    case Type::UVEC4:
+    case Type::IVEC4:
+      return 4;
+    case Type::MAT3:
+      return 9;
+    case Type::MAT4:
+      return 16;
+    /* Alias special types. */
+    case Type::UCHAR:
+    case Type::USHORT:
+      return 1;
+    case Type::UCHAR2:
+    case Type::USHORT2:
+      return 2;
+    case Type::UCHAR3:
+    case Type::USHORT3:
+      return 3;
+    case Type::UCHAR4:
+    case Type::USHORT4:
+      return 4;
+    case Type::CHAR:
+    case Type::SHORT:
+      return 1;
+    case Type::CHAR2:
+    case Type::SHORT2:
+      return 2;
+    case Type::CHAR3:
+    case Type::SHORT3:
+      return 3;
+    case Type::CHAR4:
+    case Type::SHORT4:
+      return 4;
+    case Type::VEC3_101010I2:
+      return 3;
+  }
+  BLI_assert_unreachable();
+  return -1;
+}
+
 /* All of these functions are a bit out of place. */
 static inline Type to_type(const eGPUType type)
 {
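As a quick illustration of this helper in use (it drives the read-swizzle truncation in the emulated tile-read path later in this patch):

  /* Sketch: build a read swizzle for a two-component input.
   * to_component_count(Type::VEC2) returns 2, so the string becomes "xy". */
  char swizzle[] = "xyzw";
  swizzle[to_component_count(Type::VEC2)] = '\0';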
@@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer {
 
 protected:
   void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
-                               Span<GPUAttachmentState> /*color_attachment_states*/) override{};
+                               Span<GPUAttachmentState> color_attachment_states) override;
 
 public:
   void apply_state();
@@ -472,6 +472,27 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type,
     this->force_clear();
   }
 }
+void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
+                                             Span<GPUAttachmentState> color_attachment_states)
+{
+  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
+  if (!is_tile_based_arch) {
+    /* Break renderpass if tile memory is unsupported to ensure current framebuffer results are
+     * stored. */
+    context_->main_command_buffer.end_active_command_encoder();
+
+    /* Bind framebuffer attachments as textures.
+     * NOTE: Follows behaviour of gl_framebuffer. However, shaders utilising subpass_in will
+     * need to avoid bindpoint collisions for image/texture resources. */
+    for (int i : color_attachment_states.index_range()) {
+      GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
+      GPUTexture *attach_tex = this->attachments_[type].tex;
+      if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
+        GPU_texture_image_bind(attach_tex, i);
+      }
+    }
+  }
+}
 
 void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
                           eGPUDataFormat format,
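For context, a minimal caller-side sketch of the feature being ported. This is hypothetical usage with names taken from the existing GPU API; the exact array convention (depth state first, then color attachments) is assumed:

  /* Mid-render-pass, move color attachment 0 to a read state so a following
   * draw can consume it via subpass_in. On TBDR hardware this stays in tile
   * memory; with this patch, non-TBDR GPUs instead end the active command
   * encoder and bind the attachment as a regular texture. */
  GPUAttachmentState attachment_states[2] = {GPU_ATTACHEMENT_IGNORE, /* Depth. */
                                             GPU_ATTACHEMENT_READ};  /* Color 0. */
  GPU_framebuffer_subpass_transition_array(framebuffer, attachment_states, 2);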
@@ -414,6 +414,7 @@ class MSLGeneratorInterface {
   blender::Vector<MSLConstant> constants;
   /* Fragment tile inputs. */
   blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
+  bool supports_native_tile_inputs;
   /* Should match vertex outputs, but defined separately as
    * some shader permutations will not utilize all inputs/outputs.
    * Final shader uses the intersection between the two sets. */
@@ -2089,6 +2089,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     fragment_outputs.append(mtl_frag_out);
   }
 
+  /** Identify support for tile inputs. */
+  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
+  if (is_tile_based_arch) {
+    supports_native_tile_inputs = true;
+  }
+  else {
+    /* NOTE: If emulating tile input reads, we must ensure we also expose position data. */
+    supports_native_tile_inputs = false;
+  }
+
   /* Fragment tile inputs. */
   for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
@@ -2107,6 +2117,51 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
 
     fragment_tile_inputs.append(mtl_frag_in);
+
+    /* If we do not support native tile inputs, generate an image-binding per input. */
+    if (!supports_native_tile_inputs) {
+      /* Determine type: */
+      bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+      /* Start with invalid value to detect failure cases. */
+      ImageType image_type = ImageType::FLOAT_BUFFER;
+      switch (frag_tile_in.type) {
+        case Type::FLOAT:
+          image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
+          break;
+        case Type::INT:
+          image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
+          break;
+        case Type::UINT:
+          image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
+          break;
+        default:
+          break;
+      }
+      BLI_assert(image_type != ImageType::FLOAT_BUFFER);
+
+      /* Generate texture binding resource. */
+      MSLTextureResource msl_image;
+      msl_image.stage = ShaderStage::FRAGMENT;
+      msl_image.type = image_type;
+      msl_image.name = frag_tile_in.name + "_subpass_img";
+      msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
+      msl_image.slot = texture_slot_id++;
+      /* WATCH: We don't have a great place to generate the image bindings.
+       * So we will use the subpass binding index and check if it collides with an existing
+       * binding. */
+      msl_image.location = frag_tile_in.index;
+      msl_image.is_texture_sampler = false;
+      BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
+      BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS);
+
+      /* Check existing samplers. */
+      for (const auto &tex : texture_samplers) {
+        BLI_assert(tex.location != msl_image.location);
+      }
+
+      texture_samplers.append(msl_image);
+      max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
+    }
   }
 
   /* Transform feedback. */
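For a non-layered vec4 subpass input named `in_color`, the resource generated above would surface in the final MSL roughly as a read-only 2D texture argument. A sketch, not the literal generated source; the slot index is whatever `texture_slot_id` assigned:

  /* Approximate MSL binding for ImageType::FLOAT_2D with read-only access. */
  texture2d<float, access::read> in_color_subpass_img [[texture(0)]];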
@@ -3043,10 +3098,32 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader
 std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
 {
   std::stringstream out;
-  for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
-    out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
-        << " = "
-        << "fragment_tile_in." << tile_input.name << ";" << std::endl;
-  }
+  /* Native tile read is supported on tile-based architectures (Apple Silicon). */
+  if (supports_native_tile_inputs) {
+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = "
+          << "fragment_tile_in." << tile_input.name << ";" << std::endl;
+    }
+  }
+  else {
+    /* TODO: Read from generated images. */

fclem marked this conversation as resolved.

Clément Foucault commented:
What does this TODO refer to?

+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      /* Get read swizzle mask. */
+      char swizzle[] = "xyzw";
+      swizzle[to_component_count(tile_input.type)] = '\0';
+
+      bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+      std::string texel_co = (is_layered_fb) ?
+                                 "ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" :
+                                 "ivec2(v_in._default_position_.xy)";
+
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = texelFetch("
+          << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
+          << "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n";
+    }
+  }
   return out.str();
 }
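To make the two paths above concrete: for a vec4 tile input named `in_color` on a non-layered framebuffer, the emitted population code would read approximately as follows (`frag` stands in for the fragment-stage instance name returned by `get_shader_stage_instance_name`, which is assumed here for illustration):

  /* Native tile read (TBDR / Apple Silicon): copy straight from tile memory. */
  frag.in_color = fragment_tile_in.in_color;

  /* Emulated read (AMD / Intel): fetch from the attachment bound as a texture. */
  frag.in_color = texelFetch(frag.in_color_subpass_img, ivec2(v_in._default_position_.xy), 0).xyzw;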
@@ -144,62 +144,6 @@ static const char *to_string(const Type &type)
   return "unknown";
 }
 
-static int to_component_count(const Type &type)
-{
-  switch (type) {
-    case Type::FLOAT:
-    case Type::UINT:
-    case Type::INT:
-    case Type::BOOL:
-      return 1;
-    case Type::VEC2:
-    case Type::UVEC2:
-    case Type::IVEC2:
-      return 2;
-    case Type::VEC3:
-    case Type::UVEC3:
-    case Type::IVEC3:
-      return 3;
-    case Type::VEC4:
-    case Type::UVEC4:
-    case Type::IVEC4:
-      return 4;
-    case Type::MAT3:
-      return 9;
-    case Type::MAT4:
-      return 16;
-    /* Alias special types. */
-    case Type::UCHAR:
-    case Type::USHORT:
-      return 1;
-    case Type::UCHAR2:
-    case Type::USHORT2:
-      return 2;
-    case Type::UCHAR3:
-    case Type::USHORT3:
-      return 3;
-    case Type::UCHAR4:
-    case Type::USHORT4:
-      return 4;
-    case Type::CHAR:
-    case Type::SHORT:
-      return 1;
-    case Type::CHAR2:
-    case Type::SHORT2:
-      return 2;
-    case Type::CHAR3:
-    case Type::SHORT3:
-      return 3;
-    case Type::CHAR4:
-    case Type::SHORT4:
-      return 4;
-    case Type::VEC3_101010I2:
-      return 3;
-  }
-  BLI_assert_unreachable();
-  return -1;
-}
-
 static Type to_component_type(const Type &type)
 {
   switch (type) {
Jeroen Bakker commented:
Use BLI_INLINE. Currently this generates many compilation warnings when used on backends that don't use this function.

Thanks Jeroen, will resolve!
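A sketch of the suggested change, assuming `BLI_INLINE` from `BLI_compiler_compat.h`:

  /* Marking the helper BLI_INLINE avoids "defined but not used" warnings in
   * backends that include the header without calling the function. */
  BLI_INLINE int to_component_count(const Type &type)
  {
    /* ... switch unchanged ... */
  }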
Good to confirm shadows are working on Intel GPUs too. I will be performing a test pass on Intel later today to see where we stand overall.