Metal: Add AMD support for subpass transition #119784
|
@ -74,6 +74,62 @@ enum class Type {
|
||||||
SHORT4
|
SHORT4
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Return the number of components of the given `Type` value.
 *
 * Single-value types yield 1, the 2/3/4 suffixed types yield 2/3/4, `MAT3` yields 9,
 * `MAT4` yields 16 and `VEC3_101010I2` yields 3.
 *
 * \param type: Type to query.
 * \return The component count, or -1 (after `BLI_assert_unreachable()`) if no case matched.
 */
BLI_INLINE int to_component_count(const Type &type)
|
||||||
|
|||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case Type::FLOAT:
|
||||||
|
case Type::UINT:
|
||||||
|
case Type::INT:
|
||||||
|
case Type::BOOL:
|
||||||
|
return 1;
|
||||||
|
case Type::VEC2:
|
||||||
|
case Type::UVEC2:
|
||||||
|
case Type::IVEC2:
|
||||||
|
return 2;
|
||||||
|
case Type::VEC3:
|
||||||
|
case Type::UVEC3:
|
||||||
|
case Type::IVEC3:
|
||||||
|
return 3;
|
||||||
|
case Type::VEC4:
|
||||||
|
case Type::UVEC4:
|
||||||
|
case Type::IVEC4:
|
||||||
|
return 4;
|
||||||
|
case Type::MAT3:
|
||||||
|
return 9;
|
||||||
|
case Type::MAT4:
|
||||||
|
return 16;
|
||||||
|
/* Alias special types. */
|
||||||
|
case Type::UCHAR:
|
||||||
|
case Type::USHORT:
|
||||||
|
return 1;
|
||||||
|
case Type::UCHAR2:
|
||||||
|
case Type::USHORT2:
|
||||||
|
return 2;
|
||||||
|
case Type::UCHAR3:
|
||||||
|
case Type::USHORT3:
|
||||||
|
return 3;
|
||||||
|
case Type::UCHAR4:
|
||||||
|
case Type::USHORT4:
|
||||||
|
return 4;
|
||||||
|
case Type::CHAR:
|
||||||
|
case Type::SHORT:
|
||||||
|
return 1;
|
||||||
|
case Type::CHAR2:
|
||||||
|
case Type::SHORT2:
|
||||||
|
return 2;
|
||||||
|
case Type::CHAR3:
|
||||||
|
case Type::SHORT3:
|
||||||
|
return 3;
|
||||||
|
case Type::CHAR4:
|
||||||
|
case Type::SHORT4:
|
||||||
|
return 4;
|
||||||
|
case Type::VEC3_101010I2:
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
BLI_assert_unreachable();
|
||||||
|
/* Fallback value, reached only when none of the cases above matched. */
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/* All of these functions are a bit out of place */
|
/* All of these functions are a bit out of place */
|
||||||
static inline Type to_type(const eGPUType type)
|
static inline Type to_type(const eGPUType type)
|
||||||
{
|
{
|
||||||
|
|
|
@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer {
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
|
void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
|
||||||
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
|
Span<GPUAttachmentState> color_attachment_states) override;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void apply_state();
|
void apply_state();
|
||||||
|
|
|
@ -472,6 +472,27 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type,
|
||||||
this->force_clear();
|
this->force_clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
 * Sub-pass transition handling for `MTLFrameBuffer`.
 *
 * When `GPU_platform_architecture()` is not `GPU_ARCHITECTURE_TBDR`, the active command encoder
 * is ended and every color attachment whose state is `GPU_ATTACHEMENT_READ` is passed to
 * `GPU_texture_image_bind()` using its attachment index as the bind index.
 * On TBDR architectures this function does nothing.
 *
 * The depth attachment state parameter is unused.
 * \param color_attachment_states: One state per color attachment, indexed relative to
 * `GPU_FB_COLOR_ATTACHMENT0`.
 */
void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
|
||||||
|
Span<GPUAttachmentState> color_attachment_states)
|
||||||
|
{
|
||||||
|
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||||
|
if (!is_tile_based_arch) {
|
||||||
|
/* Break renderpass if tile memory is unsupported to ensure current framebuffer results are
|
||||||
|
* stored. */
|
||||||
|
context_->main_command_buffer.end_active_command_encoder();
|
||||||
|
|
||||||
|
/* Bind framebuffer attachments as textures.
|
||||||
|
* NOTE: Follows behaviour of gl_framebuffer. However, shaders utilising subpass_in will
|
||||||
|
* need to avoid bindpoint collisions for image/texture resources. */
|
||||||
|
for (int i : color_attachment_states.index_range()) {
|
||||||
|
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
|
||||||
|
GPUTexture *attach_tex = this->attachments_[type].tex;
|
||||||
|
if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
|
||||||
|
/* NOTE(review): `attach_tex` is bound without a null check; confirm that attachments in
 * the READ state always have a texture assigned. */
GPU_texture_image_bind(attach_tex, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
|
void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
|
||||||
eGPUDataFormat format,
|
eGPUDataFormat format,
|
||||||
|
|
|
@ -414,6 +414,7 @@ class MSLGeneratorInterface {
|
||||||
blender::Vector<MSLConstant> constants;
|
blender::Vector<MSLConstant> constants;
|
||||||
/* Fragment tile inputs. */
|
/* Fragment tile inputs. */
|
||||||
blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
|
blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
|
||||||
|
bool supports_native_tile_inputs;
|
||||||
/* Should match vertex outputs, but defined separately as
|
/* Should match vertex outputs, but defined separately as
|
||||||
* some shader permutations will not utilize all inputs/outputs.
|
* some shader permutations will not utilize all inputs/outputs.
|
||||||
* Final shader uses the intersection between the two sets. */
|
* Final shader uses the intersection between the two sets. */
|
||||||
|
|
|
@ -2089,6 +2089,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
|
||||||
fragment_outputs.append(mtl_frag_out);
|
fragment_outputs.append(mtl_frag_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Identify support for tile inputs. */
|
||||||
|
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||||
|
if (is_tile_based_arch) {
|
||||||
|
supports_native_tile_inputs = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* NOTE: If emulating tile input reads, we must ensure we also expose position data. */
|
||||||
|
supports_native_tile_inputs = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* Fragment tile inputs. */
|
/* Fragment tile inputs. */
|
||||||
for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
|
for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
|
||||||
|
|
||||||
|
@ -2107,6 +2117,51 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
|
||||||
mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
|
mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
|
||||||
|
|
||||||
fragment_tile_inputs.append(mtl_frag_in);
|
fragment_tile_inputs.append(mtl_frag_in);
|
||||||
|
|
||||||
|
/* If we do not support native tile inputs, generate an image-binding per input. */
|
||||||
|
if (!supports_native_tile_inputs) {
|
||||||
|
/* Determine type: */
|
||||||
|
bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
|
||||||
|
/* Start with invalid value to detect failure cases. */
|
||||||
|
ImageType image_type = ImageType::FLOAT_BUFFER;
|
||||||
|
switch (frag_tile_in.type) {
|
||||||
|
case Type::FLOAT:
|
||||||
|
image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
|
||||||
|
break;
|
||||||
|
case Type::INT:
|
||||||
|
image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
|
||||||
|
break;
|
||||||
|
case Type::UINT:
|
||||||
|
image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
BLI_assert(image_type != ImageType::FLOAT_BUFFER);
|
||||||
|
|
||||||
|
/* Generate texture binding resource. */
|
||||||
|
MSLTextureResource msl_image;
|
||||||
|
msl_image.stage = ShaderStage::FRAGMENT;
|
||||||
|
msl_image.type = image_type;
|
||||||
|
msl_image.name = frag_tile_in.name + "_subpass_img";
|
||||||
|
msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
|
||||||
|
msl_image.slot = texture_slot_id++;
|
||||||
|
/* WATCH: We don't have a great place to generate the image bindings.
|
||||||
|
* So we will use the subpass binding index and check if it collides with an existing
|
||||||
|
* binding. */
|
||||||
|
msl_image.location = frag_tile_in.index;
|
||||||
|
msl_image.is_texture_sampler = false;
|
||||||
|
BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
|
||||||
|
BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS);
|
||||||
|
|
||||||
|
/* Check existing samplers. */
|
||||||
|
for (const auto &tex : texture_samplers) {
|
||||||
|
BLI_assert(tex.location != msl_image.location);
|
||||||
|
}
|
||||||
|
|
||||||
|
texture_samplers.append(msl_image);
|
||||||
|
max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Transform feedback. */
|
/* Transform feedback. */
|
||||||
|
@ -3043,10 +3098,31 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader
|
||||||
std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
|
std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
|
||||||
{
|
{
|
||||||
std::stringstream out;
|
std::stringstream out;
|
||||||
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
|
|
||||||
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
|
/* Native tile read is supported on tile-based architectures (Apple Silicon). */
|
||||||
<< " = "
|
if (supports_native_tile_inputs) {
|
||||||
<< "fragment_tile_in." << tile_input.name << ";" << std::endl;
|
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
|
||||||
|
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
|
||||||
|
<< tile_input.name << " = "
|
||||||
|
<< "fragment_tile_in." << tile_input.name << ";" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
|
||||||
fclem marked this conversation as resolved
Clément Foucault
commented
What does this TODO refer to?
|
|||||||
|
/* Get read swizzle mask. */
|
||||||
|
char swizzle[] = "xyzw";
|
||||||
|
swizzle[to_component_count(tile_input.type)] = '\0';
|
||||||
|
|
||||||
|
bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
|
||||||
|
std::string texel_co = (is_layered_fb) ?
|
||||||
|
"ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" :
|
||||||
|
"ivec2(v_in._default_position_.xy)";
|
||||||
|
|
||||||
|
out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
|
||||||
|
<< tile_input.name << " = texelFetch("
|
||||||
|
<< get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
|
||||||
|
<< "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return out.str();
|
return out.str();
|
||||||
}
|
}
|
||||||
|
|
|
@ -144,62 +144,6 @@ static const char *to_string(const Type &type)
|
||||||
return "unknown";
|
return "unknown";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Return the number of components of the given `Type` value.
 *
 * Single-value types yield 1, the 2/3/4 suffixed types yield 2/3/4, `MAT3` yields 9,
 * `MAT4` yields 16 and `VEC3_101010I2` yields 3.
 *
 * \param type: Type to query.
 * \return The component count, or -1 (after `BLI_assert_unreachable()`) if no case matched.
 */
static int to_component_count(const Type &type)
|
|
||||||
{
|
|
||||||
switch (type) {
|
|
||||||
case Type::FLOAT:
|
|
||||||
case Type::UINT:
|
|
||||||
case Type::INT:
|
|
||||||
case Type::BOOL:
|
|
||||||
return 1;
|
|
||||||
case Type::VEC2:
|
|
||||||
case Type::UVEC2:
|
|
||||||
case Type::IVEC2:
|
|
||||||
return 2;
|
|
||||||
case Type::VEC3:
|
|
||||||
case Type::UVEC3:
|
|
||||||
case Type::IVEC3:
|
|
||||||
return 3;
|
|
||||||
case Type::VEC4:
|
|
||||||
case Type::UVEC4:
|
|
||||||
case Type::IVEC4:
|
|
||||||
return 4;
|
|
||||||
case Type::MAT3:
|
|
||||||
return 9;
|
|
||||||
case Type::MAT4:
|
|
||||||
return 16;
|
|
||||||
/* Alias special types. */
|
|
||||||
case Type::UCHAR:
|
|
||||||
case Type::USHORT:
|
|
||||||
return 1;
|
|
||||||
case Type::UCHAR2:
|
|
||||||
case Type::USHORT2:
|
|
||||||
return 2;
|
|
||||||
case Type::UCHAR3:
|
|
||||||
case Type::USHORT3:
|
|
||||||
return 3;
|
|
||||||
case Type::UCHAR4:
|
|
||||||
case Type::USHORT4:
|
|
||||||
return 4;
|
|
||||||
case Type::CHAR:
|
|
||||||
case Type::SHORT:
|
|
||||||
return 1;
|
|
||||||
case Type::CHAR2:
|
|
||||||
case Type::SHORT2:
|
|
||||||
return 2;
|
|
||||||
case Type::CHAR3:
|
|
||||||
case Type::SHORT3:
|
|
||||||
return 3;
|
|
||||||
case Type::CHAR4:
|
|
||||||
case Type::SHORT4:
|
|
||||||
return 4;
|
|
||||||
case Type::VEC3_101010I2:
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
BLI_assert_unreachable();
|
|
||||||
/* Fallback value, reached only when none of the cases above matched. */
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static Type to_component_type(const Type &type)
|
static Type to_component_type(const Type &type)
|
||||||
{
|
{
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
Loading…
Reference in New Issue
Use
BLI_INLINE
Currently generates many compilation warnings when used on backends that don't use this function. Thanks Jeroen, will resolve!
Good to confirm shadows working on Intel GPUs too. Will be performing a test pass on Intel later today to see where we are at overall.