From 2df86a6db98c7affa2b35d3220801e3a116ec19f Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Fri, 22 Mar 2024 12:39:02 +0000 Subject: [PATCH 1/3] Metal: Add AMD support for subpass transition Adds support for subpass transition for AMD/Intel IMR GPUs. This enables correct functioning of EEVEE Next deferred lighting pass on AMD platforms. The emulation is consistent with the OpenGL approach of generating additional texture bindings in the shader for subpass inputs, and splitting render passes across sub-pass boundaries. Authored by Apple: Michael Parkin-White --- .../gpu/intern/gpu_shader_create_info.hh | 56 ++++++++++++ source/blender/gpu/metal/mtl_framebuffer.hh | 2 +- source/blender/gpu/metal/mtl_framebuffer.mm | 21 +++++ .../blender/gpu/metal/mtl_shader_generator.hh | 1 + .../blender/gpu/metal/mtl_shader_generator.mm | 85 ++++++++++++++++++- source/blender/gpu/opengl/gl_shader.cc | 56 ------------ 6 files changed, 160 insertions(+), 61 deletions(-) diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 95dacd65956..e28e94aca49 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -74,6 +74,62 @@ enum class Type { SHORT4 }; +static int to_component_count(const Type &type) +{ + switch (type) { + case Type::FLOAT: + case Type::UINT: + case Type::INT: + case Type::BOOL: + return 1; + case Type::VEC2: + case Type::UVEC2: + case Type::IVEC2: + return 2; + case Type::VEC3: + case Type::UVEC3: + case Type::IVEC3: + return 3; + case Type::VEC4: + case Type::UVEC4: + case Type::IVEC4: + return 4; + case Type::MAT3: + return 9; + case Type::MAT4: + return 16; + /* Alias special types. */ + case Type::UCHAR: + case Type::USHORT: + return 1; + case Type::UCHAR2: + case Type::USHORT2: + return 2; + case Type::UCHAR3: + case Type::USHORT3: + return 3; + case Type::UCHAR4: + case Type::USHORT4: + return 4; + case Type::CHAR: + case Type::SHORT: + return 1; + case Type::CHAR2: + case Type::SHORT2: + return 2; + case Type::CHAR3: + case Type::SHORT3: + return 3; + case Type::CHAR4: + case Type::SHORT4: + return 4; + case Type::VEC3_101010I2: + return 3; + } + BLI_assert_unreachable(); + return -1; +} + /* All of these functions is a bit out of place */ static inline Type to_type(const eGPUType type) { diff --git a/source/blender/gpu/metal/mtl_framebuffer.hh b/source/blender/gpu/metal/mtl_framebuffer.hh index 1a16c63fb53..fa44d6bcdea 100644 --- a/source/blender/gpu/metal/mtl_framebuffer.hh +++ b/source/blender/gpu/metal/mtl_framebuffer.hh @@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer { protected: void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/, - Span /*color_attachment_states*/) override{}; + Span color_attachment_states) override; public: void apply_state(); diff --git a/source/blender/gpu/metal/mtl_framebuffer.mm b/source/blender/gpu/metal/mtl_framebuffer.mm index c68789c73ab..964dd0335c3 100644 --- a/source/blender/gpu/metal/mtl_framebuffer.mm +++ b/source/blender/gpu/metal/mtl_framebuffer.mm @@ -472,6 +472,27 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type, this->force_clear(); } } +void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/, + Span color_attachment_states) +{ + const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR); + if (!is_tile_based_arch) { + /* Break renderpass if tile memory is unsupported to ensure current framebuffer results are + * stored. */ + context_->main_command_buffer.end_active_command_encoder(); + + /* Bind framebuffer attachments as textures. + * NOTE: Follows behaviour of gl_framebuffer. However, shaders utilising subpass_in will + * need to avoid bindpoint collisions for image/texture resources. */ + for (int i : color_attachment_states.index_range()) { + GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i; + GPUTexture *attach_tex = this->attachments_[type].tex; + if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) { + GPU_texture_image_bind(attach_tex, i); + } + } + } +} void MTLFrameBuffer::read(eGPUFrameBufferBits planes, eGPUDataFormat format, diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index e74a8eee476..89caa7d2a80 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -414,6 +414,7 @@ class MSLGeneratorInterface { blender::Vector constants; /* Fragment tile inputs. */ blender::Vector fragment_tile_inputs; + bool supports_native_tile_inputs; /* Should match vertex outputs, but defined separately as * some shader permutations will not utilize all inputs/outputs. * Final shader uses the intersection between the two sets. */ diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index d9b17597364..e642755bdaa 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -2089,6 +2089,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn fragment_outputs.append(mtl_frag_out); } + /** Identify support for tile inputs. */ + const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR); + if (is_tile_based_arch) { + supports_native_tile_inputs = true; + } + else { + /* NOTE: If emulating tile input reads, we must ensure we also expose position data. */ + supports_native_tile_inputs = false; + } + /* Fragment tile inputs. */ for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) { @@ -2107,6 +2117,51 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group; fragment_tile_inputs.append(mtl_frag_in); + + /* If we do not support native tile inputs, generate an image-binding per input. */ + if (!supports_native_tile_inputs) { + /* Determine type: */ + bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER); + /* Start with invalid value to detect failure cases. */ + ImageType image_type = ImageType::FLOAT_BUFFER; + switch (frag_tile_in.type) { + case Type::FLOAT: + image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D; + break; + case Type::INT: + image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D; + break; + case Type::UINT: + image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D; + break; + default: + break; + } + BLI_assert(image_type != ImageType::FLOAT_BUFFER); + + /* Generate texture binding resource. */ + MSLTextureResource msl_image; + msl_image.stage = ShaderStage::FRAGMENT; + msl_image.type = image_type; + msl_image.name = frag_tile_in.name + "_subpass_img"; + msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; + msl_image.slot = texture_slot_id++; + /* WATCH: We don't have a great place to generate the image bindings. + * So we will use the subpass binding index and check if it collides with an existing + * binding. */ + msl_image.location = frag_tile_in.index; + msl_image.is_texture_sampler = false; + BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS); + BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS); + + /* Check existing samplers. */ + for (const auto &tex : texture_samplers) { + BLI_assert(tex.location != msl_image.location); + } + + texture_samplers.append(msl_image); + max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot); + } } /* Transform feedback. */ @@ -3043,10 +3098,32 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population() { std::stringstream out; - for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) { - out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name - << " = " - << "fragment_tile_in." << tile_input.name << ";" << std::endl; + + /* Native tile read is supported on tile-based architectures (Apple Silicon). */ + if (supports_native_tile_inputs) { + for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) { + out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." + << tile_input.name << " = " + << "fragment_tile_in." << tile_input.name << ";" << std::endl; + } + } + else { + /* TODO: Read from generated images. */ + for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) { + /* Get read swizzle mask. */ + char swizzle[] = "xyzw"; + swizzle[to_component_count(tile_input.type)] = '\0'; + + bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER); + std::string texel_co = (is_layered_fb) ? + "ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" : + "ivec2(v_in._default_position_.xy)"; + + out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." + << tile_input.name << " = texelFetch(" + << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name + << "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n"; + } } return out.str(); } diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 83d9c519c30..864f6c8bf53 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -144,62 +144,6 @@ static const char *to_string(const Type &type) return "unknown"; } -static int to_component_count(const Type &type) -{ - switch (type) { - case Type::FLOAT: - case Type::UINT: - case Type::INT: - case Type::BOOL: - return 1; - case Type::VEC2: - case Type::UVEC2: - case Type::IVEC2: - return 2; - case Type::VEC3: - case Type::UVEC3: - case Type::IVEC3: - return 3; - case Type::VEC4: - case Type::UVEC4: - case Type::IVEC4: - return 4; - case Type::MAT3: - return 9; - case Type::MAT4: - return 16; - /* Alias special types. */ - case Type::UCHAR: - case Type::USHORT: - return 1; - case Type::UCHAR2: - case Type::USHORT2: - return 2; - case Type::UCHAR3: - case Type::USHORT3: - return 3; - case Type::UCHAR4: - case Type::USHORT4: - return 4; - case Type::CHAR: - case Type::SHORT: - return 1; - case Type::CHAR2: - case Type::SHORT2: - return 2; - case Type::CHAR3: - case Type::SHORT3: - return 3; - case Type::CHAR4: - case Type::SHORT4: - return 4; - case Type::VEC3_101010I2: - return 3; - } - BLI_assert_unreachable(); - return -1; -} - static Type to_component_type(const Type &type) { switch (type) { -- 2.30.2 From 5ca3e0cb7ea203df2a6c49e69edefc4dd4d81fb1 Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Fri, 22 Mar 2024 13:53:43 +0000 Subject: [PATCH 2/3] Remove leftover TODO --- source/blender/gpu/metal/mtl_shader_generator.mm | 1 - 1 file changed, 1 deletion(-) diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index e642755bdaa..8b1a02ce421 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -3108,7 +3108,6 @@ std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population() } } else { - /* TODO: Read from generated images. */ for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) { /* Get read swizzle mask. */ char swizzle[] = "xyzw"; -- 2.30.2 From d5e139b084ef6efa1bbd88103f78fd1f1ce534bd Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Thu, 11 Apr 2024 13:53:05 +0100 Subject: [PATCH 3/3] Replace static function with BLI_INLINE to reduce compiler warnings. --- source/blender/gpu/intern/gpu_shader_create_info.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 414a0c19f3e..900c635a927 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -74,7 +74,7 @@ enum class Type { SHORT4 }; -static int to_component_count(const Type &type) +BLI_INLINE int to_component_count(const Type &type) { switch (type) { case Type::FLOAT: -- 2.30.2