diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh
index 9d421b1e388..900c635a927 100644
--- a/source/blender/gpu/intern/gpu_shader_create_info.hh
+++ b/source/blender/gpu/intern/gpu_shader_create_info.hh
@@ -74,6 +74,64 @@ enum class Type {
   SHORT4
 };
 
+/**
+ * Return the number of scalar components stored in a value of the given shader `Type`.
+ *
+ * Matrices report their full element count (9 for `MAT3`, 16 for `MAT4`) and the packed
+ * `VEC3_101010I2` type reports 3. A value that is not listed in the switch trips
+ * `BLI_assert_unreachable()` and yields -1.
+ */
+BLI_INLINE int to_component_count(const Type &type)
+{
+  switch (type) {
+    /* Single component, including the alias special types. */
+    case Type::FLOAT:
+    case Type::UINT:
+    case Type::INT:
+    case Type::BOOL:
+    case Type::UCHAR:
+    case Type::USHORT:
+    case Type::CHAR:
+    case Type::SHORT:
+      return 1;
+    /* Two components. */
+    case Type::VEC2:
+    case Type::UVEC2:
+    case Type::IVEC2:
+    case Type::UCHAR2:
+    case Type::USHORT2:
+    case Type::CHAR2:
+    case Type::SHORT2:
+      return 2;
+    /* Three components. */
+    case Type::VEC3:
+    case Type::UVEC3:
+    case Type::IVEC3:
+    case Type::UCHAR3:
+    case Type::USHORT3:
+    case Type::CHAR3:
+    case Type::SHORT3:
+    case Type::VEC3_101010I2:
+      return 3;
+    /* Four components. */
+    case Type::VEC4:
+    case Type::UVEC4:
+    case Type::IVEC4:
+    case Type::UCHAR4:
+    case Type::USHORT4:
+    case Type::CHAR4:
+    case Type::SHORT4:
+      return 4;
+    /* Matrices count every element. */
+    case Type::MAT3:
+      return 9;
+    case Type::MAT4:
+      return 16;
+  }
+  BLI_assert_unreachable();
+  return -1;
+}
+
 /* All of these functions is a bit out of place */
 static inline Type to_type(const eGPUType type)
 {
diff --git a/source/blender/gpu/metal/mtl_framebuffer.hh b/source/blender/gpu/metal/mtl_framebuffer.hh
index 7d423fc163a..24f3a517526 100644
--- a/source/blender/gpu/metal/mtl_framebuffer.hh
+++ b/source/blender/gpu/metal/mtl_framebuffer.hh
@@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer {
 
  protected:
   void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
-                               Span /*color_attachment_states*/) override{};
+                               Span color_attachment_states) override;
 
  public:
   void apply_state();
diff --git a/source/blender/gpu/metal/mtl_framebuffer.mm
b/source/blender/gpu/metal/mtl_framebuffer.mm
index c68789c73ab..964dd0335c3 100644
--- a/source/blender/gpu/metal/mtl_framebuffer.mm
+++ b/source/blender/gpu/metal/mtl_framebuffer.mm
@@ -472,6 +472,37 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type,
     this->force_clear();
   }
 }
+
+/**
+ * Prepare the frame-buffer attachments for being read as sub-pass inputs.
+ *
+ * Nothing is done when `GPU_platform_architecture()` reports `GPU_ARCHITECTURE_TBDR`. Otherwise
+ * the active command encoder is ended and every color attachment whose state is
+ * `GPU_ATTACHEMENT_READ` is bound as an image at the index of its attachment slot.
+ */
+void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
+                                             Span color_attachment_states)
+{
+  if (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR) {
+    /* Tile-based architecture: no transition work is performed here. */
+    return;
+  }
+
+  /* Tile memory is unsupported: break the render pass to ensure the current frame-buffer
+   * results are stored. */
+  context_->main_command_buffer.end_active_command_encoder();
+
+  /* Bind the attachments flagged for reading as textures.
+   * NOTE: Follows behavior of gl_framebuffer. However, shaders utilizing subpass_in will
+   * need to avoid bind-point collisions for image/texture resources. */
+  for (int slot : color_attachment_states.index_range()) {
+    if (color_attachment_states[slot] != GPU_ATTACHEMENT_READ) {
+      continue;
+    }
+    const GPUAttachmentType attachment = GPU_FB_COLOR_ATTACHMENT0 + slot;
+    GPU_texture_image_bind(this->attachments_[attachment].tex, slot);
+  }
+}
 
 void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
                           eGPUDataFormat format,
diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh
index 86d39fa8a0d..9d632788008 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.hh
+++ b/source/blender/gpu/metal/mtl_shader_generator.hh
@@ -414,6 +414,7 @@ class MSLGeneratorInterface {
   blender::Vector constants;
   /* Fragment tile inputs. */
   blender::Vector fragment_tile_inputs;
+  bool supports_native_tile_inputs;
   /* Should match vertex outputs, but defined separately as
    * some shader permutations will not utilize all inputs/outputs.
    * Final shader uses the intersection between the two sets.
*/
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
index 111caff40af..6393780150b 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -2089,6 +2089,11 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     fragment_outputs.append(mtl_frag_out);
   }
 
+  /* Identify support for tile inputs: native tile reads are used on tile-based architectures.
+   * NOTE: If emulating tile input reads, we must ensure we also expose position data. */
+  supports_native_tile_inputs = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
+
+
   /* Fragment tile inputs. */
   for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
 
@@ -2107,6 +2112,63 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
 
     fragment_tile_inputs.append(mtl_frag_in);
+
+    /* If we do not support native tile inputs, generate an image-binding per input. */
+    if (!supports_native_tile_inputs) {
+      /* Determine the image type from the component type of the input. Vector inputs are
+       * accepted as well as scalars: the generated read swizzles the fetched texel down to the
+       * component count of the input type. */
+      bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+      /* Start with invalid value to detect failure cases. */
+      ImageType image_type = ImageType::FLOAT_BUFFER;
+      switch (frag_tile_in.type) {
+        case Type::FLOAT:
+        case Type::VEC2:
+        case Type::VEC3:
+        case Type::VEC4:
+          image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
+          break;
+        case Type::INT:
+        case Type::IVEC2:
+        case Type::IVEC3:
+        case Type::IVEC4:
+          image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
+          break;
+        case Type::UINT:
+        case Type::UVEC2:
+        case Type::UVEC3:
+        case Type::UVEC4:
+          image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
+          break;
+        default:
+          /* Any other type leaves `image_type` invalid and is caught by the assert below. */
+          break;
+      }
+      BLI_assert(image_type != ImageType::FLOAT_BUFFER);
+
+      /* Generate texture binding resource. */
+      MSLTextureResource msl_image;
+      msl_image.stage = ShaderStage::FRAGMENT;
+      msl_image.type = image_type;
+      msl_image.name = frag_tile_in.name + "_subpass_img";
+      msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
+      msl_image.slot = texture_slot_id++;
+      /* WATCH: We don't have a great place to generate the image bindings.
+       * So we will use the subpass binding index and check if it collides with an existing
+       * binding. */
+      msl_image.location = frag_tile_in.index;
+      msl_image.is_texture_sampler = false;
+      BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
+      BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS);
+
+      /* Check existing samplers. */
+      for (const auto &tex : texture_samplers) {
+        BLI_assert(tex.location != msl_image.location);
+      }
+
+      texture_samplers.append(msl_image);
+      max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
+    }
   }
 
   /* Transform feedback. */
@@ -3043,10 +3105,39 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader
 std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
 {
   std::stringstream out;
-  for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
-    out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
-        << " = "
-        << "fragment_tile_in." << tile_input.name << ";" << std::endl;
-  }
+
+  /* Native tile read is supported on tile-based architectures (Apple Silicon). */
+  if (supports_native_tile_inputs) {
+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = "
+          << "fragment_tile_in." << tile_input.name << ";\n";
+    }
+  }
+  else {
+    /* Emulate the tile read by fetching the matching texel from the image bound for each input.
+     * The texel coordinate does not depend on the input, so it is built once. */
+    const bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+    const std::string texel_co =
+        (is_layered_fb) ? "ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" :
+                          "ivec2(v_in._default_position_.xy)";
+
+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      /* Get read swizzle mask: keep the first `component_count` components of "xyzw". Only
+       * truncate for counts of 1 to 3; a count of 4 already matches the full mask, and any other
+       * count (9 or 16 for matrices, -1 for an unknown type) would index outside the buffer. */
+      const int component_count = to_component_count(tile_input.type);
+      BLI_assert(component_count >= 1 && component_count <= 4);
+      char swizzle[] = "xyzw";
+      if (component_count >= 1 && component_count < 4) {
+        swizzle[component_count] = '\0';
+      }
+
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = texelFetch("
+          << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
+          << "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n";
+    }
+  }
   return out.str();
 }
diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc
index 4318ce26c41..4e3a7cd426c 100644
--- a/source/blender/gpu/opengl/gl_shader.cc
+++ b/source/blender/gpu/opengl/gl_shader.cc
@@ -144,62 +144,6 @@ static const char *to_string(const Type &type)
   return "unknown";
 }
 
-static int to_component_count(const Type &type)
-{
-  switch (type) {
-    case Type::FLOAT:
-    case Type::UINT:
-    case Type::INT:
-    case Type::BOOL:
-      return 1;
-    case Type::VEC2:
-    case Type::UVEC2:
-    case Type::IVEC2:
-      return 2;
-    case Type::VEC3:
-    case Type::UVEC3:
-    case Type::IVEC3:
-      return 3;
-    case Type::VEC4:
-    case Type::UVEC4:
-    case Type::IVEC4:
-      return 4;
-    case Type::MAT3:
-      return 9;
-    case Type::MAT4:
-      return 16;
-    /* Alias special types. */
-    case Type::UCHAR:
-    case Type::USHORT:
-      return 1;
-    case Type::UCHAR2:
-    case Type::USHORT2:
-      return 2;
-    case Type::UCHAR3:
-    case Type::USHORT3:
-      return 3;
-    case Type::UCHAR4:
-    case Type::USHORT4:
-      return 4;
-    case Type::CHAR:
-    case Type::SHORT:
-      return 1;
-    case Type::CHAR2:
-    case Type::SHORT2:
-      return 2;
-    case Type::CHAR3:
-    case Type::SHORT3:
-      return 3;
-    case Type::CHAR4:
-    case Type::SHORT4:
-      return 4;
-    case Type::VEC3_101010I2:
-      return 3;
-  }
-  BLI_assert_unreachable();
-  return -1;
-}
-
 static Type to_component_type(const Type &type)
 {
   switch (type) {