Metal: Add AMD support for subpass transition #119784
@@ -74,6 +74,62 @@ enum class Type {
   SHORT4
 };
 
+static int to_component_count(const Type &type)
+{
+  switch (type) {
+    case Type::FLOAT:
+    case Type::UINT:
+    case Type::INT:
+    case Type::BOOL:
+      return 1;
+    case Type::VEC2:
+    case Type::UVEC2:
+    case Type::IVEC2:
+      return 2;
+    case Type::VEC3:
+    case Type::UVEC3:
+    case Type::IVEC3:
+      return 3;
+    case Type::VEC4:
+    case Type::UVEC4:
+    case Type::IVEC4:
+      return 4;
+    case Type::MAT3:
+      return 9;
+    case Type::MAT4:
+      return 16;
+    /* Alias special types. */
+    case Type::UCHAR:
+    case Type::USHORT:
+      return 1;
+    case Type::UCHAR2:
+    case Type::USHORT2:
+      return 2;
+    case Type::UCHAR3:
+    case Type::USHORT3:
+      return 3;
+    case Type::UCHAR4:
+    case Type::USHORT4:
+      return 4;
+    case Type::CHAR:
+    case Type::SHORT:
+      return 1;
+    case Type::CHAR2:
+    case Type::SHORT2:
+      return 2;
+    case Type::CHAR3:
+    case Type::SHORT3:
+      return 3;
+    case Type::CHAR4:
+    case Type::SHORT4:
+      return 4;
+    case Type::VEC3_101010I2:
+      return 3;
+  }
+  BLI_assert_unreachable();
+  return -1;
+}
+
 /* All of these functions are a bit out of place. */
 static inline Type to_type(const eGPUType type)
 {
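As a quick illustration of this helper in use (it drives the read-swizzle truncation in the emulated tile-read path later in this patch):

  /* Sketch: build a read swizzle for a two-component input.
   * to_component_count(Type::VEC2) returns 2, so the string becomes "xy". */
  char swizzle[] = "xyzw";
  swizzle[to_component_count(Type::VEC2)] = '\0';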
@@ -155,7 +155,7 @@ class MTLFrameBuffer : public FrameBuffer {
 
 protected:
   void subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
-                               Span<GPUAttachmentState> /*color_attachment_states*/) override{};
+                               Span<GPUAttachmentState> color_attachment_states) override;
 
 public:
   void apply_state();
@@ -472,6 +472,27 @@ void MTLFrameBuffer::clear_attachment(GPUAttachmentType type,
     this->force_clear();
   }
 }
+void MTLFrameBuffer::subpass_transition_impl(const GPUAttachmentState /*depth_attachment_state*/,
+                                             Span<GPUAttachmentState> color_attachment_states)
+{
+  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
+  if (!is_tile_based_arch) {
+    /* Break renderpass if tile memory is unsupported to ensure current framebuffer results are
+     * stored. */
+    context_->main_command_buffer.end_active_command_encoder();
+
+    /* Bind framebuffer attachments as textures.
+     * NOTE: Follows behaviour of gl_framebuffer. However, shaders utilising subpass_in will
+     * need to avoid bindpoint collisions for image/texture resources. */
+    for (int i : color_attachment_states.index_range()) {
+      GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
+      GPUTexture *attach_tex = this->attachments_[type].tex;
+      if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
+        GPU_texture_image_bind(attach_tex, i);
+      }
+    }
+  }
+}
 
 void MTLFrameBuffer::read(eGPUFrameBufferBits planes,
                           eGPUDataFormat format,
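For context, a minimal caller-side sketch of the feature being ported. This is hypothetical usage with names taken from the existing GPU API; the exact array convention (depth state first, then color attachments) is assumed:

  /* Mid-render-pass, move color attachment 0 to a read state so a following
   * draw can consume it via subpass_in. On TBDR hardware this stays in tile
   * memory; with this patch, non-TBDR GPUs instead end the active command
   * encoder and bind the attachment as a regular texture. */
  GPUAttachmentState attachment_states[2] = {GPU_ATTACHEMENT_IGNORE, /* Depth. */
                                             GPU_ATTACHEMENT_READ};  /* Color 0. */
  GPU_framebuffer_subpass_transition_array(framebuffer, attachment_states, 2);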
@@ -414,6 +414,7 @@ class MSLGeneratorInterface {
   blender::Vector<MSLConstant> constants;
   /* Fragment tile inputs. */
   blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
+  bool supports_native_tile_inputs;
   /* Should match vertex outputs, but defined separately as
    * some shader permutations will not utilize all inputs/outputs.
    * Final shader uses the intersection between the two sets. */
@@ -2089,6 +2089,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     fragment_outputs.append(mtl_frag_out);
   }
 
+  /** Identify support for tile inputs. */
+  const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
+  if (is_tile_based_arch) {
+    supports_native_tile_inputs = true;
+  }
+  else {
+    /* NOTE: If emulating tile input reads, we must ensure we also expose position data. */
+    supports_native_tile_inputs = false;
+  }
+
   /* Fragment tile inputs. */
   for (const shader::ShaderCreateInfo::SubpassIn &frag_tile_in : create_info_->subpass_inputs_) {
@@ -2107,6 +2117,51 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
     mtl_frag_in.raster_order_group = frag_tile_in.raster_order_group;
 
     fragment_tile_inputs.append(mtl_frag_in);
+
+    /* If we do not support native tile inputs, generate an image-binding per input. */
+    if (!supports_native_tile_inputs) {
+      /* Determine type: */
+      bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+      /* Start with invalid value to detect failure cases. */
+      ImageType image_type = ImageType::FLOAT_BUFFER;
+      switch (frag_tile_in.type) {
+        case Type::FLOAT:
+          image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
+          break;
+        case Type::INT:
+          image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
+          break;
+        case Type::UINT:
+          image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
+          break;
+        default:
+          break;
+      }
+      BLI_assert(image_type != ImageType::FLOAT_BUFFER);
+
+      /* Generate texture binding resource. */
+      MSLTextureResource msl_image;
+      msl_image.stage = ShaderStage::FRAGMENT;
+      msl_image.type = image_type;
+      msl_image.name = frag_tile_in.name + "_subpass_img";
+      msl_image.access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ;
+      msl_image.slot = texture_slot_id++;
+      /* WATCH: We don't have a great place to generate the image bindings.
+       * So we will use the subpass binding index and check if it collides with an existing
+       * binding. */
+      msl_image.location = frag_tile_in.index;
+      msl_image.is_texture_sampler = false;
+      BLI_assert(msl_image.slot < MTL_MAX_TEXTURE_SLOTS);
+      BLI_assert(msl_image.location < MTL_MAX_TEXTURE_SLOTS);
+
+      /* Check existing samplers. */
+      for (const auto &tex : texture_samplers) {
+        BLI_assert(tex.location != msl_image.location);
+      }
+
+      texture_samplers.append(msl_image);
+      max_tex_bind_index = max_ii(max_tex_bind_index, msl_image.slot);
+    }
   }
 
   /* Transform feedback. */
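For a non-layered vec4 subpass input named `in_color`, the resource generated above would surface in the final MSL roughly as a read-only 2D texture argument. A sketch, not the literal generated source; the slot index is whatever `texture_slot_id` assigned:

  /* Approximate MSL binding for ImageType::FLOAT_2D with read-only access. */
  texture2d<float, access::read> in_color_subpass_img [[texture(0)]];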
@@ -3043,10 +3098,32 @@ std::string MSLGeneratorInterface::generate_msl_global_uniform_population(Shader
 std::string MSLGeneratorInterface::generate_msl_fragment_tile_input_population()
 {
   std::stringstream out;
-  for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
-    out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
-        << " = "
-        << "fragment_tile_in." << tile_input.name << ";" << std::endl;
-  }
+  /* Native tile read is supported on tile-based architectures (Apple Silicon). */
+  if (supports_native_tile_inputs) {
+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = "
+          << "fragment_tile_in." << tile_input.name << ";" << std::endl;
+    }
+  }
+  else {
+    /* TODO: Read from generated images. */

fclem marked this conversation as resolved.

Clément Foucault commented:
What does this TODO refer to?

+    for (const MSLFragmentTileInputAttribute &tile_input : this->fragment_tile_inputs) {
+      /* Get read swizzle mask. */
+      char swizzle[] = "xyzw";
+      swizzle[to_component_count(tile_input.type)] = '\0';
+
+      bool is_layered_fb = bool(create_info_->builtins_ & BuiltinBits::LAYER);
+      std::string texel_co = (is_layered_fb) ?
+                                 "ivec3(ivec2(v_in._default_position_.xy), int(v_in.gpu_Layer))" :
+                                 "ivec2(v_in._default_position_.xy)";
+
+      out << "\t" << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "."
+          << tile_input.name << " = texelFetch("
+          << get_shader_stage_instance_name(ShaderStage::FRAGMENT) << "." << tile_input.name
+          << "_subpass_img, " << texel_co << ", 0)." << swizzle << ";\n";
+    }
+  }
   return out.str();
 }
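To make the two paths above concrete: for a vec4 tile input named `in_color` on a non-layered framebuffer, the emitted population code would read approximately as follows (`frag` stands in for the fragment-stage instance name returned by `get_shader_stage_instance_name`, which is assumed here for illustration):

  /* Native tile read (TBDR / Apple Silicon): copy straight from tile memory. */
  frag.in_color = fragment_tile_in.in_color;

  /* Emulated read (AMD / Intel): fetch from the attachment bound as a texture. */
  frag.in_color = texelFetch(frag.in_color_subpass_img, ivec2(v_in._default_position_.xy), 0).xyzw;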
@@ -144,62 +144,6 @@ static const char *to_string(const Type &type)
   return "unknown";
 }
 
-static int to_component_count(const Type &type)
-{
-  switch (type) {
-    case Type::FLOAT:
-    case Type::UINT:
-    case Type::INT:
-    case Type::BOOL:
-      return 1;
-    case Type::VEC2:
-    case Type::UVEC2:
-    case Type::IVEC2:
-      return 2;
-    case Type::VEC3:
-    case Type::UVEC3:
-    case Type::IVEC3:
-      return 3;
-    case Type::VEC4:
-    case Type::UVEC4:
-    case Type::IVEC4:
-      return 4;
-    case Type::MAT3:
-      return 9;
-    case Type::MAT4:
-      return 16;
-    /* Alias special types. */
-    case Type::UCHAR:
-    case Type::USHORT:
-      return 1;
-    case Type::UCHAR2:
-    case Type::USHORT2:
-      return 2;
-    case Type::UCHAR3:
-    case Type::USHORT3:
-      return 3;
-    case Type::UCHAR4:
-    case Type::USHORT4:
-      return 4;
-    case Type::CHAR:
-    case Type::SHORT:
-      return 1;
-    case Type::CHAR2:
-    case Type::SHORT2:
-      return 2;
-    case Type::CHAR3:
-    case Type::SHORT3:
-      return 3;
-    case Type::CHAR4:
-    case Type::SHORT4:
-      return 4;
-    case Type::VEC3_101010I2:
-      return 3;
-  }
-  BLI_assert_unreachable();
-  return -1;
-}
-
 static Type to_component_type(const Type &type)
 {
   switch (type) {
Jeroen Bakker commented:
Use BLI_INLINE. Currently this generates many compilation warnings when used on backends that don't use this function.

Thanks Jeroen, will resolve!
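A sketch of the suggested change, assuming `BLI_INLINE` from `BLI_compiler_compat.h`:

  /* Marking the helper BLI_INLINE avoids "defined but not used" warnings in
   * backends that include the header without calling the function. */
  BLI_INLINE int to_component_count(const Type &type)
  {
    /* ... switch unchanged ... */
  }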
Good to confirm shadows are working on Intel GPUs too. I will be performing a test pass on Intel later today to see where we stand overall.