main sync #3

Merged
Patrick Busch merged 318 commits from blender/blender:main into main 2023-03-17 15:52:21 +01:00
9 changed files with 186 additions and 92 deletions
Showing only changes of commit 397f6250a5

View File

@@ -147,6 +147,18 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
                                             const float color[4])
 {
   int subpart_x, subpart_y, tex_w = 256, tex_h = 256;
+#ifdef __APPLE__
+  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
+    /* NOTE(Metal): The Metal backend will keep all temporary texture memory within a command
+     * submission in-flight, so using a partial tile size does not provide any tangible memory
+     * reduction, but does incur additional API overhead and significant cache inefficiency on
+     * AMD platforms.
+     * The Metal API also provides smart resource paging such that the application can
+     * still efficiently swap memory, even if system is low in physical memory. */
+    tex_w = img_w;
+    tex_h = img_h;
+  }
+#endif
   int seamless, offset_x, offset_y, nsubparts_x, nsubparts_y;
   int components;
   const bool use_clipping = ((clip_min_x < clip_max_x) && (clip_min_y < clip_max_y));
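The NOTE above is the whole point of this hunk: on the Metal backend the tile size is set to the full image size, so the tiled drawing loop degenerates to a single sub-part. A small self-contained sketch (illustrative names, not the Blender function) of what that does to the sub-part count:

/* Illustrative only: how the tile grid collapses when the tile size equals the image size. */
static int count_subparts(int img_w, int img_h, int tex_w, int tex_h)
{
  const int nsubparts_x = (img_w + tex_w - 1) / tex_w; /* Ceiling division. */
  const int nsubparts_y = (img_h + tex_h - 1) / tex_h;
  return nsubparts_x * nsubparts_y;
}

/* OpenGL path: count_subparts(512, 512, 256, 256) == 4  -> four uploads and draws.
 * Metal path:  count_subparts(512, 512, 512, 512) == 1  -> a single upload and draw. */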

View File

@@ -99,6 +99,8 @@ struct GPUPass {
   /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
    * during pass code generation. */
   bool should_optimize;
+  /** Whether pass is in the GPUPass cache. */
+  bool cached;
 };

 /* -------------------------------------------------------------------- */
@@ -132,6 +134,7 @@ static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
 static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
 {
   BLI_spin_lock(&pass_cache_spin);
+  pass->cached = true;
   if (node != nullptr) {
     /* Add after the first pass having the same hash. */
     pass->next = node->next;
@@ -775,6 +778,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   pass->create_info = codegen.create_info;
   pass->hash = codegen.hash_get();
   pass->compiled = false;
+  pass->cached = false;
   /* Only flag pass optimization hint if this is the first generated pass for a material.
    * Optimized passes cannot be optimized further, even if the heuristic is still not
    * favorable. */
@@ -881,14 +885,6 @@ GPUShader *GPU_pass_shader_get(GPUPass *pass)
   return pass->shader;
 }

-void GPU_pass_release(GPUPass *pass)
-{
-  BLI_spin_lock(&pass_cache_spin);
-  BLI_assert(pass->refcount > 0);
-  pass->refcount--;
-  BLI_spin_unlock(&pass_cache_spin);
-}
-
 static void gpu_pass_free(GPUPass *pass)
 {
   BLI_assert(pass->refcount == 0);
@@ -899,6 +895,18 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_freeN(pass);
 }

+void GPU_pass_release(GPUPass *pass)
+{
+  BLI_spin_lock(&pass_cache_spin);
+  BLI_assert(pass->refcount > 0);
+  pass->refcount--;
+  /* Un-cached passes will not be filtered by garbage collection, so release here. */
+  if (pass->refcount == 0 && !pass->cached) {
+    gpu_pass_free(pass);
+  }
+  BLI_spin_unlock(&pass_cache_spin);
+}
+
 void GPU_pass_cache_garbage_collect(void)
 {
   static int lasttime = 0;
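The relocated GPU_pass_release() exists because garbage collection only walks passes that were inserted into the cache; a pass that never became `cached` would otherwise leak on its last release. A minimal self-contained sketch of that release rule (illustrative types, not the Blender code):

#include <cassert>

struct Pass {
  int refcount = 1;
  bool cached = false; /* Set when the pass is inserted into the shared cache. */
};

static void pass_free(Pass *pass)
{
  assert(pass->refcount == 0);
  delete pass;
}

static void pass_release(Pass *pass)
{
  assert(pass->refcount > 0);
  pass->refcount--;
  /* Cached passes are left for the cache's garbage collection;
   * un-cached ones must be freed on their last release. */
  if (pass->refcount == 0 && !pass->cached) {
    pass_free(pass);
  }
}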

View File

@@ -288,17 +288,17 @@ class MTLSafeFreeList {
   std::atomic<bool> in_free_queue_;
   std::atomic<bool> referenced_by_workload_;
   std::recursive_mutex lock_;

   /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
-  std::atomic<int> has_next_pool_;
   std::atomic<MTLSafeFreeList *> next_;

   /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
-   * for performance and memory.
+   * for performance and memory. Higher chunk counts are preferable for efficiently
+   * performing block operations such as copying several objects simultaneously.
+   *
    * MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
    * before buffers are returned to global memory pool. This is set at a point to reduce
    * overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
-  static const int MAX_NUM_BUFFERS_ = 1024;
+  static const int MAX_NUM_BUFFERS_ = 8192;
   static const int MIN_BUFFER_FLUSH_COUNT = 120;
   std::atomic<int> current_list_index_;
   gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
@@ -306,8 +306,8 @@ class MTLSafeFreeList {
  public:
   MTLSafeFreeList();

-  /* Add buffer to Safe Free List, can be called from secondary threads.
-   * Performs a lockless list insert. */
+  /* Can be used from multiple threads. Performs insertion into Safe Free List with the least
+   * amount of threading synchronization. */
   void insert_buffer(gpu::MTLBuffer *buffer);

   /* Whether we need to start a new safe free list, or can carry on using the existing one. */
@@ -322,10 +322,11 @@ class MTLSafeFreeList {
   void flag_in_queue()
   {
     in_free_queue_ = true;
-    if (has_next_pool_) {
+    if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
       MTLSafeFreeList *next_pool = next_.load();
-      BLI_assert(next_pool != nullptr);
-      next_pool->flag_in_queue();
+      if (next_pool) {
+        next_pool->flag_in_queue();
+      }
     }
   }
 };
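The "lockless list" described in the class comment works by reserving a slot with an atomic fetch-add; once the reserved index passes MAX_NUM_BUFFERS_ the caller moves on to the next chunk, and MIN_BUFFER_FLUSH_COUNT only gates how small a pool is worth flushing. A self-contained sketch of that fast path (illustrative names, not the Blender class):

#include <atomic>

struct Buffer; /* Opaque payload for the sketch. */

struct FreeListChunk {
  static const int MAX_NUM_BUFFERS = 8192;       /* Capacity of one chunk. */
  static const int MIN_BUFFER_FLUSH_COUNT = 120; /* Skip flushing tiny pools. */

  std::atomic<int> current_index{0};
  Buffer *pool[MAX_NUM_BUFFERS] = {};

  /* Lock-free fast path: each caller reserves a unique slot. */
  bool try_insert(Buffer *buffer)
  {
    const int slot = current_index.fetch_add(1);
    if (slot >= MAX_NUM_BUFFERS) {
      return false; /* Chunk full: caller continues with the next chunk. */
    }
    pool[slot] = buffer;
    return true;
  }

  /* Whether enough buffers have accumulated for a flush to be worthwhile. */
  bool worth_flushing() const
  {
    return current_index.load() >= MIN_BUFFER_FLUSH_COUNT;
  }
};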

View File

@@ -257,10 +257,7 @@ void MTLBufferPool::update_memory_pools()
     }

     /* Fetch next MTLSafeFreeList chunk, if any. */
-    MTLSafeFreeList *next_list = nullptr;
-    if (current_pool->has_next_pool_ > 0) {
-      next_list = current_pool->next_.load();
-    }
+    MTLSafeFreeList *next_list = current_pool->next_.load();

     /* Delete current MTLSafeFreeList */
     current_pool->lock_.unlock();
@@ -396,7 +393,6 @@ MTLSafeFreeList::MTLSafeFreeList()
   in_free_queue_ = false;
   current_list_index_ = 0;
   next_ = nullptr;
-  has_next_pool_ = 0;
 }

 void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
@@ -410,12 +406,19 @@ void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
    * insert the buffer into the next available chunk. */
   if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
-    /* Check if first caller to generate next pool. */
-    int has_next = has_next_pool_++;
-    if (has_next == 0) {
-      next_ = new MTLSafeFreeList();
-    }
+    /* Check if first caller to generate next pool in chain.
+     * Otherwise, ensure pool exists or wait for first caller to create next pool. */
     MTLSafeFreeList *next_list = next_.load();
+    if (!next_list) {
+      std::unique_lock lock(lock_);
+      next_list = next_.load();
+      if (!next_list) {
+        next_list = new MTLSafeFreeList();
+        next_.store(next_list);
+      }
+    }
     BLI_assert(next_list);
     next_list->insert_buffer(buffer);
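The replacement above swaps the old fetch-add on has_next_pool_ for a classic double-checked creation: read the atomic next_ pointer, and only if it is still null take the lock, re-check, and allocate, so exactly one thread creates the next chunk and no caller can observe a chunk that is still being constructed. A standalone version of the pattern (illustrative names, not the Blender class):

#include <atomic>
#include <mutex>

struct Chunk {
  std::atomic<Chunk *> next{nullptr};
  std::mutex lock;

  Chunk *get_or_create_next()
  {
    Chunk *next_chunk = next.load();
    if (!next_chunk) {
      std::lock_guard<std::mutex> guard(lock);
      /* Re-check under the lock: another thread may have created it first. */
      next_chunk = next.load();
      if (!next_chunk) {
        next_chunk = new Chunk();
        next.store(next_chunk);
      }
    }
    return next_chunk;
  }
};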

View File

@@ -490,8 +490,12 @@ class MSLGeneratorInterface {
   std::string generate_msl_uniform_undefs(ShaderStage stage);
   std::string generate_ubo_block_undef_chain(ShaderStage stage);
   std::string generate_msl_texture_vars(ShaderStage shader_stage);
-  void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage);
-  void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage);
+  void generate_msl_textures_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);
+  void generate_msl_uniforms_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);

   /* Location is not always specified, so this will resolve outstanding locations. */
   void resolve_input_attribute_locations();

View File

@@ -2145,8 +2145,20 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
   return out.str();
 }

+/* If first parameter in function signature, do not print out a comma.
+ * Update first parameter flag to false for future invocations. */
+static char parameter_delimiter(bool &is_first_parameter)
+{
+  if (is_first_parameter) {
+    is_first_parameter = false;
+    return ' ';
+  }
+  return ',';
+}
+
 void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
   /* Note: Shader stage must be specified as the singular stage index for which the input
    * is generating. Compound stages are not valid inputs. */
@@ -2156,7 +2168,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
   BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
   for (const MSLTextureSampler &tex : this->texture_samplers) {
     if (bool(tex.stage & stage)) {
-      out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]";
+      out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
+          << " [[texture(" << tex.location << ")]]";
     }
   }
@@ -2166,7 +2179,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
    * If we exceed the hardware-supported limit, then follow a bind-less model using argument
    * buffers. */
   if (this->use_argument_buffer_for_samplers()) {
-    out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
         << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]";
   }
   else {
@@ -2175,7 +2189,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
     BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
     for (const MSLTextureSampler &tex : this->texture_samplers) {
       if (bool(tex.stage & stage)) {
-        out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]";
+        out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
+            << "_sampler [[sampler(" << tex.location << ")]]";
       }
     }
@@ -2189,12 +2204,13 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
 }

 void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
   for (const MSLUniformBlock &ubo : this->uniform_blocks) {
     if (bool(ubo.stage & stage)) {
       /* For literal/existing global types, we do not need the class name-space accessor. */
-      out << ",\n\tconstant ";
+      out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
       if (!is_builtin_type(ubo.type_name)) {
         out << get_stage_class_name(stage) << "::";
       }
@@ -2211,104 +2227,135 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
 std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
 {
   std::stringstream out;
+  bool is_first_parameter = true;

   if (this->uses_ssbo_vertex_fetch_mode) {
     /* Vertex Buffers bound as raw buffers. */
     for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
-      out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n";
+      out << parameter_delimiter(is_first_parameter) << "\tconstant uchar* MTL_VERTEX_DATA_" << i
+          << " [[buffer(" << i << ")]]\n";
     }
-    out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],";
+    out << parameter_delimiter(is_first_parameter)
+        << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]]";
   }
   else {
     if (this->vertex_input_attributes.size() > 0) {
       /* Vertex Buffers use input assembly. */
-      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],";
+      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]]";
+      is_first_parameter = false;
     }
   }

-  out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
-  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX);
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
+        << get_stage_class_name(ShaderStage::VERTEX)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+    is_first_parameter = false;
+  }
+  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX, is_first_parameter);

   /* Transform feedback buffer binding. */
   if (this->uses_transform_feedback) {
-    out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX)
+    out << parameter_delimiter(is_first_parameter) << "\n\tdevice "
+        << get_stage_class_name(ShaderStage::VERTEX)
        << "::VertexOut_TF* "
           "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]";
   }

   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX);
+  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX, is_first_parameter);

   /* Entry point parameters for gl Globals. */
   if (this->uses_gl_VertexID) {
-    out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_VertexID [[vertex_id]]";
   }
   if (this->uses_gl_InstanceID) {
-    out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_InstanceID [[instance_id]]";
   }
   if (this->uses_gl_BaseInstanceARB) {
-    out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
   }

   return out.str();
 }

 std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string()
 {
+  bool is_first_parameter = true;
   std::stringstream out;
-  out << get_stage_class_name(ShaderStage::FRAGMENT)
-      << "::VertexOut v_in [[stage_in]],\n\tconstant "
-      << get_stage_class_name(ShaderStage::FRAGMENT)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  out << parameter_delimiter(is_first_parameter) << get_stage_class_name(ShaderStage::FRAGMENT)
+      << "::VertexOut v_in [[stage_in]]";

-  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT);
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
+        << get_stage_class_name(ShaderStage::FRAGMENT)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }
+  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);

   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT);
+  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);

   if (this->uses_gl_PointCoord) {
-    out << ",\n\tconst float2 gl_PointCoord [[point_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float2 gl_PointCoord [[point_coord]]";
   }
   if (this->uses_gl_FrontFacing) {
-    out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
   }
   if (this->uses_gl_PrimitiveID) {
-    out << ",\n\tconst uint gl_PrimitiveID [[primitive_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_PrimitiveID [[primitive_id]]";
   }

   /* Barycentrics. */
   if (this->uses_barycentrics) {
-    out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
   }

   return out.str();
 }

 std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
 {
+  bool is_first_parameter = true;
   std::stringstream out;

-  out << "constant " << get_stage_class_name(ShaderStage::COMPUTE)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "constant "
+        << get_stage_class_name(ShaderStage::COMPUTE)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }

-  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE, is_first_parameter);

   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE, is_first_parameter);

   /* Entry point parameters for gl Globals. */
   if (this->uses_gl_GlobalInvocationID) {
-    out << ",\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
   }
   if (this->uses_gl_WorkGroupID) {
-    out << ",\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
   }
   if (this->uses_gl_NumWorkGroups) {
-    out << ",\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
   }
   if (this->uses_gl_LocalInvocationIndex) {
-    out << ",\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
   }
   if (this->uses_gl_LocalInvocationID) {
-    out << ",\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
   }

   return out.str();
@@ -2316,6 +2363,10 @@ std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
 std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage)
 {
+  /* Only generate PushConstantBlock if we have uniforms. */
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
   BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
   std::stringstream out;
@@ -2624,6 +2675,9 @@ std::string MSLGeneratorInterface::generate_msl_fragment_out_struct()
 std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage)
 {
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
   /* Populate Global Uniforms. */
   std::stringstream out;
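All of the signature changes above serve one purpose: now that the PushConstantBlock parameter is only emitted when a shader actually has uniforms, no parameter is guaranteed to come first, so every emitter asks parameter_delimiter() whether to print a comma. A self-contained sketch of the idea (the helper matches the diff; the surrounding flags and strings are illustrative):

#include <iostream>
#include <sstream>

static char parameter_delimiter(bool &is_first_parameter)
{
  if (is_first_parameter) {
    is_first_parameter = false;
    return ' ';
  }
  return ',';
}

int main()
{
  const bool has_uniforms = false; /* E.g. a shader without push constants. */
  const bool has_texture = true;
  const bool uses_vertex_id = true;

  bool is_first_parameter = true;
  std::stringstream out;
  if (has_uniforms) {
    out << parameter_delimiter(is_first_parameter) << "constant PushConstantBlock* uniforms";
  }
  if (has_texture) {
    out << parameter_delimiter(is_first_parameter) << "\n\ttexture2d<float> tex [[texture(0)]]";
  }
  if (uses_vertex_id) {
    out << parameter_delimiter(is_first_parameter)
        << "\n\tconst uint32_t gl_VertexID [[vertex_id]]";
  }
  /* No leading comma is emitted, even though the first optional parameter was skipped. */
  std::cout << out.str() << "\n";
  return 0;
}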

View File

@@ -594,17 +594,6 @@ void gpu::MTLTexture::update_sub(
     }
   }

-  /* Prepare staging buffer for data. */
-  id<MTLBuffer> staging_buffer = nil;
-  uint64_t staging_buffer_offset = 0;
-
-  /* Fetch allocation from scratch buffer. */
-  MTLTemporaryBuffer allocation =
-      ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);
-  memcpy(allocation.data, data, totalsize);
-  staging_buffer = allocation.metal_buffer;
-  staging_buffer_offset = allocation.buffer_offset;
-
   /* Common Properties. */
   MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
       destination_format);
@@ -616,6 +605,12 @@ void gpu::MTLTexture::update_sub(
     return;
   }

+  /* Fetch allocation from memory pool. */
+  MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
+      totalsize, true, data);
+  id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
+  BLI_assert(staging_buffer != nil);
+
   /* Prepare command encoders. */
   id<MTLBlitCommandEncoder> blit_encoder = nil;
   id<MTLComputeCommandEncoder> compute_encoder = nil;
@@ -697,7 +692,7 @@ void gpu::MTLTexture::update_sub(
         int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
         for (int array_index = 0; array_index < max_array_index; array_index++) {

-          int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index);
+          int buffer_array_offset = (bytes_per_image * array_index);
           [blit_encoder
               copyFromBuffer:staging_buffer
                 sourceOffset:buffer_array_offset
@@ -727,7 +722,7 @@ void gpu::MTLTexture::update_sub(
           MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
           cs.bind_pso(pso);
           cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
           cs.bind_compute_texture(texture_handle, 0);
           [compute_encoder
               dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
@@ -747,7 +742,7 @@ void gpu::MTLTexture::update_sub(
           MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
           cs.bind_pso(pso);
           cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
           cs.bind_compute_texture(texture_handle, 0);
           [compute_encoder
               dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
@@ -779,7 +774,7 @@ void gpu::MTLTexture::update_sub(
           }

           [blit_encoder copyFromBuffer:staging_buffer
-                          sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                          sourceOffset:texture_array_relative_offset
                      sourceBytesPerRow:bytes_per_row
                    sourceBytesPerImage:bytes_per_image
                             sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -807,7 +802,7 @@ void gpu::MTLTexture::update_sub(
           MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
           cs.bind_pso(pso);
           cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
           cs.bind_compute_texture(texture_handle, 0);
           [compute_encoder
               dispatchThreads:MTLSizeMake(
@@ -828,7 +823,7 @@ void gpu::MTLTexture::update_sub(
           MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
           cs.bind_pso(pso);
           cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
           cs.bind_compute_texture(texture_handle, 0);
           [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                        extent[1],
@@ -848,7 +843,7 @@ void gpu::MTLTexture::update_sub(
                                                          ctx->pipeline_state.unpack_row_length);
           int bytes_per_image = bytes_per_row * extent[1];

           [blit_encoder copyFromBuffer:staging_buffer
-                          sourceOffset:staging_buffer_offset
+                          sourceOffset:0
                      sourceBytesPerRow:bytes_per_row
                    sourceBytesPerImage:bytes_per_image
                             sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
@@ -871,7 +866,7 @@ void gpu::MTLTexture::update_sub(
           MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
           cs.bind_pso(pso);
           cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
           cs.bind_compute_texture(texture_handle, 0);
           [compute_encoder
               dispatchThreads:MTLSizeMake(
@@ -896,7 +891,7 @@ void gpu::MTLTexture::update_sub(
             int face_index = offset[2] + i;

             [blit_encoder copyFromBuffer:staging_buffer
-                            sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                            sourceOffset:texture_array_relative_offset
                        sourceBytesPerRow:bytes_per_row
                      sourceBytesPerImage:bytes_per_image
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -930,7 +925,7 @@ void gpu::MTLTexture::update_sub(
           for (int i = 0; i < extent[2]; i++) {
             int face_index = offset[2] + i;
             [blit_encoder copyFromBuffer:staging_buffer
-                            sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                            sourceOffset:texture_array_relative_offset
                        sourceBytesPerRow:bytes_per_row
                      sourceBytesPerImage:bytes_per_image
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -1058,6 +1053,11 @@ void gpu::MTLTexture::update_sub(
     /* Decrement texture reference counts. This ensures temporary texture views are released. */
     [texture_handle release];

+    /* Release temporary staging buffer allocation.
+     * NOTE: Allocation will be tracked with command submission and released once no longer in
+     * use. */
+    temp_allocation->free();
   }
 }
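The free() at the end of update_sub() is deliberately not an immediate deallocation: per the NOTE, the pooled staging buffer stays alive until the command submission that copies from it has completed on the GPU. A simplified, self-contained sketch of such submission-tracked release (illustrative names, not the Metal backend):

#include <cstdint>
#include <vector>

struct StagingBuffer {
  uint64_t last_used_submission = 0;
  bool free_requested = false;
};

struct StagingPool {
  std::vector<StagingBuffer *> in_flight;

  /* Called by the texture upload once it no longer needs the staging memory. */
  void release(StagingBuffer *buf, uint64_t submission_id)
  {
    buf->free_requested = true;
    buf->last_used_submission = submission_id;
    in_flight.push_back(buf);
  }

  /* Called when the backend learns that a GPU submission has completed. */
  void on_submission_complete(uint64_t completed_submission)
  {
    for (auto it = in_flight.begin(); it != in_flight.end();) {
      StagingBuffer *buf = *it;
      if (buf->free_requested && buf->last_used_submission <= completed_submission) {
        delete buf; /* Or return the block to a reuse list. */
        it = in_flight.erase(it);
      }
      else {
        ++it;
      }
    }
  }
};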

View File

@@ -402,9 +402,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
                                                  options:options
                                                    error:&error] autorelease];
     if (error) {
-      NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
-      BLI_assert(false);
-      return nullptr;
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
+        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
+        BLI_assert(false);
+        return nil;
+      }
     }

     /* Fetch compute function. */
@@ -718,9 +722,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
                                                  options:options
                                                    error:&error] autorelease];
     if (error) {
-      NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
-      BLI_assert(false);
-      return nil;
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
+        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
+        BLI_assert(false);
+        return nil;
+      }
     }

     /* Fetch compute function. */

View File

@@ -7,6 +7,10 @@
  * and texture2d types in metal).
  */

+/* Suppress unhelpful shader compiler warnings. */
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wcomment"
+
 /* Base instance with offsets. */
 #define gpu_BaseInstance gl_BaseInstanceARB
 #define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)