forked from blender/blender
main sync #3
@ -147,6 +147,18 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
|
||||
const float color[4])
|
||||
{
|
||||
int subpart_x, subpart_y, tex_w = 256, tex_h = 256;
|
||||
#ifdef __APPLE__
|
||||
if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
|
||||
/* NOTE(Metal): The Metal backend will keep all temporary texture memory within a command
|
||||
* submission in-flight, so using a partial tile size does not provide any tangible memory
|
||||
* reduction, but does incur additional API overhead and significant cache inefficiency on AMD
|
||||
* platforms.
|
||||
* The Metal API also provides smart resource paging such that the application can
|
||||
* still efficiently swap memory, even if system is low in physical memory. */
|
||||
tex_w = img_w;
|
||||
tex_h = img_h;
|
||||
}
|
||||
#endif
|
||||
int seamless, offset_x, offset_y, nsubparts_x, nsubparts_y;
|
||||
int components;
|
||||
const bool use_clipping = ((clip_min_x < clip_max_x) && (clip_min_y < clip_max_y));
|
||||
|
@ -99,6 +99,8 @@ struct GPUPass {
|
||||
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
|
||||
* during pass code generation. */
|
||||
bool should_optimize;
|
||||
/** Whether pass is in the GPUPass cache. */
|
||||
bool cached;
|
||||
};
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@ -132,6 +134,7 @@ static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
|
||||
static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
|
||||
{
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
pass->cached = true;
|
||||
if (node != nullptr) {
|
||||
/* Add after the first pass having the same hash. */
|
||||
pass->next = node->next;
|
||||
@ -775,6 +778,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
pass->create_info = codegen.create_info;
|
||||
pass->hash = codegen.hash_get();
|
||||
pass->compiled = false;
|
||||
pass->cached = false;
|
||||
/* Only flag pass optimization hint if this is the first generated pass for a material.
|
||||
* Optimized passes cannot be optimized further, even if the heuristic is still not
|
||||
* favorable. */
|
||||
@ -881,14 +885,6 @@ GPUShader *GPU_pass_shader_get(GPUPass *pass)
|
||||
return pass->shader;
|
||||
}
|
||||
|
||||
void GPU_pass_release(GPUPass *pass)
|
||||
{
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
BLI_assert(pass->refcount > 0);
|
||||
pass->refcount--;
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
}
|
||||
|
||||
static void gpu_pass_free(GPUPass *pass)
|
||||
{
|
||||
BLI_assert(pass->refcount == 0);
|
||||
@ -899,6 +895,18 @@ static void gpu_pass_free(GPUPass *pass)
|
||||
MEM_freeN(pass);
|
||||
}
|
||||
|
||||
void GPU_pass_release(GPUPass *pass)
|
||||
{
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
BLI_assert(pass->refcount > 0);
|
||||
pass->refcount--;
|
||||
/* Un-cached passes will not be filtered by garbage collection, so release here. */
|
||||
if (pass->refcount == 0 && !pass->cached) {
|
||||
gpu_pass_free(pass);
|
||||
}
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
}
|
||||
|
||||
void GPU_pass_cache_garbage_collect(void)
|
||||
{
|
||||
static int lasttime = 0;
|
||||
|
@ -288,17 +288,17 @@ class MTLSafeFreeList {
|
||||
std::atomic<bool> in_free_queue_;
|
||||
std::atomic<bool> referenced_by_workload_;
|
||||
std::recursive_mutex lock_;
|
||||
|
||||
/* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
|
||||
std::atomic<int> has_next_pool_;
|
||||
std::atomic<MTLSafeFreeList *> next_;
|
||||
|
||||
/* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
|
||||
* for performance and memory.
|
||||
* for performance and memory. Higher chunk counts are preferable for efficiently
|
||||
* performing block operations such as copying several objects simultaneously.
|
||||
*
|
||||
* MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
|
||||
* before buffers are returned to global memory pool. This is set at a point to reduce
|
||||
* overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
|
||||
static const int MAX_NUM_BUFFERS_ = 1024;
|
||||
static const int MAX_NUM_BUFFERS_ = 8192;
|
||||
static const int MIN_BUFFER_FLUSH_COUNT = 120;
|
||||
std::atomic<int> current_list_index_;
|
||||
gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
|
||||
@ -306,8 +306,8 @@ class MTLSafeFreeList {
|
||||
public:
|
||||
MTLSafeFreeList();
|
||||
|
||||
/* Add buffer to Safe Free List, can be called from secondary threads.
|
||||
* Performs a lockless list insert. */
|
||||
/* Can be used from multiple threads. Performs insertion into Safe Free List with the least
|
||||
* amount of threading synchronization. */
|
||||
void insert_buffer(gpu::MTLBuffer *buffer);
|
||||
|
||||
/* Whether we need to start a new safe free list, or can carry on using the existing one. */
|
||||
@ -322,10 +322,11 @@ class MTLSafeFreeList {
|
||||
void flag_in_queue()
|
||||
{
|
||||
in_free_queue_ = true;
|
||||
if (has_next_pool_) {
|
||||
if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
|
||||
MTLSafeFreeList *next_pool = next_.load();
|
||||
BLI_assert(next_pool != nullptr);
|
||||
next_pool->flag_in_queue();
|
||||
if (next_pool) {
|
||||
next_pool->flag_in_queue();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -257,10 +257,7 @@ void MTLBufferPool::update_memory_pools()
|
||||
}
|
||||
|
||||
/* Fetch next MTLSafeFreeList chunk, if any. */
|
||||
MTLSafeFreeList *next_list = nullptr;
|
||||
if (current_pool->has_next_pool_ > 0) {
|
||||
next_list = current_pool->next_.load();
|
||||
}
|
||||
MTLSafeFreeList *next_list = current_pool->next_.load();
|
||||
|
||||
/* Delete current MTLSafeFreeList */
|
||||
current_pool->lock_.unlock();
|
||||
@ -396,7 +393,6 @@ MTLSafeFreeList::MTLSafeFreeList()
|
||||
in_free_queue_ = false;
|
||||
current_list_index_ = 0;
|
||||
next_ = nullptr;
|
||||
has_next_pool_ = 0;
|
||||
}
|
||||
|
||||
void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
|
||||
@ -410,12 +406,19 @@ void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
|
||||
* insert the buffer into the next available chunk. */
|
||||
if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
|
||||
|
||||
/* Check if first caller to generate next pool. */
|
||||
int has_next = has_next_pool_++;
|
||||
if (has_next == 0) {
|
||||
next_ = new MTLSafeFreeList();
|
||||
}
|
||||
/* Check if first caller to generate next pool in chain.
|
||||
* Otherwise, ensure pool exists or wait for first caller to create next pool. */
|
||||
MTLSafeFreeList *next_list = next_.load();
|
||||
|
||||
if (!next_list) {
|
||||
std::unique_lock lock(lock_);
|
||||
|
||||
next_list = next_.load();
|
||||
if (!next_list) {
|
||||
next_list = new MTLSafeFreeList();
|
||||
next_.store(next_list);
|
||||
}
|
||||
}
|
||||
BLI_assert(next_list);
|
||||
next_list->insert_buffer(buffer);
|
||||
|
||||
|
@ -490,8 +490,12 @@ class MSLGeneratorInterface {
|
||||
std::string generate_msl_uniform_undefs(ShaderStage stage);
|
||||
std::string generate_ubo_block_undef_chain(ShaderStage stage);
|
||||
std::string generate_msl_texture_vars(ShaderStage shader_stage);
|
||||
void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage);
|
||||
void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage);
|
||||
void generate_msl_textures_input_string(std::stringstream &out,
|
||||
ShaderStage stage,
|
||||
bool &is_first_parameter);
|
||||
void generate_msl_uniforms_input_string(std::stringstream &out,
|
||||
ShaderStage stage,
|
||||
bool &is_first_parameter);
|
||||
|
||||
/* Location is not always specified, so this will resolve outstanding locations. */
|
||||
void resolve_input_attribute_locations();
|
||||
|
@ -2145,8 +2145,20 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
|
||||
return out.str();
|
||||
}
|
||||
|
||||
/* If first parameter in function signature, do not print out a comma.
|
||||
* Update first parameter flag to false for future invocations. */
|
||||
static char parameter_delimiter(bool &is_first_parameter)
|
||||
{
|
||||
if (is_first_parameter) {
|
||||
is_first_parameter = false;
|
||||
return ' ';
|
||||
}
|
||||
return ',';
|
||||
}
|
||||
|
||||
void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out,
|
||||
ShaderStage stage)
|
||||
ShaderStage stage,
|
||||
bool &is_first_parameter)
|
||||
{
|
||||
/* Note: Shader stage must be specified as the singular stage index for which the input
|
||||
* is generating. Compound stages are not valid inputs. */
|
||||
@ -2156,7 +2168,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
|
||||
BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
|
||||
for (const MSLTextureSampler &tex : this->texture_samplers) {
|
||||
if (bool(tex.stage & stage)) {
|
||||
out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]";
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
|
||||
<< " [[texture(" << tex.location << ")]]";
|
||||
}
|
||||
}
|
||||
|
||||
@ -2166,7 +2179,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
|
||||
* If we exceed the hardware-supported limit, then follow a bind-less model using argument
|
||||
* buffers. */
|
||||
if (this->use_argument_buffer_for_samplers()) {
|
||||
out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
|
||||
<< (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]";
|
||||
}
|
||||
else {
|
||||
@ -2175,7 +2189,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
|
||||
BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
|
||||
for (const MSLTextureSampler &tex : this->texture_samplers) {
|
||||
if (bool(tex.stage & stage)) {
|
||||
out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]";
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
|
||||
<< "_sampler [[sampler(" << tex.location << ")]]";
|
||||
}
|
||||
}
|
||||
|
||||
@ -2189,12 +2204,13 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
|
||||
}
|
||||
|
||||
void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
|
||||
ShaderStage stage)
|
||||
ShaderStage stage,
|
||||
bool &is_first_parameter)
|
||||
{
|
||||
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
|
||||
if (bool(ubo.stage & stage)) {
|
||||
/* For literal/existing global types, we do not need the class name-space accessor. */
|
||||
out << ",\n\tconstant ";
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
|
||||
if (!is_builtin_type(ubo.type_name)) {
|
||||
out << get_stage_class_name(stage) << "::";
|
||||
}
|
||||
@ -2211,104 +2227,135 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
|
||||
std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
|
||||
{
|
||||
std::stringstream out;
|
||||
bool is_first_parameter = true;
|
||||
|
||||
if (this->uses_ssbo_vertex_fetch_mode) {
|
||||
/* Vertex Buffers bound as raw buffers. */
|
||||
for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
|
||||
out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n";
|
||||
out << parameter_delimiter(is_first_parameter) << "\tconstant uchar* MTL_VERTEX_DATA_" << i
|
||||
<< " [[buffer(" << i << ")]]\n";
|
||||
}
|
||||
out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]]";
|
||||
}
|
||||
else {
|
||||
if (this->vertex_input_attributes.size() > 0) {
|
||||
/* Vertex Buffers use input assembly. */
|
||||
out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],";
|
||||
out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]]";
|
||||
is_first_parameter = false;
|
||||
}
|
||||
}
|
||||
out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX);
|
||||
if (this->uniforms.size() > 0) {
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
|
||||
<< get_stage_class_name(ShaderStage::VERTEX)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
is_first_parameter = false;
|
||||
}
|
||||
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX, is_first_parameter);
|
||||
|
||||
/* Transform feedback buffer binding. */
|
||||
if (this->uses_transform_feedback) {
|
||||
out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX)
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\tdevice "
|
||||
<< get_stage_class_name(ShaderStage::VERTEX)
|
||||
<< "::VertexOut_TF* "
|
||||
"transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]";
|
||||
}
|
||||
|
||||
/* Generate texture signatures. */
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::VERTEX);
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::VERTEX, is_first_parameter);
|
||||
|
||||
/* Entry point parameters for gl Globals. */
|
||||
if (this->uses_gl_VertexID) {
|
||||
out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint32_t gl_VertexID [[vertex_id]]";
|
||||
}
|
||||
if (this->uses_gl_InstanceID) {
|
||||
out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint32_t gl_InstanceID [[instance_id]]";
|
||||
}
|
||||
if (this->uses_gl_BaseInstanceARB) {
|
||||
out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string()
|
||||
{
|
||||
bool is_first_parameter = true;
|
||||
std::stringstream out;
|
||||
out << get_stage_class_name(ShaderStage::FRAGMENT)
|
||||
<< "::VertexOut v_in [[stage_in]],\n\tconstant "
|
||||
<< get_stage_class_name(ShaderStage::FRAGMENT)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
out << parameter_delimiter(is_first_parameter) << get_stage_class_name(ShaderStage::FRAGMENT)
|
||||
<< "::VertexOut v_in [[stage_in]]";
|
||||
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT);
|
||||
if (this->uniforms.size() > 0) {
|
||||
out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
|
||||
<< get_stage_class_name(ShaderStage::FRAGMENT)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
}
|
||||
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);
|
||||
|
||||
/* Generate texture signatures. */
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT);
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);
|
||||
|
||||
if (this->uses_gl_PointCoord) {
|
||||
out << ",\n\tconst float2 gl_PointCoord [[point_coord]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst float2 gl_PointCoord [[point_coord]]";
|
||||
}
|
||||
if (this->uses_gl_FrontFacing) {
|
||||
out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
|
||||
}
|
||||
if (this->uses_gl_PrimitiveID) {
|
||||
out << ",\n\tconst uint gl_PrimitiveID [[primitive_id]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint gl_PrimitiveID [[primitive_id]]";
|
||||
}
|
||||
|
||||
/* Barycentrics. */
|
||||
if (this->uses_barycentrics) {
|
||||
out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
|
||||
{
|
||||
bool is_first_parameter = true;
|
||||
std::stringstream out;
|
||||
out << "constant " << get_stage_class_name(ShaderStage::COMPUTE)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
if (this->uniforms.size() > 0) {
|
||||
out << parameter_delimiter(is_first_parameter) << "constant "
|
||||
<< get_stage_class_name(ShaderStage::COMPUTE)
|
||||
<< "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
|
||||
}
|
||||
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE);
|
||||
this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE, is_first_parameter);
|
||||
|
||||
/* Generate texture signatures. */
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE);
|
||||
this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE, is_first_parameter);
|
||||
|
||||
/* Entry point parameters for gl Globals. */
|
||||
if (this->uses_gl_GlobalInvocationID) {
|
||||
out << ",\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
|
||||
}
|
||||
if (this->uses_gl_WorkGroupID) {
|
||||
out << ",\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
|
||||
}
|
||||
if (this->uses_gl_NumWorkGroups) {
|
||||
out << ",\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
|
||||
}
|
||||
if (this->uses_gl_LocalInvocationIndex) {
|
||||
out << ",\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
|
||||
}
|
||||
if (this->uses_gl_LocalInvocationID) {
|
||||
out << ",\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
|
||||
out << parameter_delimiter(is_first_parameter)
|
||||
<< "\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
|
||||
}
|
||||
|
||||
return out.str();
|
||||
@ -2316,6 +2363,10 @@ std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
|
||||
|
||||
std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage)
|
||||
{
|
||||
/* Only generate PushConstantBlock if we have uniforms. */
|
||||
if (this->uniforms.size() == 0) {
|
||||
return "";
|
||||
}
|
||||
BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
|
||||
std::stringstream out;
|
||||
|
||||
@ -2624,6 +2675,9 @@ std::string MSLGeneratorInterface::generate_msl_fragment_out_struct()
|
||||
|
||||
std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage)
|
||||
{
|
||||
if (this->uniforms.size() == 0) {
|
||||
return "";
|
||||
}
|
||||
/* Populate Global Uniforms. */
|
||||
std::stringstream out;
|
||||
|
||||
|
@ -594,17 +594,6 @@ void gpu::MTLTexture::update_sub(
|
||||
}
|
||||
}
|
||||
|
||||
/* Prepare staging buffer for data. */
|
||||
id<MTLBuffer> staging_buffer = nil;
|
||||
uint64_t staging_buffer_offset = 0;
|
||||
|
||||
/* Fetch allocation from scratch buffer. */
|
||||
MTLTemporaryBuffer allocation =
|
||||
ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);
|
||||
memcpy(allocation.data, data, totalsize);
|
||||
staging_buffer = allocation.metal_buffer;
|
||||
staging_buffer_offset = allocation.buffer_offset;
|
||||
|
||||
/* Common Properties. */
|
||||
MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
|
||||
destination_format);
|
||||
@ -616,6 +605,12 @@ void gpu::MTLTexture::update_sub(
|
||||
return;
|
||||
}
|
||||
|
||||
/* Fetch allocation from memory pool. */
|
||||
MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
|
||||
totalsize, true, data);
|
||||
id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
|
||||
BLI_assert(staging_buffer != nil);
|
||||
|
||||
/* Prepare command encoders. */
|
||||
id<MTLBlitCommandEncoder> blit_encoder = nil;
|
||||
id<MTLComputeCommandEncoder> compute_encoder = nil;
|
||||
@ -697,7 +692,7 @@ void gpu::MTLTexture::update_sub(
|
||||
int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
|
||||
for (int array_index = 0; array_index < max_array_index; array_index++) {
|
||||
|
||||
int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index);
|
||||
int buffer_array_offset = (bytes_per_image * array_index);
|
||||
[blit_encoder
|
||||
copyFromBuffer:staging_buffer
|
||||
sourceOffset:buffer_array_offset
|
||||
@ -727,7 +722,7 @@ void gpu::MTLTexture::update_sub(
|
||||
MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(pso);
|
||||
cs.bind_compute_bytes(¶ms, sizeof(params), 0);
|
||||
cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
|
||||
cs.bind_compute_buffer(staging_buffer, 0, 1);
|
||||
cs.bind_compute_texture(texture_handle, 0);
|
||||
[compute_encoder
|
||||
dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
|
||||
@ -747,7 +742,7 @@ void gpu::MTLTexture::update_sub(
|
||||
MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(pso);
|
||||
cs.bind_compute_bytes(¶ms, sizeof(params), 0);
|
||||
cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
|
||||
cs.bind_compute_buffer(staging_buffer, 0, 1);
|
||||
cs.bind_compute_texture(texture_handle, 0);
|
||||
[compute_encoder
|
||||
dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
|
||||
@ -779,7 +774,7 @@ void gpu::MTLTexture::update_sub(
|
||||
}
|
||||
|
||||
[blit_encoder copyFromBuffer:staging_buffer
|
||||
sourceOffset:staging_buffer_offset + texture_array_relative_offset
|
||||
sourceOffset:texture_array_relative_offset
|
||||
sourceBytesPerRow:bytes_per_row
|
||||
sourceBytesPerImage:bytes_per_image
|
||||
sourceSize:MTLSizeMake(extent[0], extent[1], 1)
|
||||
@ -807,7 +802,7 @@ void gpu::MTLTexture::update_sub(
|
||||
MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(pso);
|
||||
cs.bind_compute_bytes(¶ms, sizeof(params), 0);
|
||||
cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
|
||||
cs.bind_compute_buffer(staging_buffer, 0, 1);
|
||||
cs.bind_compute_texture(texture_handle, 0);
|
||||
[compute_encoder
|
||||
dispatchThreads:MTLSizeMake(
|
||||
@ -828,7 +823,7 @@ void gpu::MTLTexture::update_sub(
|
||||
MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(pso);
|
||||
cs.bind_compute_bytes(¶ms, sizeof(params), 0);
|
||||
cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
|
||||
cs.bind_compute_buffer(staging_buffer, 0, 1);
|
||||
cs.bind_compute_texture(texture_handle, 0);
|
||||
[compute_encoder dispatchThreads:MTLSizeMake(extent[0],
|
||||
extent[1],
|
||||
@ -848,7 +843,7 @@ void gpu::MTLTexture::update_sub(
|
||||
ctx->pipeline_state.unpack_row_length);
|
||||
int bytes_per_image = bytes_per_row * extent[1];
|
||||
[blit_encoder copyFromBuffer:staging_buffer
|
||||
sourceOffset:staging_buffer_offset
|
||||
sourceOffset:0
|
||||
sourceBytesPerRow:bytes_per_row
|
||||
sourceBytesPerImage:bytes_per_image
|
||||
sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
|
||||
@ -871,7 +866,7 @@ void gpu::MTLTexture::update_sub(
|
||||
MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(pso);
|
||||
cs.bind_compute_bytes(¶ms, sizeof(params), 0);
|
||||
cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
|
||||
cs.bind_compute_buffer(staging_buffer, 0, 1);
|
||||
cs.bind_compute_texture(texture_handle, 0);
|
||||
[compute_encoder
|
||||
dispatchThreads:MTLSizeMake(
|
||||
@ -896,7 +891,7 @@ void gpu::MTLTexture::update_sub(
|
||||
int face_index = offset[2] + i;
|
||||
|
||||
[blit_encoder copyFromBuffer:staging_buffer
|
||||
sourceOffset:staging_buffer_offset + texture_array_relative_offset
|
||||
sourceOffset:texture_array_relative_offset
|
||||
sourceBytesPerRow:bytes_per_row
|
||||
sourceBytesPerImage:bytes_per_image
|
||||
sourceSize:MTLSizeMake(extent[0], extent[1], 1)
|
||||
@ -930,7 +925,7 @@ void gpu::MTLTexture::update_sub(
|
||||
for (int i = 0; i < extent[2]; i++) {
|
||||
int face_index = offset[2] + i;
|
||||
[blit_encoder copyFromBuffer:staging_buffer
|
||||
sourceOffset:staging_buffer_offset + texture_array_relative_offset
|
||||
sourceOffset:texture_array_relative_offset
|
||||
sourceBytesPerRow:bytes_per_row
|
||||
sourceBytesPerImage:bytes_per_image
|
||||
sourceSize:MTLSizeMake(extent[0], extent[1], 1)
|
||||
@ -1058,6 +1053,11 @@ void gpu::MTLTexture::update_sub(
|
||||
|
||||
/* Decrement texture reference counts. This ensures temporary texture views are released. */
|
||||
[texture_handle release];
|
||||
|
||||
/* Release temporary staging buffer allocation.
|
||||
* NOTE: Allocation will be tracked with command submission and released once no longer in use.
|
||||
*/
|
||||
temp_allocation->free();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -402,9 +402,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
|
||||
options:options
|
||||
error:&error] autorelease];
|
||||
if (error) {
|
||||
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
|
||||
BLI_assert(false);
|
||||
return nullptr;
|
||||
/* Only exit out if genuine error and not warning. */
|
||||
if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
|
||||
NSNotFound) {
|
||||
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
|
||||
BLI_assert(false);
|
||||
return nil;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fetch compute function. */
|
||||
@ -718,9 +722,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
|
||||
options:options
|
||||
error:&error] autorelease];
|
||||
if (error) {
|
||||
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
|
||||
BLI_assert(false);
|
||||
return nil;
|
||||
/* Only exit out if genuine error and not warning. */
|
||||
if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
|
||||
NSNotFound) {
|
||||
NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
|
||||
BLI_assert(false);
|
||||
return nil;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fetch compute function. */
|
||||
|
@ -7,6 +7,10 @@
|
||||
* and texture2d types in metal).
|
||||
*/
|
||||
|
||||
/* Suppress unhelpful shader compiler warnings. */
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
#pragma clang diagnostic ignored "-Wcomment"
|
||||
|
||||
/* Base instance with offsets. */
|
||||
#define gpu_BaseInstance gl_BaseInstanceARB
|
||||
#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)
|
||||
|
Loading…
Reference in New Issue
Block a user