forked from blender/blender
main sync #3
@@ -147,6 +147,18 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
                                             const float color[4])
 {
   int subpart_x, subpart_y, tex_w = 256, tex_h = 256;
+#ifdef __APPLE__
+  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
+    /* NOTE(Metal): The Metal backend will keep all temporary texture memory within a command
+     * submission in-flight, so using a partial tile size does not provide any tangible memory
+     * reduction, but does incur additional API overhead and significant cache inefficiency on AMD
+     * platforms.
+     * The Metal API also provides smart resource paging such that the application can
+     * still efficiently swap memory, even if system is low in physical memory. */
+    tex_w = img_w;
+    tex_h = img_h;
+  }
+#endif
   int seamless, offset_x, offset_y, nsubparts_x, nsubparts_y;
   int components;
   const bool use_clipping = ((clip_min_x < clip_max_x) && (clip_min_y < clip_max_y));
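The hunk above makes the Metal backend upload the image as one full-size tile instead of 256x256 tiles. A minimal standalone sketch of the tile-count arithmetic (tile_count is a hypothetical helper, not from the patch, mirroring how nsubparts_x/nsubparts_y are derived): with ceil division, a 1920x1080 image needs 8 * 5 = 40 tiles at 256x256 but only one at full size, so the per-tile API overhead the NOTE describes disappears.

    #include <cstdio>

    /* Hypothetical helper: ceil(img / tex), the number of sub-parts along one axis. */
    static int tile_count(int img, int tex)
    {
      return (img + tex - 1) / tex;
    }

    int main()
    {
      const int img_w = 1920, img_h = 1080;
      std::printf("256x256 tiles: %d\n", tile_count(img_w, 256) * tile_count(img_h, 256));       /* 40 */
      std::printf("full-size tiles: %d\n", tile_count(img_w, img_w) * tile_count(img_h, img_h)); /* 1 */
      return 0;
    }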
@@ -99,6 +99,8 @@ struct GPUPass {
   /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
    * during pass code generation. */
   bool should_optimize;
+  /** Whether pass is in the GPUPass cache. */
+  bool cached;
 };
 
 /* -------------------------------------------------------------------- */
@@ -132,6 +134,7 @@ static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
 static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
 {
   BLI_spin_lock(&pass_cache_spin);
+  pass->cached = true;
   if (node != nullptr) {
     /* Add after the first pass having the same hash. */
     pass->next = node->next;
@@ -775,6 +778,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   pass->create_info = codegen.create_info;
   pass->hash = codegen.hash_get();
   pass->compiled = false;
+  pass->cached = false;
   /* Only flag pass optimization hint if this is the first generated pass for a material.
    * Optimized passes cannot be optimized further, even if the heuristic is still not
    * favorable. */
@@ -881,14 +885,6 @@ GPUShader *GPU_pass_shader_get(GPUPass *pass)
   return pass->shader;
 }
 
-void GPU_pass_release(GPUPass *pass)
-{
-  BLI_spin_lock(&pass_cache_spin);
-  BLI_assert(pass->refcount > 0);
-  pass->refcount--;
-  BLI_spin_unlock(&pass_cache_spin);
-}
-
 static void gpu_pass_free(GPUPass *pass)
 {
   BLI_assert(pass->refcount == 0);
@@ -899,6 +895,18 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_freeN(pass);
 }
 
+void GPU_pass_release(GPUPass *pass)
+{
+  BLI_spin_lock(&pass_cache_spin);
+  BLI_assert(pass->refcount > 0);
+  pass->refcount--;
+  /* Un-cached passes will not be filtered by garbage collection, so release here. */
+  if (pass->refcount == 0 && !pass->cached) {
+    gpu_pass_free(pass);
+  }
+  BLI_spin_unlock(&pass_cache_spin);
+}
+
 void GPU_pass_cache_garbage_collect(void)
 {
   static int lasttime = 0;
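Taken together, the hunks above split pass destruction into two paths: cached passes wait for GPU_pass_cache_garbage_collect(), while a pass that never entered the cache is freed as soon as its last reference is released. A condensed model of that lifecycle, using simplified stand-in types rather than the real GPUPass/spin-lock API:

    /* Simplified stand-ins; the real code uses GPUPass, BLI spin locks and MEM_freeN(). */
    struct Pass {
      int refcount = 1;
      bool cached = false;
    };

    static void release(Pass *pass)
    {
      /* Mirrors the new GPU_pass_release(): only an un-cached pass is freed immediately. */
      if (--pass->refcount == 0 && !pass->cached) {
        delete pass; /* gpu_pass_free() in the real code. */
      }
    }

    int main()
    {
      release(new Pass()); /* Un-cached: freed right here. */
      Pass *cached = new Pass();
      cached->cached = true;
      release(cached); /* Cached: stays alive for the garbage collector. */
      delete cached;   /* Stand-in for the collector's sweep. */
      return 0;
    }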
@@ -288,17 +288,17 @@ class MTLSafeFreeList {
   std::atomic<bool> in_free_queue_;
   std::atomic<bool> referenced_by_workload_;
   std::recursive_mutex lock_;
 
   /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
-  std::atomic<int> has_next_pool_;
   std::atomic<MTLSafeFreeList *> next_;
 
   /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
-   * for performance and memory.
+   * for performance and memory. Higher chunk counts are preferable for efficiently
+   * performing block operations such as copying several objects simultaneously.
+   *
    * MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
    * before buffers are returned to global memory pool. This is set at a point to reduce
    * overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
-  static const int MAX_NUM_BUFFERS_ = 1024;
+  static const int MAX_NUM_BUFFERS_ = 8192;
   static const int MIN_BUFFER_FLUSH_COUNT = 120;
   std::atomic<int> current_list_index_;
   gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
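For scale: each chunk embeds MAX_NUM_BUFFERS_ raw pointers, so on a 64-bit platform the fixed per-chunk footprint grows from 1024 * 8 B = 8 KiB to 8192 * 8 B = 64 KiB, in exchange for chaining a new chunk eight times less often.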
@@ -306,8 +306,8 @@ class MTLSafeFreeList {
  public:
   MTLSafeFreeList();
 
-  /* Add buffer to Safe Free List, can be called from secondary threads.
-   * Performs a lockless list insert. */
+  /* Can be used from multiple threads. Performs insertion into Safe Free List with the least
+   * amount of threading synchronization. */
   void insert_buffer(gpu::MTLBuffer *buffer);
 
   /* Whether we need to start a new safe free list, or can carry on using the existing one. */
@@ -322,10 +322,11 @@ class MTLSafeFreeList {
   void flag_in_queue()
   {
     in_free_queue_ = true;
-    if (has_next_pool_) {
+    if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
       MTLSafeFreeList *next_pool = next_.load();
-      BLI_assert(next_pool != nullptr);
+      if (next_pool) {
         next_pool->flag_in_queue();
+      }
     }
   }
 };
@@ -257,10 +257,7 @@ void MTLBufferPool::update_memory_pools()
     }
 
     /* Fetch next MTLSafeFreeList chunk, if any. */
-    MTLSafeFreeList *next_list = nullptr;
-    if (current_pool->has_next_pool_ > 0) {
-      next_list = current_pool->next_.load();
-    }
+    MTLSafeFreeList *next_list = current_pool->next_.load();
 
     /* Delete current MTLSafeFreeList */
     current_pool->lock_.unlock();
@@ -396,7 +393,6 @@ MTLSafeFreeList::MTLSafeFreeList()
   in_free_queue_ = false;
   current_list_index_ = 0;
   next_ = nullptr;
-  has_next_pool_ = 0;
 }
 
 void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
@@ -410,12 +406,19 @@ void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
    * insert the buffer into the next available chunk. */
   if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
 
-    /* Check if first caller to generate next pool. */
-    int has_next = has_next_pool_++;
-    if (has_next == 0) {
-      next_ = new MTLSafeFreeList();
-    }
+    /* Check if first caller to generate next pool in chain.
+     * Otherwise, ensure pool exists or wait for first caller to create next pool. */
     MTLSafeFreeList *next_list = next_.load();
+
+    if (!next_list) {
+      std::unique_lock lock(lock_);
+
+      next_list = next_.load();
+      if (!next_list) {
+        next_list = new MTLSafeFreeList();
+        next_.store(next_list);
+      }
+    }
     BLI_assert(next_list);
     next_list->insert_buffer(buffer);
 
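The rewritten insert path above is classic double-checked locking replacing the old has_next_pool_ counter. A self-contained sketch of the pattern with illustrative names (Chunk and get_or_create_next are not Blender API): the fast path is a lock-free atomic load; only threads that race on creating the next chunk take the mutex, and the re-load inside the lock guarantees exactly one winner allocates.

    #include <atomic>
    #include <mutex>

    struct Chunk {
      std::atomic<Chunk *> next{nullptr};
      std::mutex lock;

      Chunk *get_or_create_next()
      {
        Chunk *n = next.load();
        if (!n) { /* Fast path failed: the next chunk may not exist yet. */
          std::unique_lock<std::mutex> guard(lock);
          n = next.load(); /* Re-check under the lock: another thread may have won. */
          if (!n) {
            n = new Chunk();
            next.store(n);
          }
        }
        return n;
      }
    };

    int main()
    {
      Chunk head;
      delete head.get_or_create_next(); /* Single-threaded smoke test. */
      return 0;
    }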
@@ -490,8 +490,12 @@ class MSLGeneratorInterface {
   std::string generate_msl_uniform_undefs(ShaderStage stage);
   std::string generate_ubo_block_undef_chain(ShaderStage stage);
   std::string generate_msl_texture_vars(ShaderStage shader_stage);
-  void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage);
-  void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage);
+  void generate_msl_textures_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);
+  void generate_msl_uniforms_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);
 
   /* Location is not always specified, so this will resolve outstanding locations. */
   void resolve_input_attribute_locations();
@@ -2145,8 +2145,20 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
   return out.str();
 }
 
+/* If first parameter in function signature, do not print out a comma.
+ * Update first parameter flag to false for future invocations. */
+static char parameter_delimiter(bool &is_first_parameter)
+{
+  if (is_first_parameter) {
+    is_first_parameter = false;
+    return ' ';
+  }
+  return ',';
+}
+
 void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
   /* Note: Shader stage must be specified as the singular stage index for which the input
    * is generating. Compound stages are not valid inputs. */
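Since every entry in a generated entry-point signature is now conditional, the helper above lets whichever parameter is emitted first take a leading space while every later one takes the separating comma. A minimal usage sketch (the helper body is copied from the hunk; the surrounding main is illustrative only):

    #include <iostream>
    #include <sstream>

    static char parameter_delimiter(bool &is_first_parameter)
    {
      if (is_first_parameter) {
        is_first_parameter = false;
        return ' ';
      }
      return ',';
    }

    int main()
    {
      std::stringstream out;
      bool is_first_parameter = true;
      out << parameter_delimiter(is_first_parameter) << "\n\tfloat a"; /* Emits ' '. */
      out << parameter_delimiter(is_first_parameter) << "\n\tfloat b"; /* Emits ','. */
      std::cout << out.str() << "\n"; /* " \n\tfloat a,\n\tfloat b" */
      return 0;
    }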
@@ -2156,7 +2168,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
     BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
     for (const MSLTextureSampler &tex : this->texture_samplers) {
       if (bool(tex.stage & stage)) {
-        out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]";
+        out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
+            << " [[texture(" << tex.location << ")]]";
       }
     }
 
@@ -2166,7 +2179,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
    * If we exceed the hardware-supported limit, then follow a bind-less model using argument
    * buffers. */
   if (this->use_argument_buffer_for_samplers()) {
-    out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
         << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]";
   }
   else {
@@ -2175,7 +2189,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
     BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
     for (const MSLTextureSampler &tex : this->texture_samplers) {
       if (bool(tex.stage & stage)) {
-        out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]";
+        out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
+            << "_sampler [[sampler(" << tex.location << ")]]";
       }
     }
 
@@ -2189,12 +2204,13 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
 }
 
 void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
   for (const MSLUniformBlock &ubo : this->uniform_blocks) {
     if (bool(ubo.stage & stage)) {
       /* For literal/existing global types, we do not need the class name-space accessor. */
-      out << ",\n\tconstant ";
+      out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
       if (!is_builtin_type(ubo.type_name)) {
         out << get_stage_class_name(stage) << "::";
       }
@@ -2211,104 +2227,135 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
 std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
 {
   std::stringstream out;
+  bool is_first_parameter = true;
+
   if (this->uses_ssbo_vertex_fetch_mode) {
     /* Vertex Buffers bound as raw buffers. */
     for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
-      out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n";
+      out << parameter_delimiter(is_first_parameter) << "\tconstant uchar* MTL_VERTEX_DATA_" << i
+          << " [[buffer(" << i << ")]]\n";
     }
-    out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],";
+    out << parameter_delimiter(is_first_parameter)
+        << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]]";
   }
   else {
     if (this->vertex_input_attributes.size() > 0) {
       /* Vertex Buffers use input assembly. */
-      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],";
+      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]]";
+      is_first_parameter = false;
     }
   }
-  out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
 
-  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX);
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
+        << get_stage_class_name(ShaderStage::VERTEX)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+    is_first_parameter = false;
+  }
+
+  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX, is_first_parameter);
 
   /* Transform feedback buffer binding. */
   if (this->uses_transform_feedback) {
-    out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX)
+    out << parameter_delimiter(is_first_parameter) << "\n\tdevice "
+        << get_stage_class_name(ShaderStage::VERTEX)
         << "::VertexOut_TF* "
            "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]";
   }
 
   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX);
+  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX, is_first_parameter);
 
   /* Entry point parameters for gl Globals. */
   if (this->uses_gl_VertexID) {
-    out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_VertexID [[vertex_id]]";
   }
   if (this->uses_gl_InstanceID) {
-    out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_InstanceID [[instance_id]]";
   }
   if (this->uses_gl_BaseInstanceARB) {
-    out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
   }
   return out.str();
 }
 
 std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string()
 {
+  bool is_first_parameter = true;
   std::stringstream out;
-  out << get_stage_class_name(ShaderStage::FRAGMENT)
-      << "::VertexOut v_in [[stage_in]],\n\tconstant "
-      << get_stage_class_name(ShaderStage::FRAGMENT)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  out << parameter_delimiter(is_first_parameter) << get_stage_class_name(ShaderStage::FRAGMENT)
+      << "::VertexOut v_in [[stage_in]]";
 
-  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT);
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
+        << get_stage_class_name(ShaderStage::FRAGMENT)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }
+
+  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);
 
   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT);
+  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);
 
   if (this->uses_gl_PointCoord) {
-    out << ",\n\tconst float2 gl_PointCoord [[point_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float2 gl_PointCoord [[point_coord]]";
   }
   if (this->uses_gl_FrontFacing) {
-    out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
  }
   if (this->uses_gl_PrimitiveID) {
-    out << ",\n\tconst uint gl_PrimitiveID [[primitive_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_PrimitiveID [[primitive_id]]";
   }
 
   /* Barycentrics. */
   if (this->uses_barycentrics) {
-    out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
   }
   return out.str();
 }
 
 std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
 {
+  bool is_first_parameter = true;
   std::stringstream out;
-  out << "constant " << get_stage_class_name(ShaderStage::COMPUTE)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "constant "
+        << get_stage_class_name(ShaderStage::COMPUTE)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }
 
-  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE, is_first_parameter);
 
   /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE, is_first_parameter);
 
   /* Entry point parameters for gl Globals. */
   if (this->uses_gl_GlobalInvocationID) {
-    out << ",\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
   }
   if (this->uses_gl_WorkGroupID) {
-    out << ",\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
   }
   if (this->uses_gl_NumWorkGroups) {
-    out << ",\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
  }
   if (this->uses_gl_LocalInvocationIndex) {
-    out << ",\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
   }
   if (this->uses_gl_LocalInvocationID) {
-    out << ",\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
   }
 
   return out.str();
@@ -2316,6 +2363,10 @@ std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
 
 std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage)
 {
+  /* Only generate PushConstantBlock if we have uniforms. */
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
   BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
   std::stringstream out;
 
@@ -2624,6 +2675,9 @@ std::string MSLGeneratorInterface::generate_msl_fragment_out_struct()
 
 std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage)
 {
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
   /* Populate Global Uniforms. */
   std::stringstream out;
 
@@ -594,17 +594,6 @@ void gpu::MTLTexture::update_sub(
     }
   }
 
-  /* Prepare staging buffer for data. */
-  id<MTLBuffer> staging_buffer = nil;
-  uint64_t staging_buffer_offset = 0;
-
-  /* Fetch allocation from scratch buffer. */
-  MTLTemporaryBuffer allocation =
-      ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);
-  memcpy(allocation.data, data, totalsize);
-  staging_buffer = allocation.metal_buffer;
-  staging_buffer_offset = allocation.buffer_offset;
-
   /* Common Properties. */
   MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
       destination_format);
@@ -616,6 +605,12 @@ void gpu::MTLTexture::update_sub(
     return;
   }
 
+  /* Fetch allocation from memory pool. */
+  MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
+      totalsize, true, data);
+  id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
+  BLI_assert(staging_buffer != nil);
+
   /* Prepare command encoders. */
   id<MTLBlitCommandEncoder> blit_encoder = nil;
   id<MTLComputeCommandEncoder> compute_encoder = nil;
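Because the staging memory is now a dedicated pool allocation rather than a range inside a shared scratch buffer, the uploaded data starts at offset 0. That is why every following hunk in this function drops staging_buffer_offset from the copy and bind offsets, and why the temp_allocation->free() added at the end of update_sub() (the @@ -1058 hunk below) is safe: per its NOTE, the allocation is tracked with the command submission and only recycled once no longer in use.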
@@ -697,7 +692,7 @@ void gpu::MTLTexture::update_sub(
       int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
       for (int array_index = 0; array_index < max_array_index; array_index++) {
 
-        int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index);
+        int buffer_array_offset = (bytes_per_image * array_index);
         [blit_encoder
                  copyFromBuffer:staging_buffer
                    sourceOffset:buffer_array_offset
@@ -727,7 +722,7 @@ void gpu::MTLTexture::update_sub(
       MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
       cs.bind_pso(pso);
       cs.bind_compute_bytes(&params, sizeof(params), 0);
-      cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+      cs.bind_compute_buffer(staging_buffer, 0, 1);
       cs.bind_compute_texture(texture_handle, 0);
       [compute_encoder
           dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
@@ -747,7 +742,7 @@ void gpu::MTLTexture::update_sub(
       MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
       cs.bind_pso(pso);
       cs.bind_compute_bytes(&params, sizeof(params), 0);
-      cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+      cs.bind_compute_buffer(staging_buffer, 0, 1);
       cs.bind_compute_texture(texture_handle, 0);
       [compute_encoder
           dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
@@ -779,7 +774,7 @@ void gpu::MTLTexture::update_sub(
         }
 
         [blit_encoder copyFromBuffer:staging_buffer
-                        sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                        sourceOffset:texture_array_relative_offset
                    sourceBytesPerRow:bytes_per_row
                  sourceBytesPerImage:bytes_per_image
                           sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -807,7 +802,7 @@ void gpu::MTLTexture::update_sub(
       MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
       cs.bind_pso(pso);
       cs.bind_compute_bytes(&params, sizeof(params), 0);
-      cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+      cs.bind_compute_buffer(staging_buffer, 0, 1);
       cs.bind_compute_texture(texture_handle, 0);
       [compute_encoder
           dispatchThreads:MTLSizeMake(
@@ -828,7 +823,7 @@ void gpu::MTLTexture::update_sub(
       MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
       cs.bind_pso(pso);
       cs.bind_compute_bytes(&params, sizeof(params), 0);
-      cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+      cs.bind_compute_buffer(staging_buffer, 0, 1);
       cs.bind_compute_texture(texture_handle, 0);
       [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                    extent[1],
@@ -848,7 +843,7 @@ void gpu::MTLTexture::update_sub(
                                              ctx->pipeline_state.unpack_row_length);
       int bytes_per_image = bytes_per_row * extent[1];
       [blit_encoder copyFromBuffer:staging_buffer
-                      sourceOffset:staging_buffer_offset
+                      sourceOffset:0
                  sourceBytesPerRow:bytes_per_row
                sourceBytesPerImage:bytes_per_image
                         sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
@@ -871,7 +866,7 @@ void gpu::MTLTexture::update_sub(
       MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
       cs.bind_pso(pso);
       cs.bind_compute_bytes(&params, sizeof(params), 0);
-      cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+      cs.bind_compute_buffer(staging_buffer, 0, 1);
       cs.bind_compute_texture(texture_handle, 0);
       [compute_encoder
           dispatchThreads:MTLSizeMake(
@@ -896,7 +891,7 @@ void gpu::MTLTexture::update_sub(
         int face_index = offset[2] + i;
 
         [blit_encoder copyFromBuffer:staging_buffer
-                        sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                        sourceOffset:texture_array_relative_offset
                    sourceBytesPerRow:bytes_per_row
                  sourceBytesPerImage:bytes_per_image
                           sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -930,7 +925,7 @@ void gpu::MTLTexture::update_sub(
       for (int i = 0; i < extent[2]; i++) {
         int face_index = offset[2] + i;
         [blit_encoder copyFromBuffer:staging_buffer
-                        sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                        sourceOffset:texture_array_relative_offset
                    sourceBytesPerRow:bytes_per_row
                  sourceBytesPerImage:bytes_per_image
                           sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@@ -1058,6 +1053,11 @@ void gpu::MTLTexture::update_sub(
 
     /* Decrement texture reference counts. This ensures temporary texture views are released. */
     [texture_handle release];
 
+    /* Release temporary staging buffer allocation.
+     * NOTE: Allocation will be tracked with command submission and released once no longer in use.
+     */
+    temp_allocation->free();
   }
 }
 
@@ -402,9 +402,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
                                          options:options
                                            error:&error] autorelease];
     if (error) {
-      NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
-      BLI_assert(false);
-      return nullptr;
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
+        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
+        BLI_assert(false);
+        return nil;
+      }
     }
 
     /* Fetch compute function. */
@@ -718,9 +722,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
                                          options:options
                                            error:&error] autorelease];
     if (error) {
-      NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
-      BLI_assert(false);
-      return nil;
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
+        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
+        BLI_assert(false);
+        return nil;
+      }
    }
 
     /* Fetch compute function. */
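In both hunks above, a populated NSError is now fatal only when its description lacks the phrase "Compilation succeeded"; the comment implies Metal routes warning-only results through the same NSError with that phrase in the text. The same predicate in portable C++ for clarity (is_genuine_compile_error is illustrative, not Blender or Metal API):

    #include <cstdio>
    #include <string>

    /* True only for a genuine failure; warning-only results contain "Compilation succeeded". */
    static bool is_genuine_compile_error(const std::string &error_description)
    {
      return error_description.find("Compilation succeeded") == std::string::npos;
    }

    int main()
    {
      std::printf("%d\n", is_genuine_compile_error("Compilation succeeded with warnings")); /* 0 */
      std::printf("%d\n", is_genuine_compile_error("Compilation failed: syntax error"));    /* 1 */
      return 0;
    }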
@@ -7,6 +7,10 @@
  * and texture2d types in metal).
  */
 
+/* Suppress unhelpful shader compiler warnings. */
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wcomment"
+
 /* Base instance with offsets. */
 #define gpu_BaseInstance gl_BaseInstanceARB
 #define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)