diff --git a/intern/gawain/CMakeLists.txt b/intern/gawain/CMakeLists.txt
index 9924daa8cd1..424b364ae8e 100644
--- a/intern/gawain/CMakeLists.txt
+++ b/intern/gawain/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC
 	src/gwn_imm_util.c
 	src/gwn_primitive.c
 	src/gwn_shader_interface.c
+	src/gwn_vertex_array_id.cpp
 	src/gwn_vertex_buffer.c
 	src/gwn_vertex_format.c
@@ -30,6 +31,7 @@ set(SRC
 	gawain/gwn_primitive.h
 	gawain/gwn_primitive_private.h
 	gawain/gwn_shader_interface.h
+	gawain/gwn_vertex_array_id.h
 	gawain/gwn_vertex_buffer.h
 	gawain/gwn_vertex_format.h
 	gawain/gwn_vertex_format_private.h
diff --git a/intern/gawain/gawain/gwn_batch.h b/intern/gawain/gawain/gwn_batch.h
index 94cd893f09e..c676cfef119 100644
--- a/intern/gawain/gawain/gwn_batch.h
+++ b/intern/gawain/gawain/gwn_batch.h
@@ -23,34 +23,61 @@ typedef enum {
 } Gwn_BatchPhase;
 
 #define GWN_BATCH_VBO_MAX_LEN 3
+#define GWN_BATCH_VAO_STATIC_LEN 3
+#define GWN_BATCH_VAO_DYN_ALLOC_COUNT 16
 
 typedef struct Gwn_Batch {
 	// geometry
 	Gwn_VertBuf* verts[GWN_BATCH_VBO_MAX_LEN]; // verts[0] is required, others can be NULL
+	Gwn_VertBuf* inst; // instance attribs
 	Gwn_IndexBuf* elem; // NULL if element list not needed
-	Gwn_PrimType prim_type;
 	GLenum gl_prim_type;
 
-	// book-keeping
-	GLuint vao_id; // remembers all geometry state (vertex attrib bindings & element buffer)
-	Gwn_BatchPhase phase;
-	bool program_dirty;
-	bool program_in_use;
-	unsigned owns_flag;
-
-	// state
+	// cached values (avoid dereferencing later)
+	GLuint vao_id;
 	GLuint program;
-	const Gwn_ShaderInterface* interface;
+	const struct Gwn_ShaderInterface* interface;
+
+	// book-keeping
+	unsigned owns_flag;
+	struct Gwn_Context *context; // used to free all vaos. this implies all vaos were created under the same context.
+	Gwn_BatchPhase phase;
+	bool program_in_use;
+
+	// VAO management: remembers all geometry state (vertex attrib bindings & element buffer)
+	// for each shader interface. Start with a static number of VAOs and fall back to a dynamic
+	// count if necessary. Once a batch goes dynamic it does not go back.
+	bool is_dynamic_vao_count;
+	union {
+		// Static handle count
+		struct {
+			const struct Gwn_ShaderInterface* interfaces[GWN_BATCH_VAO_STATIC_LEN];
+			GLuint vao_ids[GWN_BATCH_VAO_STATIC_LEN];
+		} static_vaos;
+		// Dynamic handle count
+		struct {
+			unsigned count;
+			const struct Gwn_ShaderInterface** interfaces;
+			GLuint* vao_ids;
+		} dynamic_vaos;
+	};
+
+	// XXX This is the only solution if we want to have some data structure using
+	// batches as keys to identify nodes. We must destroy these nodes with this callback.
+	void (*free_callback)(struct Gwn_Batch*, void*);
+	void* callback_data;
 } Gwn_Batch;
 
 enum {
 	GWN_BATCH_OWNS_VBO = (1 << 0), /* each vbo index gets bit-shifted */
+	GWN_BATCH_OWNS_INSTANCES = (1 << 30),
 	GWN_BATCH_OWNS_INDEX = (1 << 31),
 };
 
 Gwn_Batch* GWN_batch_create_ex(Gwn_PrimType, Gwn_VertBuf*, Gwn_IndexBuf*, unsigned owns_flag);
 void GWN_batch_init_ex(Gwn_Batch*, Gwn_PrimType, Gwn_VertBuf*, Gwn_IndexBuf*, unsigned owns_flag);
+Gwn_Batch* GWN_batch_duplicate(Gwn_Batch* batch_src);
 
 #define GWN_batch_create(prim, verts, elem) \
 	GWN_batch_create_ex(prim, verts, elem, 0)
@@ -59,11 +86,18 @@ void GWN_batch_init_ex(Gwn_Batch*, Gwn_PrimType, Gwn_VertBuf*, Gwn_IndexBuf*, un
 
 void GWN_batch_discard(Gwn_Batch*); // verts & elem are not discarded
 
+void GWN_batch_callback_free_set(Gwn_Batch*, void (*callback)(Gwn_Batch*, void*), void*);
+
+void GWN_batch_instbuf_set(Gwn_Batch*, Gwn_VertBuf*, bool own_vbo); // Instancing
+
 int GWN_batch_vertbuf_add_ex(Gwn_Batch*, Gwn_VertBuf*, bool own_vbo);
 
 #define GWN_batch_vertbuf_add(batch, verts) \
 	GWN_batch_vertbuf_add_ex(batch, verts, false)
 
+// This is a private function
+void GWN_batch_remove_interface_ref(Gwn_Batch*, const Gwn_ShaderInterface*);
+
 void GWN_batch_program_set(Gwn_Batch*, GLuint program, const Gwn_ShaderInterface*);
 void GWN_batch_program_unset(Gwn_Batch*);
 // Entire batch draws with one shader program, but can be redrawn later with another program.
@@ -84,11 +118,14 @@ void GWN_batch_uniform_4fv(Gwn_Batch*, const char* name, const float data[4]);
 
 void GWN_batch_draw(Gwn_Batch*);
 
+// This does not bind/unbind shader and does not call gpuBindMatrices()
+void GWN_batch_draw_range_ex(Gwn_Batch*, int v_first, int v_count, bool force_instance);
 
-void GWN_batch_draw_stupid(Gwn_Batch*, int v_first, int v_count);
-void GWN_batch_draw_stupid_instanced(Gwn_Batch*, Gwn_Batch*, int instance_first, int instance_count);
-void GWN_batch_draw_procedural(Gwn_Batch*, Gwn_PrimType, int v_count);
+#define GWN_batch_draw_range(batch, first, count) \
+	GWN_batch_draw_range_ex(batch, first, count, false)
 
+// Does not even need batch
+void GWN_draw_primitive(Gwn_PrimType, int v_count);
 
 #if 0 // future plans
diff --git a/intern/gawain/gawain/gwn_buffer_id.h b/intern/gawain/gawain/gwn_buffer_id.h
index db5df99f526..6f51ca6905d 100644
--- a/intern/gawain/gawain/gwn_buffer_id.h
+++ b/intern/gawain/gawain/gwn_buffer_id.h
@@ -25,10 +25,6 @@ extern "C" {
 GLuint GWN_buf_id_alloc(void);
 void GWN_buf_id_free(GLuint buffer_id);
 
-GLuint GWN_vao_alloc(void);
-void GWN_vao_free(GLuint vao_id);
-
-
 #ifdef __cplusplus
 }
 #endif
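
Usage sketch for the free-callback hook declared above (not part of the patch; the cache type and helper are hypothetical, only the GWN_* calls are real). It mirrors how the draw manager uses it further down (instance_batch_free in draw_instance_data.c):

    // A system that keys data on batches evicts its entry when the batch
    // is discarded by its owner.
    static void cache_entry_evict(Gwn_Batch* batch, void* user_data)
        {
        struct HypotheticalCache* cache = (struct HypotheticalCache*)user_data;
        hypothetical_cache_remove(cache, batch); // assumed helper
        }

    // Register once; GWN_batch_discard() invokes the callback before freeing.
    GWN_batch_callback_free_set(batch, cache_entry_evict, cache);
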
diff --git a/intern/gawain/gawain/gwn_shader_interface.h b/intern/gawain/gawain/gwn_shader_interface.h
index 345ad8d389b..3bca541d6e8 100644
--- a/intern/gawain/gawain/gwn_shader_interface.h
+++ b/intern/gawain/gawain/gwn_shader_interface.h
@@ -54,6 +54,7 @@ typedef struct Gwn_ShaderInput {
 } Gwn_ShaderInput;
 
 #define GWN_NUM_SHADERINTERFACE_BUCKETS 257
+#define GWN_SHADERINTERFACE_REF_ALLOC_COUNT 16
 
 typedef struct Gwn_ShaderInterface {
 	GLint program;
@@ -63,6 +64,8 @@ typedef struct Gwn_ShaderInterface {
 	Gwn_ShaderInput* ubo_buckets[GWN_NUM_SHADERINTERFACE_BUCKETS];
 	Gwn_ShaderInput* builtin_uniforms[GWN_NUM_UNIFORMS];
 	char* name_buffer;
+	struct Gwn_Batch** batches; // references to batches using this interface
+	unsigned batches_ct;
 } Gwn_ShaderInterface;
 
 Gwn_ShaderInterface* GWN_shaderinterface_create(GLint program_id);
@@ -72,3 +75,7 @@ const Gwn_ShaderInput* GWN_shaderinterface_uniform(const Gwn_ShaderInterface*, c
 const Gwn_ShaderInput* GWN_shaderinterface_uniform_builtin(const Gwn_ShaderInterface*, Gwn_UniformBuiltin);
 const Gwn_ShaderInput* GWN_shaderinterface_ubo(const Gwn_ShaderInterface*, const char* name);
 const Gwn_ShaderInput* GWN_shaderinterface_attr(const Gwn_ShaderInterface*, const char* name);
+
+// keep track of batches using this interface
+void GWN_shaderinterface_add_batch_ref(Gwn_ShaderInterface*, struct Gwn_Batch*);
+void GWN_shaderinterface_remove_batch_ref(Gwn_ShaderInterface*, struct Gwn_Batch*);
diff --git a/intern/gawain/gawain/gwn_vertex_array_id.h b/intern/gawain/gawain/gwn_vertex_array_id.h
index 6d2a059b9bd..1c093d428ce 100644
--- a/intern/gawain/gawain/gwn_vertex_array_id.h
+++ b/intern/gawain/gawain/gwn_vertex_array_id.h
@@ -26,8 +26,8 @@ extern "C" {
 #include "gwn_context.h"
 
 GLuint GWN_vao_default(void);
-GLuint GWN_vao_alloc_new(void);
-void GWN_vao_free_new(GLuint vao_id, Gwn_Context*);
+GLuint GWN_vao_alloc(void);
+void GWN_vao_free(GLuint vao_id, Gwn_Context*);
 
 #ifdef __cplusplus
 }
diff --git a/intern/gawain/src/gwn_batch.c b/intern/gawain/src/gwn_batch.c
index ec3f98e348c..098c547c662 100644
--- a/intern/gawain/src/gwn_batch.c
+++ b/intern/gawain/src/gwn_batch.c
@@ -11,12 +11,48 @@
 
 #include "gwn_batch.h"
 #include "gwn_buffer_id.h"
+#include "gwn_vertex_array_id.h"
 #include "gwn_primitive_private.h"
 #include <stdlib.h>
+#include <string.h>
 
 // necessary functions from matrix API
 extern void gpuBindMatrices(const Gwn_ShaderInterface* shaderface);
-extern bool gpuMatricesDirty(void); // how best to use this here?
+
+static void batch_update_program_bindings(Gwn_Batch* batch, unsigned int v_first);
+
+static void Batch_vao_cache_clear(Gwn_Batch* batch)
+    {
+    if (batch->is_dynamic_vao_count)
+        {
+        for (int i = 0; i < batch->dynamic_vaos.count; ++i)
+            {
+            if (batch->dynamic_vaos.vao_ids[i])
+                GWN_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
+            if (batch->dynamic_vaos.interfaces[i])
+                GWN_shaderinterface_remove_batch_ref((Gwn_ShaderInterface *)batch->dynamic_vaos.interfaces[i], batch);
+            }
+        free(batch->dynamic_vaos.interfaces);
+        free(batch->dynamic_vaos.vao_ids);
+        }
+    else
+        {
+        for (int i = 0; i < GWN_BATCH_VAO_STATIC_LEN; ++i)
+            {
+            if (batch->static_vaos.vao_ids[i])
+                GWN_vao_free(batch->static_vaos.vao_ids[i], batch->context);
+            if (batch->static_vaos.interfaces[i])
+                GWN_shaderinterface_remove_batch_ref((Gwn_ShaderInterface *)batch->static_vaos.interfaces[i], batch);
+            }
+        }
+
+    batch->is_dynamic_vao_count = false;
+    for (int i = 0; i < GWN_BATCH_VAO_STATIC_LEN; ++i)
+        {
+        batch->static_vaos.vao_ids[i] = 0;
+        batch->static_vaos.interfaces[i] = NULL;
+        }
+    }
 
 Gwn_Batch* GWN_batch_create_ex(
         Gwn_PrimType prim_type, Gwn_VertBuf* verts, Gwn_IndexBuf* elem,
@@ -40,11 +76,25 @@ void GWN_batch_init_ex(
 	batch->verts[0] = verts;
 	for (int v = 1; v < GWN_BATCH_VBO_MAX_LEN; ++v)
 		batch->verts[v] = NULL;
+	batch->inst = NULL;
 	batch->elem = elem;
-	batch->prim_type = prim_type;
 	batch->gl_prim_type = convert_prim_type_to_gl(prim_type);
 	batch->phase = GWN_BATCH_READY_TO_DRAW;
+	batch->is_dynamic_vao_count = false;
 	batch->owns_flag = owns_flag;
+	batch->free_callback = NULL;
 	}
+
+// This will share the VBOs with the new batch
+Gwn_Batch* GWN_batch_duplicate(Gwn_Batch* batch_src)
+    {
+    Gwn_Batch* batch = GWN_batch_create_ex(GWN_PRIM_POINTS, batch_src->verts[0], batch_src->elem, 0);
+
+    batch->gl_prim_type = batch_src->gl_prim_type;
+    for (int v = 1; v < GWN_BATCH_VBO_MAX_LEN; ++v)
+        batch->verts[v] = batch_src->verts[v];
+
+    return batch;
+    }
@@ -52,6 +102,9 @@ void GWN_batch_discard(Gwn_Batch* batch)
 	if (batch->owns_flag & GWN_BATCH_OWNS_INDEX)
 		GWN_indexbuf_discard(batch->elem);
 
+	if (batch->owns_flag & GWN_BATCH_OWNS_INSTANCES)
+		GWN_vertbuf_discard(batch->inst);
+
 	if ((batch->owns_flag & ~GWN_BATCH_OWNS_INDEX) != 0)
 		{
 		for (int v = 0; v < GWN_BATCH_VBO_MAX_LEN; ++v)
@@ -63,12 +116,39 @@ void GWN_batch_discard(Gwn_Batch* batch)
 			}
 		}
 
-	if (batch->vao_id)
-		GWN_vao_free(batch->vao_id);
+	Batch_vao_cache_clear(batch);
+
+	if (batch->free_callback)
+		batch->free_callback(batch, batch->callback_data);
 
 	free(batch);
 	}
 
+void GWN_batch_callback_free_set(Gwn_Batch* batch, void (*callback)(Gwn_Batch*, void*), void* user_data)
+    {
+    batch->free_callback = callback;
+    batch->callback_data = user_data;
+    }
+
+void GWN_batch_instbuf_set(Gwn_Batch* batch, Gwn_VertBuf* inst, bool own_vbo)
+    {
+#if TRUST_NO_ONE
+    assert(inst != NULL);
+#endif
+    // redo the bindings
+    Batch_vao_cache_clear(batch);
+
+    if (batch->inst != NULL && (batch->owns_flag & GWN_BATCH_OWNS_INSTANCES))
+        GWN_vertbuf_discard(batch->inst);
+
+    batch->inst = inst;
+
+    if (own_vbo)
+        batch->owns_flag |= GWN_BATCH_OWNS_INSTANCES;
+    else
+        batch->owns_flag &= ~GWN_BATCH_OWNS_INSTANCES;
+    }
+
 int GWN_batch_vertbuf_add_ex(
         Gwn_Batch* batch, Gwn_VertBuf* verts, bool own_vbo)
@@ -100,12 +180,96 @@ int GWN_batch_vertbuf_add_ex(
 
 void GWN_batch_program_set(Gwn_Batch* batch, GLuint program, const Gwn_ShaderInterface* shaderface)
 	{
 #if TRUST_NO_ONE
-	assert(glIsProgram(program));
+	assert(glIsProgram(shaderface->program));
+	assert(batch->program_in_use == 0);
 #endif
+	batch->vao_id = 0;
 	batch->program = program;
 	batch->interface = shaderface;
-	batch->program_dirty = true;
+
+	// Search through cache
+	if (batch->is_dynamic_vao_count)
+		{
+		for (int i = 0; i < batch->dynamic_vaos.count && batch->vao_id == 0; ++i)
+			if (batch->dynamic_vaos.interfaces[i] == shaderface)
+				batch->vao_id = batch->dynamic_vaos.vao_ids[i];
+		}
+	else
+		{
+		for (int i = 0; i < GWN_BATCH_VAO_STATIC_LEN && batch->vao_id == 0; ++i)
+			if (batch->static_vaos.interfaces[i] == shaderface)
+				batch->vao_id = batch->static_vaos.vao_ids[i];
+		}
+
+	if (batch->vao_id == 0)
+		{
+		if (batch->context == NULL)
+			batch->context = GWN_context_active_get();
+#if TRUST_NO_ONE && 0 // disabled until we use a separate single context for UI.
+		else // Make sure you are not trying to draw this batch in another context.
+			assert(batch->context == GWN_context_active_get());
+#endif
+		// Cache miss, time to add a new entry!
+		if (!batch->is_dynamic_vao_count)
+			{
+			int i; // find first unused slot
+			for (i = 0; i < GWN_BATCH_VAO_STATIC_LEN; ++i)
+				if (batch->static_vaos.vao_ids[i] == 0)
+					break;
+
+			if (i < GWN_BATCH_VAO_STATIC_LEN)
+				{
+				batch->static_vaos.interfaces[i] = shaderface;
+				batch->static_vaos.vao_ids[i] = batch->vao_id = GWN_vao_alloc();
+				}
+			else
+				{
+				// Not enough room, switch to dynamic.
+				batch->is_dynamic_vao_count = true;
+				// Erase previous entries, they will be added back if drawn again.
+				for (int j = 0; j < GWN_BATCH_VAO_STATIC_LEN; ++j)
+					{
+					GWN_shaderinterface_remove_batch_ref((Gwn_ShaderInterface*)batch->static_vaos.interfaces[j], batch);
+					GWN_vao_free(batch->static_vaos.vao_ids[j], batch->context);
+					}
+				// Init dynamic arrays and let the branch below set the values.
+				batch->dynamic_vaos.count = GWN_BATCH_VAO_DYN_ALLOC_COUNT;
+				batch->dynamic_vaos.interfaces = calloc(batch->dynamic_vaos.count, sizeof(Gwn_ShaderInterface*));
+				batch->dynamic_vaos.vao_ids = calloc(batch->dynamic_vaos.count, sizeof(GLuint));
+				}
+			}
+
+		if (batch->is_dynamic_vao_count)
+			{
+			int i; // find first unused slot
+			for (i = 0; i < batch->dynamic_vaos.count; ++i)
+				if (batch->dynamic_vaos.vao_ids[i] == 0)
+					break;
+
+			if (i == batch->dynamic_vaos.count)
+				{
+				// Not enough room, realloc the arrays.
+				i = batch->dynamic_vaos.count;
+				batch->dynamic_vaos.count += GWN_BATCH_VAO_DYN_ALLOC_COUNT;
+				batch->dynamic_vaos.interfaces = realloc(batch->dynamic_vaos.interfaces, sizeof(Gwn_ShaderInterface*) * batch->dynamic_vaos.count);
+				batch->dynamic_vaos.vao_ids = realloc(batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count);
+				memset(batch->dynamic_vaos.interfaces + i, 0, sizeof(Gwn_ShaderInterface*) * GWN_BATCH_VAO_DYN_ALLOC_COUNT);
+				memset(batch->dynamic_vaos.vao_ids + i, 0, sizeof(GLuint) * GWN_BATCH_VAO_DYN_ALLOC_COUNT);
+				}
+
+			batch->dynamic_vaos.interfaces[i] = shaderface;
+			batch->dynamic_vaos.vao_ids[i] = batch->vao_id = GWN_vao_alloc();
+			}
+
+		GWN_shaderinterface_add_batch_ref((Gwn_ShaderInterface*)shaderface, batch);
+
+		// We just got a fresh VAO; we need to initialize it.
+		glBindVertexArray(batch->vao_id);
+		batch_update_program_bindings(batch, 0);
+		glBindVertexArray(0);
+		}
 
 	GWN_batch_program_use_begin(batch); // hack! to make Batch_Uniform* simpler
 	}
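
From the caller's side, the cache above makes switching a batch between programs cheap after the first draw. A minimal sketch, assuming two valid program/interface pairs (the names are hypothetical):

    GWN_batch_program_set(batch, program_a, interface_a); // cache miss: VAO created & initialized once
    GWN_batch_draw(batch);
    GWN_batch_program_set(batch, program_b, interface_b); // second cache entry
    GWN_batch_draw(batch);
    GWN_batch_program_set(batch, program_a, interface_a); // cache hit: vao_id reused, no re-binding
    GWN_batch_draw(batch);
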
@@ -118,94 +282,104 @@ void GWN_batch_program_unset(Gwn_Batch* batch)
 	batch->program_in_use = false;
 	}
 
-static void create_bindings(Gwn_Batch* batch, const Gwn_ShaderInterface* interface, unsigned int v_first, const bool use_instancing)
+void GWN_batch_remove_interface_ref(Gwn_Batch* batch, const Gwn_ShaderInterface* interface)
 	{
-	for (int v = 0; v < GWN_BATCH_VBO_MAX_LEN; ++v)
+	if (batch->is_dynamic_vao_count)
 		{
-		Gwn_VertBuf* verts = batch->verts[v];
-		if (verts == NULL)
-			break;
-
-		const Gwn_VertFormat* format = &verts->format;
-
-		const unsigned attrib_ct = format->attrib_ct;
-		const unsigned stride = format->stride;
-
-		GWN_vertbuf_use(verts);
-
-		for (unsigned a_idx = 0; a_idx < attrib_ct; ++a_idx)
+		for (int i = 0; i < batch->dynamic_vaos.count; ++i)
 			{
-			const Gwn_VertAttr* a = format->attribs + a_idx;
-
-			const GLvoid* pointer = (const GLubyte*)0 + a->offset + v_first * stride;
-
-			for (unsigned n_idx = 0; n_idx < a->name_ct; ++n_idx)
+			if (batch->dynamic_vaos.interfaces[i] == interface)
 				{
-				const Gwn_ShaderInput* input = GWN_shaderinterface_attr(interface, a->name[n_idx]);
+				GWN_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
+				batch->dynamic_vaos.vao_ids[i] = 0;
+				batch->dynamic_vaos.interfaces[i] = NULL;
+				break; // cannot have duplicates
+				}
+			}
+		}
+	else
+		{
+		int i;
+		for (i = 0; i < GWN_BATCH_VAO_STATIC_LEN; ++i)
+			{
+			if (batch->static_vaos.interfaces[i] == interface)
+				{
+				GWN_vao_free(batch->static_vaos.vao_ids[i], batch->context);
+				batch->static_vaos.vao_ids[i] = 0;
+				batch->static_vaos.interfaces[i] = NULL;
+				break; // cannot have duplicates
+				}
+			}
+		}
+	}
 
-				if (input == NULL) continue;
+static void create_bindings(Gwn_VertBuf* verts, const Gwn_ShaderInterface* interface, unsigned int v_first, const bool use_instancing)
+	{
+	const Gwn_VertFormat* format = &verts->format;
 
-				if (a->comp_ct == 16 || a->comp_ct == 12 || a->comp_ct == 8)
-					{
+	const unsigned attrib_ct = format->attrib_ct;
+	const unsigned stride = format->stride;
+
+	GWN_vertbuf_use(verts);
+
+	for (unsigned a_idx = 0; a_idx < attrib_ct; ++a_idx)
+		{
+		const Gwn_VertAttr* a = format->attribs + a_idx;
+
+		const GLvoid* pointer = (const GLubyte*)0 + a->offset + v_first * stride;
+
+		for (unsigned n_idx = 0; n_idx < a->name_ct; ++n_idx)
+			{
+			const Gwn_ShaderInput* input = GWN_shaderinterface_attr(interface, a->name[n_idx]);
+
+			if (input == NULL) continue;
+
+			if (a->comp_ct == 16 || a->comp_ct == 12 || a->comp_ct == 8)
+				{
 #if TRUST_NO_ONE
-					assert(a->fetch_mode == GWN_FETCH_FLOAT);
-					assert(a->gl_comp_type == GL_FLOAT);
+				assert(a->fetch_mode == GWN_FETCH_FLOAT);
+				assert(a->gl_comp_type == GL_FLOAT);
 #endif
-					for (int i = 0; i < a->comp_ct / 4; ++i)
-						{
-						glEnableVertexAttribArray(input->location + i);
-						glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0);
-						glVertexAttribPointer(input->location + i, 4, a->gl_comp_type, GL_FALSE, stride,
-						                      (const GLubyte*)pointer + i * 16);
-						}
-					}
-				else
+				for (int i = 0; i < a->comp_ct / 4; ++i)
 					{
-					glEnableVertexAttribArray(input->location);
-					glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0);
+					glEnableVertexAttribArray(input->location + i);
+					glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0);
+					glVertexAttribPointer(input->location + i, 4, a->gl_comp_type, GL_FALSE, stride,
+					                      (const GLubyte*)pointer + i * 16);
+					}
+				}
+			else
+				{
+				glEnableVertexAttribArray(input->location);
+				glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0);
 
-					switch (a->fetch_mode)
-						{
-						case GWN_FETCH_FLOAT:
-						case GWN_FETCH_INT_TO_FLOAT:
-							glVertexAttribPointer(input->location, a->comp_ct, a->gl_comp_type, GL_FALSE, stride, pointer);
-							break;
-						case GWN_FETCH_INT_TO_FLOAT_UNIT:
-							glVertexAttribPointer(input->location, a->comp_ct, a->gl_comp_type, GL_TRUE, stride, pointer);
-							break;
-						case GWN_FETCH_INT:
-							glVertexAttribIPointer(input->location, a->comp_ct, a->gl_comp_type, stride, pointer);
-						}
+				switch (a->fetch_mode)
+					{
+					case GWN_FETCH_FLOAT:
+					case GWN_FETCH_INT_TO_FLOAT:
+						glVertexAttribPointer(input->location, a->comp_ct, a->gl_comp_type, GL_FALSE, stride, pointer);
+						break;
+					case GWN_FETCH_INT_TO_FLOAT_UNIT:
+						glVertexAttribPointer(input->location, a->comp_ct, a->gl_comp_type, GL_TRUE, stride, pointer);
+						break;
+					case GWN_FETCH_INT:
+						glVertexAttribIPointer(input->location, a->comp_ct, a->gl_comp_type, stride, pointer);
					}
 				}
 			}
 		}
 	}
 
-static void Batch_update_program_bindings(Gwn_Batch* batch, unsigned int v_first)
+static void batch_update_program_bindings(Gwn_Batch* batch, unsigned int v_first)
 	{
-	// disable all as a precaution
-	// why are we not using prev_attrib_enabled_bits?? see immediate.c
-	for (unsigned a_idx = 0; a_idx < GWN_VERT_ATTR_MAX_LEN; ++a_idx)
-		glDisableVertexAttribArray(a_idx);
+	for (int v = 0; v < GWN_BATCH_VBO_MAX_LEN && batch->verts[v] != NULL; ++v)
+		create_bindings(batch->verts[v], batch->interface, (batch->inst) ? 0 : v_first, false);
 
-	create_bindings(batch, batch->interface, v_first, false);
+	if (batch->inst)
+		create_bindings(batch->inst, batch->interface, v_first, true);
 
-	batch->program_dirty = false;
-	}
-
-static void Batch_update_program_bindings_instancing(Gwn_Batch* batch, Gwn_Batch* batch_instancing, unsigned int instance_first)
-	{
-	// disable all as a precaution
-	// why are we not using prev_attrib_enabled_bits?? see immediate.c
-	for (unsigned a_idx = 0; a_idx < GWN_VERT_ATTR_MAX_LEN; ++a_idx)
-		glDisableVertexAttribArray(a_idx);
-
-	create_bindings(batch, batch->interface, 0, false);
-	if (batch_instancing)
-		create_bindings(batch_instancing, batch->interface, instance_first, true);
-
-	batch->program_dirty = false;
+	if (batch->elem)
+		GWN_indexbuf_use(batch->elem);
 	}
 
 void GWN_batch_program_use_begin(Gwn_Batch* batch)
@@ -290,142 +464,86 @@ void GWN_batch_uniform_4fv(Gwn_Batch* batch, const char* name, const float data[
 	glUniform4fv(uniform->location, 1, data);
 	}
 
-static void Batch_prime(Gwn_Batch* batch)
-	{
-	batch->vao_id = GWN_vao_alloc();
-	glBindVertexArray(batch->vao_id);
-
-	for (int v = 0; v < GWN_BATCH_VBO_MAX_LEN; ++v)
-		{
-		if (batch->verts[v] == NULL)
-			break;
-		GWN_vertbuf_use(batch->verts[v]);
-		}
-
-	if (batch->elem)
-		GWN_indexbuf_use(batch->elem);
-
-	// vertex attribs and element list remain bound to this VAO
-	}
-
 void GWN_batch_draw(Gwn_Batch* batch)
 	{
 #if TRUST_NO_ONE
 	assert(batch->phase == GWN_BATCH_READY_TO_DRAW);
-	assert(glIsProgram(batch->program));
+	assert(batch->verts[0]->vbo_id != 0);
 #endif
-
-	if (batch->vao_id)
-		glBindVertexArray(batch->vao_id);
-	else
-		Batch_prime(batch);
-
-	if (batch->program_dirty)
-		Batch_update_program_bindings(batch, 0);
-
 	GWN_batch_program_use_begin(batch);
+	gpuBindMatrices(batch->interface); // external call.
 
-	gpuBindMatrices(batch->interface);
-
-	if (batch->elem)
-		{
-		const Gwn_IndexBuf* el = batch->elem;
-
-#if GWN_TRACK_INDEX_RANGE
-		if (el->base_index)
-			glDrawRangeElementsBaseVertex(batch->gl_prim_type, el->min_index, el->max_index, el->index_ct, el->gl_index_type, 0, el->base_index);
-		else
-			glDrawRangeElements(batch->gl_prim_type, el->min_index, el->max_index, el->index_ct, el->gl_index_type, 0);
-#else
-		glDrawElements(batch->gl_prim_type, el->index_ct, GL_UNSIGNED_INT, 0);
-#endif
-		}
-	else
-		glDrawArrays(batch->gl_prim_type, 0, batch->verts[0]->vertex_ct);
+	GWN_batch_draw_range_ex(batch, 0, 0, false);
 
 	GWN_batch_program_use_end(batch);
-	glBindVertexArray(0);
 	}
 
-void GWN_batch_draw_stupid(Gwn_Batch* batch, int v_first, int v_count)
-	{
-	if (batch->vao_id)
-		glBindVertexArray(batch->vao_id);
-	else
-		Batch_prime(batch);
-
-	if (batch->program_dirty)
-		Batch_update_program_bindings(batch, v_first);
-
-	// GWN_batch_program_use_begin(batch);
-
-	//gpuBindMatrices(batch->program);
-
-	// Infer lenght if vertex count is not given
-	if (v_count == 0)
-		v_count = (batch->elem) ? batch->elem->index_ct : batch->verts[0]->vertex_ct;
-
-	if (batch->elem)
-		{
-		const Gwn_IndexBuf* el = batch->elem;
-
-#if GWN_TRACK_INDEX_RANGE
-		if (el->base_index)
-			glDrawRangeElementsBaseVertex(batch->gl_prim_type, el->min_index, el->max_index, v_count, el->gl_index_type, 0, el->base_index);
-		else
-			glDrawRangeElements(batch->gl_prim_type, el->min_index, el->max_index, v_count, el->gl_index_type, 0);
-#else
-		glDrawElements(batch->gl_prim_type, v_count, GL_UNSIGNED_INT, 0);
-#endif
-		}
-	else
-		glDrawArrays(batch->gl_prim_type, 0, v_count);
-
-	// GWN_batch_program_use_end(batch);
-	glBindVertexArray(0);
-	}
-
-void GWN_batch_draw_stupid_instanced(Gwn_Batch* batch_instanced, Gwn_Batch* batch_instancing, int instance_first, int instance_count)
+void GWN_batch_draw_range_ex(Gwn_Batch* batch, int v_first, int v_count, bool force_instance)
 	{
 #if TRUST_NO_ONE
-	// batch_instancing can be null if the number of instances is specified.
-	assert(batch_instancing != NULL || instance_count != 0);
+	assert(!(force_instance && (batch->inst == NULL)) || v_count > 0); // we cannot infer length if force_instance
 #endif
-	if (batch_instanced->vao_id)
-		glBindVertexArray(batch_instanced->vao_id);
-	else
-		Batch_prime(batch_instanced);
 
-	if (batch_instanced->program_dirty)
-		Batch_update_program_bindings_instancing(batch_instanced, batch_instancing, instance_first);
-
-	if (instance_count == 0)
-		instance_count = batch_instancing->verts[0]->vertex_ct;
-
-	if (batch_instanced->elem)
+	// If using offset drawing, use the default VAO and redo bindings.
+	if (v_first != 0)
 		{
-		const Gwn_IndexBuf* el = batch_instanced->elem;
-
-#if GWN_TRACK_INDEX_RANGE
-		glDrawElementsInstancedBaseVertex(batch_instanced->gl_prim_type, el->index_ct, el->gl_index_type, 0, instance_count, el->base_index);
-#else
-		glDrawElementsInstanced(batch_instanced->gl_prim_type, el->index_ct, GL_UNSIGNED_INT, 0, instance_count);
-#endif
+		glBindVertexArray(GWN_vao_default());
+		batch_update_program_bindings(batch, v_first);
 		}
 	else
-		glDrawArraysInstanced(batch_instanced->gl_prim_type, 0, batch_instanced->verts[0]->vertex_ct, instance_count);
+		glBindVertexArray(batch->vao_id);
+
+	if (force_instance || batch->inst)
+		{
+		// Infer length if instance count is not given
+		if (v_count == 0)
+			v_count = batch->inst->vertex_ct;
+
+		if (batch->elem)
+			{
+			const Gwn_IndexBuf* el = batch->elem;
+
+#if GWN_TRACK_INDEX_RANGE
+			glDrawElementsInstancedBaseVertex(batch->gl_prim_type, el->index_ct, el->gl_index_type, 0, v_count, el->base_index);
+#else
+			glDrawElementsInstanced(batch->gl_prim_type, el->index_ct, GL_UNSIGNED_INT, 0, v_count);
+#endif
+			}
+		else
+			glDrawArraysInstanced(batch->gl_prim_type, 0, batch->verts[0]->vertex_ct, v_count);
+		}
+	else
+		{
+		// Infer length if vertex count is not given
+		if (v_count == 0)
+			v_count = (batch->elem) ? batch->elem->index_ct : batch->verts[0]->vertex_ct;
+
+		if (batch->elem)
+			{
+			const Gwn_IndexBuf* el = batch->elem;
+
+#if GWN_TRACK_INDEX_RANGE
+			if (el->base_index)
+				glDrawRangeElementsBaseVertex(batch->gl_prim_type, el->min_index, el->max_index, v_count, el->gl_index_type, 0, el->base_index);
+			else
+				glDrawRangeElements(batch->gl_prim_type, el->min_index, el->max_index, v_count, el->gl_index_type, 0);
+#else
+			glDrawElements(batch->gl_prim_type, v_count, GL_UNSIGNED_INT, 0);
+#endif
+			}
+		else
+			glDrawArrays(batch->gl_prim_type, 0, v_count);
+		}
+
 	glBindVertexArray(0);
 	}
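
The single entry point above replaces the old draw_stupid/draw_stupid_instanced pair. A usage sketch (counts hypothetical; the program must already be set via GWN_batch_program_set() and matrices bound by the caller, since this function does neither):

    GWN_batch_draw_range_ex(batch, 0, 0, false); // full range; v_count == 0 infers the length
    GWN_batch_draw_range(batch, 100, 50);        // sub-range; v_first != 0 rebinds on the default VAO (slower path)
    GWN_batch_draw_range_ex(batch, 0, 0, true);  // instanced: with batch->inst set, v_count is the instance count
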
 // just draw some vertices and let shader place them where we want.
-void GWN_batch_draw_procedural(Gwn_Batch* batch, Gwn_PrimType prim_type, int v_count)
+void GWN_draw_primitive(Gwn_PrimType prim_type, int v_count)
 	{
 	// we cannot draw without vao ... annoying ...
-	if (batch->vao_id)
-		glBindVertexArray(batch->vao_id);
-	else
-		Batch_prime(batch);
+	glBindVertexArray(GWN_vao_default());
 
 	GLenum type = convert_prim_type_to_gl(prim_type);
 	glDrawArrays(type, 0, v_count);
diff --git a/intern/gawain/src/gwn_buffer_id.cpp b/intern/gawain/src/gwn_buffer_id.cpp
index a93c3950d29..64bad855ca7 100644
--- a/intern/gawain/src/gwn_buffer_id.cpp
+++ b/intern/gawain/src/gwn_buffer_id.cpp
@@ -20,7 +20,6 @@
 #endif
 
 static std::vector<GLuint> orphaned_buffer_ids;
-static std::vector<GLuint> orphaned_vao_ids;
 
 static std::mutex orphan_mutex;
 
@@ -36,10 +35,6 @@ static bool thread_is_main()
 
 GLuint GWN_buf_id_alloc()
 	{
-#if TRUST_NO_ONE
-	assert(thread_is_main());
-#endif
-
 	// delete orphaned IDs
 	orphan_mutex.lock();
 	if (!orphaned_buffer_ids.empty())
@@ -73,43 +68,3 @@ void GWN_buf_id_free(GLuint buffer_id)
 		orphan_mutex.unlock();
 		}
 	}
-
-GLuint GWN_vao_alloc()
-	{
-#if TRUST_NO_ONE
-	assert(thread_is_main());
-#endif
-
-	// delete orphaned IDs
-	orphan_mutex.lock();
-	if (!orphaned_vao_ids.empty())
-		{
-		const auto orphaned_vao_ct = (unsigned)orphaned_vao_ids.size();
-#if ORPHAN_DEBUG
-		printf("deleting %u orphaned VAO%s\n", orphaned_vao_ct, orphaned_vao_ct == 1 ? "" : "s");
-#endif
-		glDeleteVertexArrays(orphaned_vao_ct, orphaned_vao_ids.data());
-		orphaned_vao_ids.clear();
-		}
-	orphan_mutex.unlock();
-
-	GLuint new_vao_id = 0;
-	glGenVertexArrays(1, &new_vao_id);
-	return new_vao_id;
-	}
-
-void GWN_vao_free(GLuint vao_id)
-	{
-	if (thread_is_main())
-		glDeleteVertexArrays(1, &vao_id);
-	else
-		{
-		// add this ID to the orphaned list
-		orphan_mutex.lock();
-#if ORPHAN_DEBUG
-		printf("orphaning VAO %u\n", vao_id);
-#endif
-		orphaned_vao_ids.emplace_back(vao_id);
-		orphan_mutex.unlock();
-		}
-	}
diff --git a/intern/gawain/src/gwn_immediate.c b/intern/gawain/src/gwn_immediate.c
index 1c0776d1bbf..f063665b423 100644
--- a/intern/gawain/src/gwn_immediate.c
+++ b/intern/gawain/src/gwn_immediate.c
@@ -14,6 +14,7 @@
 #include "gwn_attr_binding.h"
 #include "gwn_attr_binding_private.h"
 #include "gwn_vertex_format_private.h"
+#include "gwn_vertex_array_id.h"
 #include "gwn_primitive_private.h"
 #include <string.h>
 
@@ -27,6 +28,7 @@ typedef struct {
 #if IMM_BATCH_COMBO
 	Gwn_Batch* batch;
 #endif
+	Gwn_Context* context;
 
 	// current draw call
 	GLubyte* buffer_data;
@@ -86,8 +88,8 @@ void immActivate(void)
 	assert(imm.prim_type == GWN_PRIM_NONE); // make sure we're not between a Begin/End pair
 	assert(imm.vao_id == 0);
 #endif
-
 	imm.vao_id = GWN_vao_alloc();
+	imm.context = GWN_context_active_get();
 	}
 
 void immDeactivate(void)
@@ -97,8 +99,7 @@ void immDeactivate(void)
 	assert(imm.prim_type == GWN_PRIM_NONE); // make sure we're not between a Begin/End pair
 	assert(imm.vao_id != 0);
 #endif
-
-	GWN_vao_free(imm.vao_id);
+	GWN_vao_free(imm.vao_id, imm.context);
 	imm.vao_id = 0;
 	imm.prev_enabled_attrib_bits = 0;
 	}
diff --git a/intern/gawain/src/gwn_shader_interface.c b/intern/gawain/src/gwn_shader_interface.c
index 33821ae36e2..ef3e8f0f3fa 100644
--- a/intern/gawain/src/gwn_shader_interface.c
+++ b/intern/gawain/src/gwn_shader_interface.c
@@ -10,6 +10,7 @@
 // the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 #include "gwn_shader_interface.h"
+#include "gwn_vertex_array_id.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -263,6 +264,10 @@ Gwn_ShaderInterface* GWN_shaderinterface_create(GLint program)
 #endif
 		}
 
+	// Batches ref buffer
+	shaderface->batches_ct = GWN_SHADERINTERFACE_REF_ALLOC_COUNT;
+	shaderface->batches = calloc(shaderface->batches_ct, sizeof(Gwn_Batch*));
+
 	return shaderface;
 	}
 
@@ -274,6 +279,12 @@ void GWN_shaderinterface_discard(Gwn_ShaderInterface* shaderface)
 	buckets_free(shaderface->ubo_buckets);
 	// Free memory used by name_buffer.
 	free(shaderface->name_buffer);
+	// Remove this interface from all linked batches' VAO caches.
+	for (int i = 0; i < shaderface->batches_ct; ++i)
+		if (shaderface->batches[i] != NULL)
+			GWN_batch_remove_interface_ref(shaderface->batches[i], shaderface);
+
+	free(shaderface->batches);
 	// Free memory used by shader interface by its self.
 	free(shaderface);
 	}
@@ -316,3 +327,34 @@ const Gwn_ShaderInput* GWN_shaderinterface_attr(const Gwn_ShaderInterface* shade
 	{
 	return buckets_lookup(shaderface->attrib_buckets, shaderface->name_buffer, name);
 	}
+
+void GWN_shaderinterface_add_batch_ref(Gwn_ShaderInterface* shaderface, Gwn_Batch* batch)
+    {
+    int i; // find first unused slot
+    for (i = 0; i < shaderface->batches_ct; ++i)
+        if (shaderface->batches[i] == NULL)
+            break;
+
+    if (i == shaderface->batches_ct)
+        {
+        // Not enough room, realloc the array.
+        i = shaderface->batches_ct;
+        shaderface->batches_ct += GWN_SHADERINTERFACE_REF_ALLOC_COUNT;
+        shaderface->batches = realloc(shaderface->batches, sizeof(Gwn_Batch*) * shaderface->batches_ct);
+        memset(shaderface->batches + i, 0, sizeof(Gwn_Batch*) * GWN_SHADERINTERFACE_REF_ALLOC_COUNT);
+        }
+
+    shaderface->batches[i] = batch;
+    }
+
+void GWN_shaderinterface_remove_batch_ref(Gwn_ShaderInterface* shaderface, Gwn_Batch* batch)
+    {
+    for (int i = 0; i < shaderface->batches_ct; ++i)
+        {
+        if (shaderface->batches[i] == batch)
+            {
+            shaderface->batches[i] = NULL;
+            break; // cannot have duplicates
+            }
+        }
+    }
diff --git a/intern/gawain/src/gwn_vertex_array_id.cpp b/intern/gawain/src/gwn_vertex_array_id.cpp
index 602c1c4919c..27010f03bc0 100644
--- a/intern/gawain/src/gwn_vertex_array_id.cpp
+++ b/intern/gawain/src/gwn_vertex_array_id.cpp
@@ -109,7 +109,7 @@ GLuint GWN_vao_default(void)
 	return active_ctx->default_vao;
 	}
 
-GLuint GWN_vao_alloc_new(void)
+GLuint GWN_vao_alloc(void)
 	{
 #if TRUST_NO_ONE
 	assert(active_ctx); // need at least an active context
@@ -123,7 +123,7 @@ GLuint GWN_vao_alloc_new(void)
 	}
 
 // this can be called from multiple thread
-void GWN_vao_free_new(GLuint vao_id, Gwn_Context* ctx)
+void GWN_vao_free(GLuint vao_id, Gwn_Context* ctx)
 	{
 	if (ctx == active_ctx)
 		glDeleteVertexArrays(1, &vao_id);
 	else
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h
index f62b224b094..82ba2922dd0 100644
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -341,7 +341,7 @@ typedef void (DRWCallGenerateFn)(
 	void (*draw_fn)(DRWShadingGroup *shgroup, struct Gwn_Batch *geom),
 	void *user_data);
 
-void DRW_shgroup_instance_batch(DRWShadingGroup *shgroup, struct Gwn_Batch *instances);
+void DRW_shgroup_instance_batch(DRWShadingGroup *shgroup, struct Gwn_Batch *batch);
 
 void DRW_shgroup_free(struct DRWShadingGroup *shgroup);
 void DRW_shgroup_call_add(DRWShadingGroup *shgroup, struct Gwn_Batch *geom, float (*obmat)[4]);
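
Since VAO names are not shared between OpenGL contexts, the renamed allocator pair above is context-aware. Intended usage, sketched (the context handle is whatever the batch captured at creation; only calls visible in this patch are used):

    Gwn_Context* ctx = GWN_context_active_get(); // context that will own the VAO
    GLuint vao = GWN_vao_alloc();                // allocates in the active context
    /* ... record bindings, draw ... */
    GWN_vao_free(vao, ctx); // deletes immediately if ctx is active, defers otherwise
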
diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c
index c2aae8e33ae..bfff1a2f546 100644
--- a/source/blender/draw/intern/draw_instance_data.c
+++ b/source/blender/draw/intern/draw_instance_data.c
@@ -42,12 +42,29 @@
 #define BUFFER_CHUNK_SIZE 32
 #define BUFFER_VERTS_CHUNK 32
 
-typedef struct DRWInstanceBuffer {
+typedef struct DRWBatchingBuffer {
 	struct DRWShadingGroup *shgroup;  /* Link back to the owning shGroup. Also tells if it's used */
 	Gwn_VertFormat *format;           /* Identifier. */
 	Gwn_VertBuf *vert;                /* Gwn_VertBuf contained in the Gwn_Batch. */
 	Gwn_Batch *batch;                 /* Gwn_Batch containing the Gwn_VertBuf. */
-} DRWInstanceBuffer;
+} DRWBatchingBuffer;
+
+typedef struct DRWInstancingBuffer {
+	struct DRWShadingGroup *shgroup;  /* Link back to the owning shGroup. Also tells if it's used */
+	Gwn_VertFormat *format;           /* Identifier. */
+	Gwn_Batch *instance;              /* Identifier. */
+	Gwn_VertBuf *vert;                /* Gwn_VertBuf contained in the Gwn_Batch. */
+	Gwn_Batch *batch;                 /* Gwn_Batch containing the Gwn_VertBuf. */
+} DRWInstancingBuffer;
+
+typedef struct DRWInstanceChunk {
+	size_t cursor;      /* Offset to the next instance data. */
+	size_t alloc_size;  /* Number of DRWBatchingBuffer/Batches alloc'd in ibufs/btchs. */
+	union {
+		DRWBatchingBuffer *bbufs;
+		DRWInstancingBuffer *ibufs;
+	};
+} DRWInstanceChunk;
 
 struct DRWInstanceData {
 	struct DRWInstanceData *next;
@@ -60,19 +77,19 @@ struct DRWInstanceData {
 };
 
 struct DRWInstanceDataList {
+	struct DRWInstanceDataList *next, *prev;
 	/* Linked lists for all possible data pool size */
 	/* Not entirely sure if we should separate them in the first place.
 	 * This is done to minimize the reattribution misses. */
 	DRWInstanceData *idata_head[MAX_INSTANCE_DATA_SIZE];
 	DRWInstanceData *idata_tail[MAX_INSTANCE_DATA_SIZE];
 
-	struct {
-		size_t cursor;     /* Offset to the next instance data. */
-		size_t alloc_size; /* Number of DRWInstanceBuffer alloc'd in ibufs. */
-		DRWInstanceBuffer *ibufs;
-	} ibuffers;
+	DRWInstanceChunk instancing;
+	DRWInstanceChunk batching;
 };
 
+static ListBase g_idatalists = {NULL, NULL};
+
 /* -------------------------------------------------------------------- */
 
 /** \name Instance Buffer Management
@@ -87,89 +104,174 @@ struct DRWInstanceDataList {
 * that would be too slow]).
 **/
 
-void DRW_instance_buffer_request(
-        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, struct DRWShadingGroup *shgroup,
-        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert, Gwn_PrimType type)
+static void instance_batch_free(Gwn_Batch *batch, void *UNUSED(user_data))
 {
-	BLI_assert(format);
-
-	DRWInstanceBuffer *ibuf = idatalist->ibuffers.ibufs;
-	int first_non_alloced = -1;
-
-	/* Search for an unused batch. */
-	for (int i = 0; i < idatalist->ibuffers.alloc_size; i++, ibuf++) {
-		if (ibuf->shgroup == NULL) {
-			if (ibuf->format == format) {
-				ibuf->shgroup = shgroup;
-				*r_batch = ibuf->batch;
-				*r_vert = ibuf->vert;
-				return;
-			}
-			else if (ibuf->format == NULL && first_non_alloced == -1) {
-				first_non_alloced = i;
+	/* Free all batches that have the same key before they are reused. */
+	/* TODO: Make it thread safe! Batch freeing can happen from another thread. */
+	/* XXX we need to iterate over all idatalists unless we make some smart
+	 * data structure to store the locations to update. */
+	for (DRWInstanceDataList *idatalist = g_idatalists.first; idatalist; idatalist = idatalist->next) {
+		DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
+		for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
+			if (ibuf->instance == batch) {
+				BLI_assert(ibuf->shgroup == NULL); /* Make sure it has no other users. */
+				GWN_VERTBUF_DISCARD_SAFE(ibuf->vert);
+				GWN_BATCH_DISCARD_SAFE(ibuf->batch);
+				/* Tag as non alloced. */
+				ibuf->format = NULL;
 			}
 		}
 	}
+}
 
-	if (first_non_alloced == -1) {
-		/* There is no batch left. Allocate more. */
-		first_non_alloced = idatalist->ibuffers.alloc_size;
-		idatalist->ibuffers.alloc_size += BUFFER_CHUNK_SIZE;
-		idatalist->ibuffers.ibufs = MEM_reallocN(idatalist->ibuffers.ibufs,
-		                                         idatalist->ibuffers.alloc_size * sizeof(DRWInstanceBuffer));
-		/* Clear new part of the memory. */
-		memset(idatalist->ibuffers.ibufs + first_non_alloced, 0, sizeof(DRWInstanceBuffer) * BUFFER_CHUNK_SIZE);
+void DRW_batching_buffer_request(
+        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, Gwn_PrimType type, struct DRWShadingGroup *shgroup,
+        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert)
+{
+	DRWInstanceChunk *chunk = &idatalist->batching;
+	DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
+	BLI_assert(format);
+	/* Search for an unused batch. */
+	for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
+		if (bbuf->shgroup == NULL) {
+			if (bbuf->format == format) {
+				bbuf->shgroup = shgroup;
+				*r_batch = bbuf->batch;
+				*r_vert = bbuf->vert;
+				return;
+			}
+		}
+	}
+	int new_id = 0; /* Find insertion point. */
+	for (; new_id < chunk->alloc_size; ++new_id) {
+		if (chunk->bbufs[new_id].format == NULL)
+			break;
+	}
+	/* If there is no batch left, allocate more. */
+	if (new_id == chunk->alloc_size) {
+		new_id = chunk->alloc_size;
+		chunk->alloc_size += BUFFER_CHUNK_SIZE;
+		chunk->bbufs = MEM_reallocN(chunk->bbufs, chunk->alloc_size * sizeof(DRWBatchingBuffer));
+		memset(chunk->bbufs + new_id, 0, sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE);
 	}
-	/* Create the batch. */
-	ibuf = idatalist->ibuffers.ibufs + first_non_alloced;
+	bbuf = chunk->bbufs + new_id;
+	bbuf->vert = *r_vert = GWN_vertbuf_create_dynamic_with_format(format);
+	bbuf->batch = *r_batch = GWN_batch_create_ex(type, bbuf->vert, NULL, 0);
+	bbuf->format = format;
+	bbuf->shgroup = shgroup;
+	GWN_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
+}
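
Caller-side sketch of the batching variant (idatalist/shgroup/format stand in for whatever the caller already holds; the real call site is drw_interface_batching_init() in draw_manager.c below):

    Gwn_Batch *batch;
    Gwn_VertBuf *vbo;
    DRW_batching_buffer_request(idatalist, g_pos_format, GWN_PRIM_POINTS, shgroup,
                                &batch, &vbo);
    // Fill vbo during the frame; DRW_instance_buffer_finish() later resizes
    // and uploads it, and reclaims buffers that went unused.
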
+
+void DRW_instancing_buffer_request(
+        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, Gwn_Batch *instance, struct DRWShadingGroup *shgroup,
+        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert)
+{
+	DRWInstanceChunk *chunk = &idatalist->instancing;
+	DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
+	BLI_assert(format);
+	/* Search for an unused batch. */
+	for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
+		if (ibuf->shgroup == NULL) {
+			if (ibuf->format == format) {
+				if (ibuf->instance == instance) {
+					ibuf->shgroup = shgroup;
+					*r_batch = ibuf->batch;
+					*r_vert = ibuf->vert;
+					return;
+				}
+			}
+		}
+	}
+	int new_id = 0; /* Find insertion point. */
+	for (; new_id < chunk->alloc_size; ++new_id) {
+		if (chunk->ibufs[new_id].format == NULL)
+			break;
+	}
+	/* If there is no batch left, allocate more. */
+	if (new_id == chunk->alloc_size) {
+		new_id = chunk->alloc_size;
+		chunk->alloc_size += BUFFER_CHUNK_SIZE;
+		chunk->ibufs = MEM_reallocN(chunk->ibufs, chunk->alloc_size * sizeof(DRWInstancingBuffer));
+		memset(chunk->ibufs + new_id, 0, sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE);
+	}
+	/* Create the batch. */
+	ibuf = chunk->ibufs + new_id;
 	ibuf->vert = *r_vert = GWN_vertbuf_create_dynamic_with_format(format);
-	ibuf->batch = *r_batch = GWN_batch_create_ex(type, ibuf->vert, NULL, GWN_BATCH_OWNS_VBO);
+	ibuf->batch = *r_batch = GWN_batch_duplicate(instance);
 	ibuf->format = format;
 	ibuf->shgroup = shgroup;
-
+	ibuf->instance = instance;
 	GWN_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
+	GWN_batch_instbuf_set(ibuf->batch, ibuf->vert, false);
+	/* Make sure to free this ibuf if the instance batch gets freed. */
+	GWN_batch_callback_free_set(instance, &instance_batch_free, NULL);
 }
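
The instancing variant keys entries on (format, instance batch) and ties the new gawain pieces together; a sketch with a hypothetical instance_geom:

    Gwn_Batch *batch;
    Gwn_VertBuf *inst_vbo;
    DRW_instancing_buffer_request(idatalist, format, instance_geom, shgroup,
                                  &batch, &inst_vbo);
    // batch is a GWN_batch_duplicate() of instance_geom with inst_vbo attached
    // through GWN_batch_instbuf_set(); the instance_batch_free() callback
    // registered on instance_geom evicts this entry if the source batch is
    // discarded elsewhere.
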
 
 void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist)
 {
-	DRWInstanceBuffer *ibuf = idatalist->ibuffers.ibufs;
-	size_t minimum_alloc_size = 1; /* Avoid 0 size realloc. */
-
+	size_t realloc_size = 1; /* Avoid 0 size realloc. */
 	/* Resize down buffers in use and send data to GPU & free unused buffers. */
-	for (int i = 0; i < idatalist->ibuffers.alloc_size; i++, ibuf++) {
+	DRWInstanceChunk *batching = &idatalist->batching;
+	DRWBatchingBuffer *bbuf = batching->bbufs;
+	for (int i = 0; i < batching->alloc_size; i++, bbuf++) {
+		if (bbuf->shgroup != NULL) {
+			realloc_size = i + 1;
+			unsigned int vert_ct = DRW_shgroup_get_instance_count(bbuf->shgroup);
+			vert_ct += (vert_ct == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
+			if (vert_ct + BUFFER_VERTS_CHUNK <= bbuf->vert->vertex_ct) {
+				unsigned int size = vert_ct + BUFFER_VERTS_CHUNK - 1;
+				size = size - size % BUFFER_VERTS_CHUNK;
+				GWN_vertbuf_data_resize(bbuf->vert, size);
+			}
+			GWN_vertbuf_use(bbuf->vert); /* Send data. */
+			bbuf->shgroup = NULL; /* Set as non used for the next round. */
+		}
+		else {
+			GWN_VERTBUF_DISCARD_SAFE(bbuf->vert);
+			GWN_BATCH_DISCARD_SAFE(bbuf->batch);
+			bbuf->format = NULL; /* Tag as non alloced. */
+		}
+	}
+	/* Rounding up to nearest chunk size. */
+	realloc_size += BUFFER_CHUNK_SIZE - 1;
+	realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
+	/* Resize down if necessary. */
+	if (realloc_size < batching->alloc_size) {
+		batching->alloc_size = realloc_size;
+		batching->bbufs = MEM_reallocN(batching->bbufs, realloc_size * sizeof(DRWBatchingBuffer));
+	}
+
+	realloc_size = 1;
+	/* Resize down buffers in use and send data to GPU & free unused buffers. */
+	DRWInstanceChunk *instancing = &idatalist->instancing;
+	DRWInstancingBuffer *ibuf = instancing->ibufs;
+	for (int i = 0; i < instancing->alloc_size; i++, ibuf++) {
 		if (ibuf->shgroup != NULL) {
-			minimum_alloc_size = i + 1;
+			realloc_size = i + 1;
 			unsigned int vert_ct = DRW_shgroup_get_instance_count(ibuf->shgroup);
-			/* Do not realloc to 0 size buffer */
-			vert_ct += (vert_ct == 0) ? 1 : 0;
-			/* Resize buffer to reclame space. */
+			vert_ct += (vert_ct == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
 			if (vert_ct + BUFFER_VERTS_CHUNK <= ibuf->vert->vertex_ct) {
 				unsigned int size = vert_ct + BUFFER_VERTS_CHUNK - 1;
 				size = size - size % BUFFER_VERTS_CHUNK;
 				GWN_vertbuf_data_resize(ibuf->vert, size);
 			}
-			/* Send data. */
-			GWN_vertbuf_use(ibuf->vert);
-			/* Set as non used for the next round. */
-			ibuf->shgroup = NULL;
+			GWN_vertbuf_use(ibuf->vert); /* Send data. */
+			ibuf->shgroup = NULL; /* Set as non used for the next round. */
 		}
 		else {
+			GWN_VERTBUF_DISCARD_SAFE(ibuf->vert);
 			GWN_BATCH_DISCARD_SAFE(ibuf->batch);
-			/* Tag as non alloced. */
-			ibuf->format = NULL;
+			ibuf->format = NULL; /* Tag as non alloced. */
 		}
 	}
-
-	/* Resize down the handle buffer (ibuffers). */
 	/* Rounding up to nearest chunk size. */
-	minimum_alloc_size += BUFFER_CHUNK_SIZE - 1;
-	minimum_alloc_size -= minimum_alloc_size % BUFFER_CHUNK_SIZE;
+	realloc_size += BUFFER_CHUNK_SIZE - 1;
+	realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
 	/* Resize down if necessary. */
-	if (minimum_alloc_size < idatalist->ibuffers.alloc_size) {
-		idatalist->ibuffers.alloc_size = minimum_alloc_size;
-		idatalist->ibuffers.ibufs = MEM_reallocN(idatalist->ibuffers.ibufs,
-		                                         minimum_alloc_size * sizeof(DRWInstanceBuffer));
+	if (realloc_size < instancing->alloc_size) {
+		instancing->alloc_size = realloc_size;
+		instancing->ibufs = MEM_reallocN(instancing->ibufs, realloc_size * sizeof(DRWInstancingBuffer));
 	}
 }
@@ -183,7 +285,7 @@ void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist)
 static DRWInstanceData *drw_instance_data_create(
         DRWInstanceDataList *idatalist, unsigned int attrib_size, unsigned int instance_group)
 {
-	DRWInstanceData *idata = MEM_mallocN(sizeof(DRWInstanceData), "DRWInstanceData");
+	DRWInstanceData *idata = MEM_callocN(sizeof(DRWInstanceData), "DRWInstanceData");
 	idata->next = NULL;
 	idata->used = true;
 	idata->data_size = attrib_size;
@@ -263,15 +365,18 @@ DRWInstanceData *DRW_instance_data_request(
 DRWInstanceDataList *DRW_instance_data_list_create(void)
 {
 	DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList");
-	idatalist->ibuffers.ibufs = MEM_callocN(sizeof(DRWInstanceBuffer) * BUFFER_CHUNK_SIZE, "DRWInstanceBuffers");
-	idatalist->ibuffers.alloc_size = BUFFER_CHUNK_SIZE;
+	idatalist->batching.bbufs = MEM_callocN(sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE, "DRWBatchingBuffers");
+	idatalist->batching.alloc_size = BUFFER_CHUNK_SIZE;
+	idatalist->instancing.ibufs = MEM_callocN(sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE, "DRWInstancingBuffers");
+	idatalist->instancing.alloc_size = BUFFER_CHUNK_SIZE;
+
+	BLI_addtail(&g_idatalists, idatalist);
 
 	return idatalist;
 }
 
 void DRW_instance_data_list_free(DRWInstanceDataList *idatalist)
 {
-	DRWInstanceBuffer *ibuf = idatalist->ibuffers.ibufs;
 	DRWInstanceData *idata, *next_idata;
 
 	for (int i = 0; i < MAX_INSTANCE_DATA_SIZE; ++i) {
@@ -284,10 +389,21 @@ void DRW_instance_data_list_free(DRWInstanceDataList *idatalist)
 		idatalist->idata_tail[i] = NULL;
 	}
 
-	for (int i = 0; i < idatalist->ibuffers.alloc_size; i++, ibuf++) {
+	DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
+	for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
+		GWN_VERTBUF_DISCARD_SAFE(bbuf->vert);
+		GWN_BATCH_DISCARD_SAFE(bbuf->batch);
+	}
+	MEM_freeN(idatalist->batching.bbufs);
+
+	DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
+	for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
+		GWN_VERTBUF_DISCARD_SAFE(ibuf->vert);
 		GWN_BATCH_DISCARD_SAFE(ibuf->batch);
 	}
-	MEM_freeN(idatalist->ibuffers.ibufs);
+	MEM_freeN(idatalist->instancing.ibufs);
+
+	BLI_remlink(&g_idatalists, idatalist);
 }
 
 void DRW_instance_data_list_reset(DRWInstanceDataList *idatalist)
diff --git a/source/blender/draw/intern/draw_instance_data.h b/source/blender/draw/intern/draw_instance_data.h
index a7a66c9baff..3b0f7839277 100644
--- a/source/blender/draw/intern/draw_instance_data.h
+++ b/source/blender/draw/intern/draw_instance_data.h
@@ -43,9 +43,12 @@ void *DRW_instance_data_get(DRWInstanceData *idata);
 DRWInstanceData *DRW_instance_data_request(
         DRWInstanceDataList *idatalist, unsigned int attrib_size, unsigned int instance_group);
 
-void DRW_instance_buffer_request(
-        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, struct DRWShadingGroup *shgroup,
-        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert, Gwn_PrimType type);
+void DRW_batching_buffer_request(
+        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, Gwn_PrimType type, struct DRWShadingGroup *shgroup,
+        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert);
+void DRW_instancing_buffer_request(
+        DRWInstanceDataList *idatalist, Gwn_VertFormat *format, Gwn_Batch *instance, struct DRWShadingGroup *shgroup,
+        Gwn_Batch **r_batch, Gwn_VertBuf **r_vert);
 
 /* Upload all instance data to the GPU as soon as possible. */
 void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist);
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index a3a59efc799..5299fa04e4e 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -665,6 +665,24 @@ static void drw_interface_init(DRWInterface *interface, GPUShader *shader)
 }
 
 static void drw_interface_instance_init(
+        DRWShadingGroup *shgroup, GPUShader *shader, Gwn_Batch *batch, Gwn_VertFormat *format)
+{
+	DRWInterface *interface = &shgroup->interface;
+	drw_interface_init(interface, shader);
+
+#ifndef NDEBUG
+	interface->attribs_count = (format != NULL) ? format->attrib_ct : 0;
+#endif
+	BLI_assert(shgroup->type == DRW_SHG_INSTANCE);
+	BLI_assert(shgroup->instance_geom != NULL);
+
+	if (format != NULL) {
+		DRW_instancing_buffer_request(DST.idatalist, format, batch, shgroup,
+		                              &shgroup->instancing_geom, &interface->instance_vbo);
+	}
+}
+
+static void drw_interface_batching_init(
         DRWShadingGroup *shgroup, GPUShader *shader, Gwn_VertFormat *format)
 {
 	DRWInterface *interface = &shgroup->interface;
@@ -673,36 +691,19 @@ static void drw_interface_instance_init(
 
 #ifndef NDEBUG
 	interface->attribs_count = (format != NULL) ? format->attrib_ct : 0;
 #endif
+	BLI_assert(format != NULL);
 
 	Gwn_PrimType type;
-	Gwn_Batch **r_batch = NULL;
 	switch (shgroup->type) {
-		case DRW_SHG_INSTANCE:
-			r_batch = &shgroup->instancing_geom;
-			type = GWN_PRIM_POINTS;
-			break;
-		case DRW_SHG_POINT_BATCH:
-			r_batch = &shgroup->batch_geom;
-			type = GWN_PRIM_POINTS;
-			break;
-		case DRW_SHG_LINE_BATCH:
-			r_batch = &shgroup->batch_geom;
-			type = GWN_PRIM_LINES;
-			break;
-		case DRW_SHG_TRIANGLE_BATCH:
-			r_batch = &shgroup->batch_geom;
-			type = GWN_PRIM_TRIS;
-			break;
+		case DRW_SHG_POINT_BATCH: type = GWN_PRIM_POINTS; break;
+		case DRW_SHG_LINE_BATCH: type = GWN_PRIM_LINES; break;
+		case DRW_SHG_TRIANGLE_BATCH: type = GWN_PRIM_TRIS; break;
 		default:
 			BLI_assert(0);
 	}
 
-	if (format != NULL) {
-		DRW_instance_buffer_request(DST.idatalist, format, shgroup, r_batch, &interface->instance_vbo, type);
-	}
-	else {
-		*r_batch = NULL;
-	}
+	DRW_batching_buffer_request(DST.idatalist, format, type, shgroup,
+	                            &shgroup->batch_geom, &interface->instance_vbo);
 }
 
 static void drw_interface_uniform(DRWShadingGroup *shgroup, const char *name,
@@ -882,7 +883,7 @@ DRWShadingGroup *DRW_shgroup_material_instance_create(
 		shgroup->type = DRW_SHG_INSTANCE;
 		shgroup->instance_geom = geom;
 		shgroup->instance_data = ob->data;
-		drw_interface_instance_init(shgroup, GPU_pass_shader(gpupass), format);
+		drw_interface_instance_init(shgroup, GPU_pass_shader(gpupass), geom, format);
 		drw_shgroup_material_inputs(shgroup, material, gpupass);
 	}
 
@@ -890,7 +891,7 @@ DRWShadingGroup *DRW_shgroup_material_instance_create(
 }
 
 DRWShadingGroup *DRW_shgroup_material_empty_tri_batch_create(
-        struct GPUMaterial *material, DRWPass *pass, int size)
+        struct GPUMaterial *material, DRWPass *pass, int tri_count)
 {
 #ifdef USE_GPU_SELECT
 	BLI_assert((G.f & G_PICKSEL) == 0);
@@ -899,10 +900,10 @@ DRWShadingGroup *DRW_shgroup_material_empty_tri_batch_create(
 	DRWShadingGroup *shgroup = drw_shgroup_material_create_ex(gpupass, pass);
 
 	if (shgroup) {
-		shgroup->type = DRW_SHG_TRIANGLE_BATCH;
-		shgroup->interface.instance_count = size * 3;
-		/* Calling drw_interface_init will cause it to GWN_batch_draw_procedural. */
+		/* Calling drw_interface_init will cause it to call GWN_draw_primitive(). */
 		drw_interface_init(&shgroup->interface, GPU_pass_shader(gpupass));
+		shgroup->type = DRW_SHG_TRIANGLE_BATCH;
+		shgroup->interface.instance_count = tri_count * 3;
 		drw_shgroup_material_inputs(shgroup, material, gpupass);
 	}
 
@@ -923,7 +924,7 @@ DRWShadingGroup *DRW_shgroup_instance_create(
 
 	shgroup->type = DRW_SHG_INSTANCE;
 	shgroup->instance_geom = geom;
-	drw_interface_instance_init(shgroup, shader, format);
+	drw_interface_instance_init(shgroup, shader, geom, format);
 
 	return shgroup;
 }
@@ -937,7 +938,7 @@ DRWShadingGroup *DRW_shgroup_point_batch_create(struct GPUShader *shader, DRWPas
 	DRWShadingGroup *shgroup = drw_shgroup_create_ex(shader, pass);
 
 	shgroup->type = DRW_SHG_POINT_BATCH;
-	drw_interface_instance_init(shgroup, shader, g_pos_format);
+	drw_interface_batching_init(shgroup, shader, g_pos_format);
 
 	return shgroup;
 }
@@ -949,7 +950,7 @@ DRWShadingGroup *DRW_shgroup_line_batch_create(struct GPUShader *shader, DRWPass
 	DRWShadingGroup *shgroup = drw_shgroup_create_ex(shader, pass);
 
 	shgroup->type = DRW_SHG_LINE_BATCH;
-	drw_interface_instance_init(shgroup, shader, g_pos_format);
+	drw_interface_batching_init(shgroup, shader, g_pos_format);
 
 	return shgroup;
 }
@@ -957,18 +958,18 @@ DRWShadingGroup *DRW_shgroup_line_batch_create(struct GPUShader *shader, DRWPass
 
 /* Very special batch. Use this if you position
 * your vertices with the vertex shader
 * and dont need any VBO attrib */
-DRWShadingGroup *DRW_shgroup_empty_tri_batch_create(struct GPUShader *shader, DRWPass *pass, int size)
+DRWShadingGroup *DRW_shgroup_empty_tri_batch_create(struct GPUShader *shader, DRWPass *pass, int tri_count)
 {
 #ifdef USE_GPU_SELECT
 	BLI_assert((G.f & G_PICKSEL) == 0);
 #endif
 	DRWShadingGroup *shgroup = drw_shgroup_create_ex(shader, pass);
 
-	/* Calling drw_interface_init will cause it to GWN_batch_draw_procedural. */
+	/* Calling drw_interface_init will cause it to call GWN_draw_primitive(). */
 	drw_interface_init(&shgroup->interface, shader);
 
 	shgroup->type = DRW_SHG_TRIANGLE_BATCH;
-	shgroup->interface.instance_count = size * 3;
+	shgroup->interface.instance_count = tri_count * 3;
 
 	return shgroup;
 }
@@ -991,13 +992,19 @@ void DRW_shgroup_free(struct DRWShadingGroup *UNUSED(shgroup))
 } ((void)0)
 
 /* Specify an external batch instead of adding each attrib one by one. */
-void DRW_shgroup_instance_batch(DRWShadingGroup *shgroup, struct Gwn_Batch *instances)
+void DRW_shgroup_instance_batch(DRWShadingGroup *shgroup, struct Gwn_Batch *batch)
 {
 	BLI_assert(shgroup->type == DRW_SHG_INSTANCE);
-	BLI_assert(shgroup->instancing_geom == NULL);
+	BLI_assert(shgroup->interface.instance_count == 0);
+	/* You cannot use an external instancing batch without a dummy format. */
+	BLI_assert(shgroup->instancing_geom != NULL);
 
 	shgroup->type = DRW_SHG_INSTANCE_EXTERNAL;
-	shgroup->instancing_geom = instances;
+	/* PERF: This destroys the VAO cache, so better check whether it's necessary. */
+	/* Note: This WILL break if batch->verts[0] is destroyed and reallocated
+	 * at the same address. Bindings/VAOs would remain obsolete. */
+	//if (shgroup->instancing_geom->inst != batch->verts[0])
+	GWN_batch_instbuf_set(shgroup->instancing_geom, batch->verts[0], false);
 
 #ifdef USE_GPU_SELECT
 	DRWCall *call = BLI_mempool_alloc(DST.vmempool->calls);
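
With the stricter asserts above, an external instancing batch now requires the shgroup to have been created with a (possibly dummy) format so instancing_geom already exists; sketch (names hypothetical):

    DRWShadingGroup *grp = DRW_shgroup_instance_create(shader, pass, geom, format);
    DRW_shgroup_instance_batch(grp, external_batch); // swaps in external per-instance data

This is also why object_mode.c below gains a dummy one-float particle_format instead of passing NULL.
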
@@ -1140,8 +1147,6 @@ void DRW_shgroup_set_instance_count(DRWShadingGroup *shgroup, unsigned int count
 
 unsigned int DRW_shgroup_get_instance_count(const DRWShadingGroup *shgroup)
 {
-	BLI_assert(shgroup->type != DRW_SHG_NORMAL && shgroup->type != DRW_SHG_INSTANCE_EXTERNAL);
-
 	return shgroup->interface.instance_count;
 }
 
@@ -1765,18 +1770,17 @@ static void draw_geometry_execute_ex(
 	if (geom == NULL) {
 		BLI_assert(shgroup->type == DRW_SHG_TRIANGLE_BATCH); /* Add other type if needed. */
 		/* Shader is already bound. */
-		Gwn_Batch *batch = DRW_cache_fullscreen_quad_get();
-		GWN_batch_draw_procedural(batch, GWN_PRIM_TRIS, count);
+		GWN_draw_primitive(GWN_PRIM_TRIS, count);
 		return;
 	}
 
 	/* step 2 : bind vertex array & draw */
 	GWN_batch_program_set(geom, GPU_shader_get_program(shgroup->shader), GPU_shader_get_interface(shgroup->shader));
 	if (ELEM(shgroup->type, DRW_SHG_INSTANCE, DRW_SHG_INSTANCE_EXTERNAL)) {
-		GWN_batch_draw_stupid_instanced(geom, shgroup->instancing_geom, start, count);
+		GWN_batch_draw_range_ex(geom, start, count, true);
 	}
 	else {
-		GWN_batch_draw_stupid(geom, start, count);
+		GWN_batch_draw_range(geom, start, count);
 	}
 	/* XXX this just tells gawain we are done with the shader.
 	 * This does not unbind the shader. */
@@ -1998,7 +2002,7 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
 		if (shgroup->type == DRW_SHG_INSTANCE_EXTERNAL) {
 			if (shgroup->instancing_geom != NULL) {
 				GPU_SELECT_LOAD_IF_PICKSEL((DRWCall *)shgroup->calls_first);
-				draw_geometry(shgroup, shgroup->instance_geom, obmat, shgroup->instance_data, 0, 0);
+				draw_geometry(shgroup, shgroup->instancing_geom, obmat, shgroup->instance_data, 0, 0);
 			}
 		}
 		else {
 			unsigned int count, start;
 			GPU_SELECT_LOAD_IF_PICKSEL_LIST(shgroup, start, count)
 			{
-				draw_geometry(shgroup, shgroup->instance_geom, obmat, shgroup->instance_data, start, count);
+				draw_geometry(shgroup,
+				              (shgroup->instancing_geom) ? shgroup->instancing_geom : shgroup->instance_geom,
+				              obmat, shgroup->instance_data, start, count);
 			}
 			GPU_SELECT_LOAD_IF_PICKSEL_LIST_END(start, count)
 		}
 	}
-	else {
+	else { /* DRW_SHG_***_BATCH */
 		/* Some dynamic batch can have no geom (no call to aggregate) */
 		if (shgroup->interface.instance_count > 0) {
 			unsigned int count, start;
diff --git a/source/blender/draw/modes/object_mode.c b/source/blender/draw/modes/object_mode.c
index 4a7a5d25b11..d6c0369b0a5 100644
--- a/source/blender/draw/modes/object_mode.c
+++ b/source/blender/draw/modes/object_mode.c
@@ -218,6 +218,7 @@ typedef struct OBJECT_PrivateData {
 
 static struct {
 	/* Instance Data format */
+	struct Gwn_VertFormat *particle_format;
 	struct Gwn_VertFormat *empty_image_format;
 	struct Gwn_VertFormat *empty_image_wire_format;
 
@@ -537,6 +538,7 @@ static void OBJECT_engine_init(void *vedata)
 
 static void OBJECT_engine_free(void)
 {
+	MEM_SAFE_FREE(e_data.particle_format);
 	MEM_SAFE_FREE(e_data.empty_image_format);
 	MEM_SAFE_FREE(e_data.empty_image_wire_format);
 	DRW_SHADER_FREE_SAFE(e_data.outline_resolve_sh);
@@ -1752,6 +1754,9 @@ static void OBJECT_cache_populate_particles(Object *ob,
 	static float def_prim_col[3] = {0.5f, 0.5f, 0.5f};
 	static float def_sec_col[3] = {1.0f, 1.0f, 1.0f};
 
+	/* Dummy particle format for instancing to work. */
+	DRW_shgroup_instance_format(e_data.particle_format, {{"dummy", DRW_ATTRIB_FLOAT, 1}});
+
 	Material *ma = give_current_material(ob, part->omat);
 
 	switch (draw_as) {
@@ -1766,21 +1771,24 @@ static void OBJECT_cache_populate_particles(Object *ob,
 			break;
 		case PART_DRAW_CROSS:
 			shgrp = DRW_shgroup_instance_create(
-			        e_data.part_prim_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_CROSS), NULL);
+			        e_data.part_prim_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_CROSS),
+			        e_data.particle_format);
 			DRW_shgroup_uniform_texture(shgrp, "ramp", globals_ramp);
 			DRW_shgroup_uniform_vec3(shgrp, "color", ma ? &ma->r : def_prim_col, 1);
 			DRW_shgroup_uniform_int(shgrp, "screen_space", &screen_space[0], 1);
 			break;
 		case PART_DRAW_CIRC:
 			shgrp = DRW_shgroup_instance_create(
-			        e_data.part_prim_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_CIRC), NULL);
+			        e_data.part_prim_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_CIRC),
+			        e_data.particle_format);
 			DRW_shgroup_uniform_texture(shgrp, "ramp", globals_ramp);
 			DRW_shgroup_uniform_vec3(shgrp, "color", ma ? &ma->r : def_prim_col, 1);
 			DRW_shgroup_uniform_int(shgrp, "screen_space", &screen_space[1], 1);
 			break;
 		case PART_DRAW_AXIS:
 			shgrp = DRW_shgroup_instance_create(
-			        e_data.part_axis_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_AXIS), NULL);
+			        e_data.part_axis_sh, psl->particle, DRW_cache_particles_get_prim(PART_DRAW_AXIS),
+			        e_data.particle_format);
 			DRW_shgroup_uniform_int(shgrp, "screen_space", &screen_space[0], 1);
 			break;
 		default: