GPUBatch: GL backend isolation

This changes the drawing paradigm a bit. The VAO configuration is done JIT-style and depends on the context's active shader. This is to allow more flexibility for implementations to do optimizations at a lower level. The VAO cache is now its own class to isolate the concept. It is this class that is referenced by the GLContext for ownership of the VAO ids it contains.
2020-08-11 01:31:40 +02:00
parent 47bfb0f7ad
commit efc97b3919
15 changed files with 626 additions and 496 deletions
--- a/source/blender/gpu/opengl/gl_batch.cc
+++ b/source/blender/gpu/opengl/gl_batch.cc
@@ -29,13 +29,254 @@

 #include "glew-mx.h"

+#include "GPU_extensions.h"
+
 #include "gpu_batch_private.hh"
 #include "gpu_primitive_private.h"
+#include "gpu_shader_private.h"

 #include "gl_batch.hh"
+#include "gl_context.hh"
+#include "gl_vertex_array.hh"

 using namespace blender::gpu;

+/* -------------------------------------------------------------------- */
+/** \name Vao cache
+ *
+ * Each GLBatch has a small cache of VAO objects that are used to avoid VAO reconfiguration.
+ * TODO(fclem) Could be revisited to avoid so many cross references.
+ * \{ */
+
+/* Build an empty cache: all the state setup is delegated to init(). */
+GLVaoCache::GLVaoCache(void)
+{
+  this->init();
+}
+
+/* Release whatever the cache still holds; the whole teardown lives in clear(). */
+GLVaoCache::~GLVaoCache()
+{
+  clear();
+}
+
+/* Reset the cache to its empty state: no owning context, no current interface, static storage
+ * mode with every slot free, and no VAO ids. Does not free anything by itself. */
+void GLVaoCache::init(void)
+{
+  context_ = NULL;
+  interface_ = NULL;
+  is_dynamic_vao_count = false;
+  for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+    static_vaos.interfaces[i] = NULL;
+    static_vaos.vao_ids[i] = 0;
+  }
+  vao_base_instance_ = 0;
+  base_instance_ = 0;
+  /* Also reset the current VAO id: vao_get() returns it untouched when the active interface
+   * equals interface_, so it must never be left uninitialized or stale. */
+  vao_id_ = 0;
+}
+
+/* Store an already generated VAO in the cache, keyed by the shader interface it is meant for,
+ * and register this cache as a user of that interface. */
+void GLVaoCache::insert(const GPUShaderInterface *interface, GLuint vao)
+{
+  if (!is_dynamic_vao_count) {
+    /* Search the fixed size storage for a free slot (a VAO id of 0 means unused). */
+    int slot = 0;
+    while ((slot < GPU_VAO_STATIC_LEN) && (static_vaos.vao_ids[slot] != 0)) {
+      slot++;
+    }
+
+    if (slot < GPU_VAO_STATIC_LEN) {
+      static_vaos.interfaces[slot] = interface;
+      static_vaos.vao_ids[slot] = vao;
+    }
+    else {
+      /* The fixed size storage is full. Drop its entries (they get re-created if drawn again)
+       * before moving over to the growable storage. */
+      for (int j = 0; j < GPU_VAO_STATIC_LEN; j++) {
+        if (static_vaos.interfaces[j] == NULL) {
+          continue;
+        }
+        GPU_shaderinterface_remove_batch_ref(
+            const_cast<GPUShaderInterface *>(static_vaos.interfaces[j]), this);
+        context_->vao_free(static_vaos.vao_ids[j]);
+      }
+      is_dynamic_vao_count = true;
+      /* Allocate zeroed arrays; the new entry itself is written by the dynamic branch below. */
+      dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+      dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN(
+          dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces");
+      dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(dynamic_vaos.count * sizeof(GLuint),
+                                                   "dyn vaos ids");
+    }
+  }
+
+  if (is_dynamic_vao_count) {
+    /* Search the growable storage for a free slot. */
+    int slot = 0;
+    while ((slot < dynamic_vaos.count) && (dynamic_vaos.vao_ids[slot] != 0)) {
+      slot++;
+    }
+
+    if (slot == dynamic_vaos.count) {
+      /* Every slot is taken: grow both arrays, `slot` then points at the first new entry. */
+      dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+      dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN(
+          (void *)dynamic_vaos.interfaces, sizeof(GPUShaderInterface *) * dynamic_vaos.count);
+      dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(dynamic_vaos.vao_ids,
+                                                     sizeof(GLuint) * dynamic_vaos.count);
+    }
+    dynamic_vaos.interfaces[slot] = interface;
+    dynamic_vaos.vao_ids[slot] = vao;
+  }
+
+  GPU_shaderinterface_add_batch_ref(const_cast<GPUShaderInterface *>(interface), this);
+}
+
+/* Forget the entry stored for `interface` (if any) and hand its VAO to the owning context
+ * through vao_free(). */
+void GLVaoCache::remove(const GPUShaderInterface *interface)
+{
+  /* Select the storage currently in use. */
+  int count = GPU_VAO_STATIC_LEN;
+  GLuint *vaos = static_vaos.vao_ids;
+  const GPUShaderInterface **interfaces = static_vaos.interfaces;
+  if (is_dynamic_vao_count) {
+    count = dynamic_vaos.count;
+    vaos = dynamic_vaos.vao_ids;
+    interfaces = dynamic_vaos.interfaces;
+  }
+
+  for (int slot = 0; slot < count; slot++) {
+    if (interfaces[slot] != interface) {
+      continue;
+    }
+    context_->vao_free(vaos[slot]);
+    vaos[slot] = 0;
+    interfaces[slot] = NULL;
+    return; /* An interface is stored at most once. */
+  }
+}
+
+/* Free every VAO held by the cache, drop the references taken on the shader interfaces,
+ * release the dynamic arrays, unregister from the owning context and reset to the empty
+ * state. Does nothing when the cache has no owning context. */
+void GLVaoCache::clear(void)
+{
+  GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+  const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+  GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids;
+  const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+                                                                   static_vaos.interfaces;
+  /* Early out, nothing to free. */
+  if (context_ == NULL) {
+    return;
+  }
+
+  if (context_ == ctx) {
+    /* The owning context is the active one: the VAOs can be deleted directly. */
+    glDeleteVertexArrays(count, vaos);
+    glDeleteVertexArrays(1, &vao_base_instance_);
+  }
+  else {
+    /* Another context is active: let the owning context dispose of the ids. */
+    /* TODO(fclem) Slow way. Could avoid multiple mutex lock here */
+    for (int i = 0; i < count; i++) {
+      context_->vao_free(vaos[i]);
+    }
+    context_->vao_free(vao_base_instance_);
+  }
+
+  /* Release the reference this cache holds on every interface it stored. */
+  for (int i = 0; i < count; i++) {
+    if (interfaces[i] == NULL) {
+      continue;
+    }
+    GPU_shaderinterface_remove_batch_ref(const_cast<GPUShaderInterface *>(interfaces[i]), this);
+  }
+
+  if (is_dynamic_vao_count) {
+    MEM_freeN((void *)dynamic_vaos.interfaces);
+    MEM_freeN(dynamic_vaos.vao_ids);
+  }
+
+  /* NOTE(review): context_ cannot be NULL here because of the early out above. Also,
+   * context_check() unregisters before calling clear(), so on that path the unregister
+   * runs twice -- confirm vao_cache_unregister() tolerates it. */
+  if (context_) {
+    context_->vao_cache_unregister(this);
+  }
+  /* Reinit. */
+  this->init();
+}
+
+/* Find the VAO stored for `interface`.
+ * Returns 0 (an invalid VAO id) on cache miss. */
+GLuint GLVaoCache::lookup(const GPUShaderInterface *interface)
+{
+  /* Select the storage currently in use. */
+  int count = GPU_VAO_STATIC_LEN;
+  const GPUShaderInterface **interfaces = static_vaos.interfaces;
+  const GLuint *vaos = static_vaos.vao_ids;
+  if (is_dynamic_vao_count) {
+    count = dynamic_vaos.count;
+    interfaces = dynamic_vaos.interfaces;
+    vaos = dynamic_vaos.vao_ids;
+  }
+
+  for (int slot = 0; slot < count; slot++) {
+    if (interfaces[slot] == interface) {
+      return vaos[slot];
+    }
+  }
+  return 0;
+}
+
+/* A GLVaoCache is only valid for a single GLContext.
+ * When the active context is not the one the cache belongs to, empty the cache and attach it
+ * to the active context. */
+void GLVaoCache::context_check(void)
+{
+  GLContext *active_ctx = static_cast<GLContext *>(GPU_context_active_get());
+  BLI_assert(active_ctx);
+
+  if (context_ == active_ctx) {
+    /* Already attached to the active context. */
+    return;
+  }
+
+  if (context_ != NULL) {
+    /* IMPORTANT: Trying to draw a batch in multiple different contexts will trash the VAO
+     * cache. This has a major performance impact and should be avoided in most cases. */
+    context_->vao_cache_unregister(this);
+  }
+  this->clear();
+  context_ = active_ctx;
+  context_->vao_cache_register(this);
+}
+
+/* Return the VAO whose bindings are set up for drawing `batch` starting at instance `i_first`.
+ * GLBatch::bind() uses it when GPU_arb_base_instance_is_supported() is false and i_first > 0.
+ * The bindings are updated whenever the active shader interface or `i_first` changes.
+ *
+ * \param batch: batch passed on to GLVertArray::update_bindings().
+ * \param i_first: index of the first instance to draw.
+ * \return the GL id of the VAO to bind. */
+GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first)
+{
+  this->context_check();
+  /* Make sure the interface is up to date. */
+  if (interface_ != GPU_context_active_get()->shader->interface) {
+    /* Called for its side effect of refreshing interface_; the returned VAO is not used. */
+    vao_get(batch);
+    /* Trigger update. */
+    base_instance_ = 0;
+  }
+  /**
+   * There seems to be a nasty bug when reconfiguring and drawing with the same VAO (T71147).
+   * We just use a throwaway VAO for that. Note that this is likely to degrade performance.
+   **/
+#ifdef __APPLE__
+  glDeleteVertexArrays(1, &vao_base_instance_);
+  vao_base_instance_ = 0;
+  /* The replacement VAO starts out unconfigured: force the bindings update below even when
+   * `i_first` is the same as for the previous call. */
+  base_instance_ = 0;
+#endif
+
+  if (vao_base_instance_ == 0) {
+    glGenVertexArrays(1, &vao_base_instance_);
+  }
+
+  if (base_instance_ != i_first) {
+    base_instance_ = i_first;
+    GLVertArray::update_bindings(vao_base_instance_, batch, interface_, i_first);
+  }
+  /* Return the VAO id and not the instance offset: the result goes to glBindVertexArray(). */
+  return vao_base_instance_;
+}
+
+/* Return the VAO to use for `batch` with the shader interface of the active context.
+ * The VAO is created, cached and configured on first use with a given interface. */
+GLuint GLVaoCache::vao_get(GPUBatch *batch)
+{
+  this->context_check();
+
+  GPUContext *ctx = GPU_context_active_get();
+  if (interface_ == ctx->shader->interface) {
+    /* Same interface as for the previous call: the current VAO is still the right one. */
+    return vao_id_;
+  }
+
+  interface_ = ctx->shader->interface;
+  vao_id_ = this->lookup(interface_);
+  if (vao_id_ != 0) {
+    /* Cache hit. */
+    return vao_id_;
+  }
+
+  /* Cache miss, create a new VAO. */
+  glGenVertexArrays(1, &vao_id_);
+  this->insert(interface_, vao_id_);
+  GLVertArray::update_bindings(vao_id_, batch, interface_, 0);
+  return vao_id_;
+}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
+
+/* Nothing to set up explicitly: the constructor body is empty. */
 GLBatch::GLBatch(void)
 {
 }
@@ -44,7 +285,83 @@ GLBatch::~GLBatch()
 {
 }

+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Drawing
+ * \{ */
+
+/* Accessors for the base index and GL index type of an index buffer.
+ * With index range tracking enabled they read the values stored on the buffer, otherwise
+ * they expand to constants: base index 0 and 32-bit unsigned indices. */
+#if GPU_TRACK_INDEX_RANGE
+#  define BASE_INDEX(el) ((el)->base_index)
+#  define INDEX_TYPE(el) ((el)->gl_index_type)
+#else
+#  define BASE_INDEX(el) 0
+#  define INDEX_TYPE(el) GL_UNSIGNED_INT
+#endif
+
+/* Bind the VAO to draw this batch with, as provided by the VAO cache for the active shader.
+ * `i_first` is the first instance to draw; it only selects a different VAO when base instance
+ * is not supported and `i_first` is greater than 0. */
+void GLBatch::bind(int i_first)
+{
+  /* A dirty batch invalidates all of its cached VAOs. */
+  if (flag & GPU_BATCH_DIRTY) {
+    vao_cache_.clear();
+  }
+
+#if GPU_TRACK_INDEX_RANGE
+  /* Can be removed if GL 4.3 is required. */
+  if (!GLEW_ARB_ES3_compatibility && (elem != NULL)) {
+    const GLuint restart_index = (elem->index_type == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu;
+    glPrimitiveRestartIndex(restart_index);
+  }
+#endif
+
+  /* Can be removed if GL 4.2 is required. */
+  const bool use_base_instance_vao = !GPU_arb_base_instance_is_supported() && (i_first > 0);
+  const GLuint vao = use_base_instance_vao ? vao_cache_.base_instance_vao_get(this, i_first) :
+                                             vao_cache_.vao_get(this);
+  glBindVertexArray(vao);
+}
+
+/* Bind the batch and issue one instanced draw call for it.
+ * With an index buffer, `v_first` / `v_count` select a range of indices; without one they
+ * select a range of vertices. `i_count` is the number of instances and `i_first` the first
+ * instance. `i_first` is only passed to the draw call when base instance is supported;
+ * otherwise it is only used by bind(). */
 void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
 {
-  UNUSED_VARS(v_first, v_count, i_first, i_count);
-}
+  this->bind(i_first);
+
+  GLenum gl_type = convert_prim_type_to_gl(prim_type);
+
+  if (elem) {
+    /* Indexed drawing. */
+    const GPUIndexBuf *el = elem;
+    GLenum index_type = INDEX_TYPE(el);
+    GLint base_index = BASE_INDEX(el);
+    /* Offset of the first index to read, computed through null pointer arithmetic so that it
+     * is scaled by the index size (32-bit indices by default). */
+    void *v_first_ofs = (GLuint *)0 + v_first + el->index_start;
+
+#if GPU_TRACK_INDEX_RANGE
+    /* Same offset, scaled for 16-bit indices. */
+    if (el->index_type == GPU_INDEX_U16) {
+      v_first_ofs = (GLushort *)0 + v_first + el->index_start;
+    }
+#endif
+
+    if (GPU_arb_base_instance_is_supported()) {
+      glDrawElementsInstancedBaseVertexBaseInstance(
+          gl_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
+    }
+    else {
+      glDrawElementsInstancedBaseVertex(
+          gl_type, v_count, index_type, v_first_ofs, i_count, base_index);
+    }
+  }
+  else {
+    /* Non-indexed drawing. */
+    /* NOTE(review): primitive restart is switched off around the draw on Apple only,
+     * presumably as a driver workaround -- confirm the reason. */
+#ifdef __APPLE__
+    glDisable(GL_PRIMITIVE_RESTART);
+#endif
+    if (GPU_arb_base_instance_is_supported()) {
+      glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first);
+    }
+    else {
+      glDrawArraysInstanced(gl_type, v_first, v_count, i_count);
+    }
+#ifdef __APPLE__
+    glEnable(GL_PRIMITIVE_RESTART);
+#endif
+  }
+}
+
+/** \} */