Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.

- Adding compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte; in Metal, this minimum stride is 4 bytes, meaning a vertex format must be at least 4 bytes in size.
- Replacing the compile-time transform feedback check with a conditional run-time look-up, given TF is supported on macOS with Metal.
- 3D texture size safety check added as a general capability, rather than being in the GL backend only. Also required for Metal.

Authored by Apple: Michael Parkin-White
Ref T96261

Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D14510
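
Below is a minimal sketch of how the new minimum-stride capability can be consulted when declaring a 1-byte attribute. The patch itself simply switches the affected dummy attributes from GPU_COMP_U8 to GPU_COMP_U32; the helper name dummy_comp_type() and the run-time branch are illustrative only, while GPU_minimum_per_vertex_stride(), GPU_vertformat_attr_add() and the GPU_COMP_* values are the APIs touched by this change.

#include "GPU_capabilities.h"
#include "GPU_vertex_buffer.h"
#include "GPU_vertex_format.h"

/* Hypothetical helper: pick a component type whose size satisfies the
 * backend's minimum per-vertex stride (1 byte on OpenGL, 4 bytes on Metal). */
static GPUVertCompType dummy_comp_type(void)
{
  /* GPU_COMP_U8 is 1 byte per component, GPU_COMP_U32 is 4 bytes. */
  return (GPU_minimum_per_vertex_stride() > 1) ? GPU_COMP_U32 : GPU_COMP_U8;
}

static GPUVertBuf *dummy_vbo_create(void)
{
  GPUVertFormat format = {0};
  GPU_vertformat_attr_add(&format, "dummy", dummy_comp_type(), 1, GPU_FETCH_INT);

  GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
  GPU_vertbuf_data_alloc(vbo, 1);
  return vbo;
}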
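For the other two capabilities introduced here, a short hedged sketch: the 3D-texture clamp mirrors the EEVEE volumetrics hunk below, and the shader-type selection mirrors drw_hair_shader_type_get()/drw_curves_shader_type_get() in the diff. The wrapper names clamp_tex_size_3d() and refine_shader_type() are illustrative, not part of the patch, and the include paths are the usual Blender module headers.

#include "BLI_math_base.h"
#include "GPU_capabilities.h"
#include "draw_shader.h" /* eParticleRefineShaderType, assuming the draw module headers. */

/* Clamp a requested volumetric texture resolution to the device limit. */
static void clamp_tex_size_3d(int tex_size[3])
{
  const int max_size = GPU_max_texture_3d_size();
  for (int i = 0; i < 3; i++) {
    tex_size[i] = min_ii(tex_size[i], max_size);
  }
}

/* Pick the hair/curves refinement path at run time instead of compile time. */
static eParticleRefineShaderType refine_shader_type(void)
{
  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
    return PART_REFINE_SHADER_COMPUTE;
  }
  if (GPU_transform_feedback_support()) {
    return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
  }
  return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
}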
@@ -30,6 +30,7 @@
#include "DEG_depsgraph_query.h"

#include "GPU_capabilities.h"
#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_texture.h"
#include "eevee_private.h"
@@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size));
tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1);

/* Clamp 3D texture size based on device maximum. */
int maxSize = GPU_max_texture_3d_size();
BLI_assert(tex_size[0] <= maxSize);
tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0];
tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1];
tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2];

common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]);
common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]);
common_data->vol_coord_scale[2] = 1.0f / viewport_size[0];

@@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void)
{
if (SHC.drw_gpencil_dummy_quad == NULL) {
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT);
/* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT);
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 4);

@@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
GPU_vertformat_alias_add(&format, "pos");

cache.proc_point_buf = GPU_vertbuf_create_with_format(&format);
cache.proc_point_buf = GPU_vertbuf_create_with_format_ex(
&format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len);

MutableSpan posTime_data{
@@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPUVertFormat length_format = {0};
GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);

cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
cache.proc_length_buf = GPU_vertbuf_create_with_format_ex(
&length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len);

MutableSpan hairLength_data{
@@ -319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac
const char *name)
{
CurvesEvalFinalCache &final_cache = cache.final[subdiv];
final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format,
GPU_USAGE_DEVICE_ONLY);
final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves,
/* All attributes use vec4, see comment below. */
GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format);
cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
&format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index];

GPU_vertbuf_data_alloc(attr_vbo,
@@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves,
uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT);

/* Curve Data. */
cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex(
&format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step);

cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
&format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step);

@@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY);
cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
&format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */

@@ -32,6 +32,8 @@
#include "ED_particle.h"

#include "GPU_batch.h"
#include "GPU_capabilities.h"
#include "GPU_context.h"
#include "GPU_material.h"

#include "DEG_depsgraph_query.h"
@@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format);
/* Transform feedback buffer only needs to be resident in device memory. */
GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC;
cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
&format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names));

/* Strand Data */
cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex(
&format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step);

cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
&format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step);

/* UV layers */
for (int i = 0; i < cache->num_uv_layers; i++) {
cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv);
cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex(
&format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]);

@@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit

/* Vertex colors */
for (int i = 0; i < cache->num_col_layers; i++) {
cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col);
cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex(
&format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]);

@@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit,
static GPUVertFormat format = {0};
GPU_vertformat_clear(&format);

/* initialize vertex format */
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
/* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum
* stride requirement. */
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);

GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 1);
@@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
uint pos_id = GPU_vertformat_attr_add(
&pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format);
cache->proc_point_buf = GPU_vertbuf_create_with_format_ex(
&pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len);

GPUVertBufRaw pos_step;
@@ -1111,7 +1122,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
uint length_id = GPU_vertformat_attr_add(
&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);

cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
cache->proc_length_buf = GPU_vertbuf_create_with_format_ex(
&length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len);

GPUVertBufRaw length_step;

@@ -33,25 +33,17 @@
#include "draw_manager.h"
#include "draw_shader.h"

#ifndef __APPLE__
# define USE_TRANSFORM_FEEDBACK
# define USE_COMPUTE_SHADERS
#endif

BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
{
#ifdef USE_COMPUTE_SHADERS
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
return PART_REFINE_SHADER_COMPUTE;
}
#endif
#ifdef USE_TRANSFORM_FEEDBACK
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
#endif
if (GPU_transform_feedback_support()) {
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
}
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
}

#ifndef USE_TRANSFORM_FEEDBACK
struct CurvesEvalCall {
struct CurvesEvalCall *next;
GPUVertBuf *vbo;
@@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr;
static int g_tf_id_offset;
static int g_tf_target_width;
static int g_tf_target_height;
#endif

static GPUVertBuf *g_dummy_vbo = nullptr;
static GPUTexture *g_dummy_texture = nullptr;
@@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data)
CurvesUniformBufPool *pool = drw_data->curves_ubos;
pool->reset();

#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS)
g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0);
#else
g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR);
#endif
if (GPU_transform_feedback_support() || GPU_compute_shader_support()) {
g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0);
}
else {
g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR);
}

if (g_dummy_vbo == nullptr) {
/* initialize vertex format */
GPUVertFormat format = {0};
uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

g_dummy_vbo = GPU_vertbuf_create_with_format(&format);
g_dummy_vbo = GPU_vertbuf_create_with_format_ex(
&format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
@@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache,
{
GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM);

#ifdef USE_TRANSFORM_FEEDBACK
DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo);
#else
DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
DRWShadingGroup *tf_shgrp = nullptr;
if (GPU_transform_feedback_support()) {
tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo);
}
else {
tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);

CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__);
pr_call->next = g_tf_calls;
pr_call->vbo = vbo;
pr_call->shgrp = tf_shgrp;
pr_call->vert_len = final_points_len;
g_tf_calls = pr_call;
DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
#endif
CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__);
pr_call->next = g_tf_calls;
pr_call->vbo = vbo;
pr_call->shgrp = tf_shgrp;
pr_call->vert_len = final_points_len;
g_tf_calls = pr_call;
DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
}
BLI_assert(tf_shgrp != nullptr);

drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv);
DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
@@ -411,82 +407,118 @@ void DRW_curves_update()
|
||||
/* Update legacy hair too, to avoid verbosity in callers. */
|
||||
DRW_hair_update();
|
||||
|
||||
#ifndef USE_TRANSFORM_FEEDBACK
|
||||
/**
|
||||
* Workaround to transform feedback not working on mac.
|
||||
* On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
|
||||
*
|
||||
* So instead of using transform feedback we render to a texture,
|
||||
* read back the result to system memory and re-upload as VBO data.
|
||||
* It is really not ideal performance wise, but it is the simplest
|
||||
* and the most local workaround that still uses the power of the GPU.
|
||||
*/
|
||||
if (!GPU_transform_feedback_support()) {
|
||||
/**
|
||||
* Workaround to transform feedback not working on mac.
|
||||
* On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
|
||||
*
|
||||
* So instead of using transform feedback we render to a texture,
|
||||
* read back the result to system memory and re-upload as VBO data.
|
||||
* It is really not ideal performance wise, but it is the simplest
|
||||
* and the most local workaround that still uses the power of the GPU.
|
||||
*/
|
||||
|
||||
if (g_tf_calls == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Search ideal buffer size. */
|
||||
uint max_size = 0;
|
||||
for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
|
||||
max_size = max_ii(max_size, pr_call->vert_len);
|
||||
}
|
||||
|
||||
/* Create target Texture / Frame-buffer */
|
||||
/* Don't use max size as it can be really heavy and fail.
|
||||
* Do chunks of maximum 2048 * 2048 hair points. */
|
||||
int width = 2048;
|
||||
int height = min_ii(width, 1 + max_size / width);
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPUFrameBuffer *fb = nullptr;
|
||||
GPU_framebuffer_ensure_config(&fb,
|
||||
{
|
||||
GPU_ATTACHMENT_NONE,
|
||||
GPU_ATTACHMENT_TEXTURE(tex),
|
||||
});
|
||||
|
||||
float *data = static_cast<float *>(
|
||||
MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"));
|
||||
|
||||
GPU_framebuffer_bind(fb);
|
||||
while (g_tf_calls != nullptr) {
|
||||
CurvesEvalCall *pr_call = g_tf_calls;
|
||||
g_tf_calls = g_tf_calls->next;
|
||||
|
||||
g_tf_id_offset = 0;
|
||||
while (pr_call->vert_len > 0) {
|
||||
int max_read_px_len = min_ii(width * height, pr_call->vert_len);
|
||||
|
||||
DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
|
||||
/* Read back result to main memory. */
|
||||
GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
|
||||
/* Upload back to VBO. */
|
||||
GPU_vertbuf_use(pr_call->vbo);
|
||||
GPU_vertbuf_update_sub(pr_call->vbo,
|
||||
sizeof(float[4]) * g_tf_id_offset,
|
||||
sizeof(float[4]) * max_read_px_len,
|
||||
data);
|
||||
|
||||
g_tf_id_offset += max_read_px_len;
|
||||
pr_call->vert_len -= max_read_px_len;
|
||||
if (g_tf_calls == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
MEM_freeN(pr_call);
|
||||
}
|
||||
/* Search ideal buffer size. */
|
||||
uint max_size = 0;
|
||||
for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
|
||||
max_size = max_ii(max_size, pr_call->vert_len);
|
||||
}
|
||||
|
||||
MEM_freeN(data);
|
||||
GPU_framebuffer_free(fb);
|
||||
#else
|
||||
/* Just render the pass when using compute shaders or transform feedback. */
|
||||
DRW_draw_pass(g_tf_pass);
|
||||
if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
|
||||
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
|
||||
/* Create target Texture / Frame-buffer */
|
||||
/* Don't use max size as it can be really heavy and fail.
|
||||
* Do chunks of maximum 2048 * 2048 hair points. */
|
||||
int width = 2048;
|
||||
int height = min_ii(width, 1 + max_size / width);
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPUFrameBuffer *fb = nullptr;
|
||||
GPU_framebuffer_ensure_config(&fb,
|
||||
{
|
||||
GPU_ATTACHMENT_NONE,
|
||||
GPU_ATTACHMENT_TEXTURE(tex),
|
||||
});
|
||||
|
||||
float *data = static_cast<float *>(
|
||||
MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"));
|
||||
|
||||
GPU_framebuffer_bind(fb);
|
||||
while (g_tf_calls != nullptr) {
|
||||
CurvesEvalCall *pr_call = g_tf_calls;
|
||||
g_tf_calls = g_tf_calls->next;
|
||||
|
||||
g_tf_id_offset = 0;
|
||||
while (pr_call->vert_len > 0) {
|
||||
int max_read_px_len = min_ii(width * height, pr_call->vert_len);
|
||||
|
||||
DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
|
||||
/* Read back result to main memory. */
|
||||
GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
|
||||
/* Upload back to VBO. */
|
||||
GPU_vertbuf_use(pr_call->vbo);
|
||||
GPU_vertbuf_update_sub(pr_call->vbo,
|
||||
sizeof(float[4]) * g_tf_id_offset,
|
||||
sizeof(float[4]) * max_read_px_len,
|
||||
data);
|
||||
|
||||
g_tf_id_offset += max_read_px_len;
|
||||
pr_call->vert_len -= max_read_px_len;
|
||||
}
|
||||
|
||||
MEM_freeN(pr_call);
|
||||
}
|
||||
|
||||
MEM_freeN(data);
|
||||
GPU_framebuffer_free(fb);
|
||||
}
|
||||
else {
|
||||
/* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid
|
||||
* side-effects from rendering in the active buffer.
|
||||
* We also need to guarantee that a Framebuffer is active to perform any rendering work,
|
||||
* even if there is no output */
|
||||
GPUFrameBuffer *temp_fb = nullptr;
|
||||
GPUFrameBuffer *prev_fb = nullptr;
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
|
||||
if (!GPU_compute_shader_support()) {
|
||||
prev_fb = GPU_framebuffer_active_get();
|
||||
char errorOut[256];
|
||||
/* if the framebuffer is invalid we need a dummy framebuffer to be bound. */
|
||||
if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) {
|
||||
int width = 64;
|
||||
int height = 64;
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)});
|
||||
|
||||
GPU_framebuffer_bind(temp_fb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Just render the pass when using compute shaders or transform feedback. */
|
||||
DRW_draw_pass(g_tf_pass);
|
||||
if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
|
||||
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
|
||||
}
|
||||
|
||||
/* Release temporary framebuffer. */
|
||||
if (temp_fb != nullptr) {
|
||||
GPU_framebuffer_free(temp_fb);
|
||||
}
|
||||
/* Rebind existing framebuffer */
|
||||
if (prev_fb != nullptr) {
|
||||
GPU_framebuffer_bind(prev_fb);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DRW_curves_free()
|
||||
|
||||
@@ -22,6 +22,7 @@
#include "GPU_batch.h"
#include "GPU_capabilities.h"
#include "GPU_compute.h"
#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
@@ -33,25 +34,17 @@
#include "draw_shader.h"
#include "draw_shader_shared.h"

#ifndef __APPLE__
# define USE_TRANSFORM_FEEDBACK
# define USE_COMPUTE_SHADERS
#endif

BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get()
{
#ifdef USE_COMPUTE_SHADERS
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
return PART_REFINE_SHADER_COMPUTE;
}
#endif
#ifdef USE_TRANSFORM_FEEDBACK
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
#endif
if (GPU_transform_feedback_support()) {
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK;
}
return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND;
}

#ifndef USE_TRANSFORM_FEEDBACK
struct ParticleRefineCall {
struct ParticleRefineCall *next;
GPUVertBuf *vbo;
@@ -63,7 +56,6 @@ static ParticleRefineCall *g_tf_calls = nullptr;
static int g_tf_id_offset;
static int g_tf_target_width;
static int g_tf_target_height;
#endif

static GPUVertBuf *g_dummy_vbo = nullptr;
static GPUTexture *g_dummy_texture = nullptr;
@@ -77,18 +69,20 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement)

void DRW_hair_init(void)
{
#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS)
g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW);
#else
g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR);
#endif
if (GPU_transform_feedback_support() || GPU_compute_shader_support()) {
g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW);
}
else {
g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR);
}

if (g_dummy_vbo == nullptr) {
/* initialize vertex format */
GPUVertFormat format = {0};
uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

g_dummy_vbo = GPU_vertbuf_create_with_format(&format);
g_dummy_vbo = GPU_vertbuf_create_with_format_ex(
&format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
@@ -146,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache
if (final_points_len > 0) {
GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM);

#ifdef USE_TRANSFORM_FEEDBACK
DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(
tf_shader, g_tf_pass, cache->final[subdiv].proc_buf);
#else
DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
DRWShadingGroup *tf_shgrp = nullptr;
if (GPU_transform_feedback_support()) {
tf_shgrp = DRW_shgroup_transform_feedback_create(
tf_shader, g_tf_pass, cache->final[subdiv].proc_buf);
}
else {
tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);

ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__);
pr_call->next = g_tf_calls;
pr_call->vbo = cache->final[subdiv].proc_buf;
pr_call->shgrp = tf_shgrp;
pr_call->vert_len = final_points_len;
g_tf_calls = pr_call;
DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
#endif
ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__);
pr_call->next = g_tf_calls;
pr_call->vbo = cache->final[subdiv].proc_buf;
pr_call->shgrp = tf_shgrp;
pr_call->vert_len = final_points_len;
g_tf_calls = pr_call;
DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
}
BLI_assert(tf_shgrp != nullptr);

drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv);
DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
@@ -306,81 +303,117 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
|
||||
|
||||
void DRW_hair_update()
|
||||
{
|
||||
#ifndef USE_TRANSFORM_FEEDBACK
|
||||
/**
|
||||
* Workaround to transform feedback not working on mac.
|
||||
* On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
|
||||
*
|
||||
* So instead of using transform feedback we render to a texture,
|
||||
* read back the result to system memory and re-upload as VBO data.
|
||||
* It is really not ideal performance wise, but it is the simplest
|
||||
* and the most local workaround that still uses the power of the GPU.
|
||||
*/
|
||||
if (!GPU_transform_feedback_support()) {
|
||||
/**
|
||||
* Workaround to transform feedback not working on mac.
|
||||
* On some system it crashes (see T58489) and on some other it renders garbage (see T60171).
|
||||
*
|
||||
* So instead of using transform feedback we render to a texture,
|
||||
* read back the result to system memory and re-upload as VBO data.
|
||||
* It is really not ideal performance wise, but it is the simplest
|
||||
* and the most local workaround that still uses the power of the GPU.
|
||||
*/
|
||||
|
||||
if (g_tf_calls == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Search ideal buffer size. */
|
||||
uint max_size = 0;
|
||||
for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
|
||||
max_size = max_ii(max_size, pr_call->vert_len);
|
||||
}
|
||||
|
||||
/* Create target Texture / Frame-buffer */
|
||||
/* Don't use max size as it can be really heavy and fail.
|
||||
* Do chunks of maximum 2048 * 2048 hair points. */
|
||||
int width = 2048;
|
||||
int height = min_ii(width, 1 + max_size / width);
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPUFrameBuffer *fb = nullptr;
|
||||
GPU_framebuffer_ensure_config(&fb,
|
||||
{
|
||||
GPU_ATTACHMENT_NONE,
|
||||
GPU_ATTACHMENT_TEXTURE(tex),
|
||||
});
|
||||
|
||||
float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer");
|
||||
|
||||
GPU_framebuffer_bind(fb);
|
||||
while (g_tf_calls != nullptr) {
|
||||
ParticleRefineCall *pr_call = g_tf_calls;
|
||||
g_tf_calls = g_tf_calls->next;
|
||||
|
||||
g_tf_id_offset = 0;
|
||||
while (pr_call->vert_len > 0) {
|
||||
int max_read_px_len = min_ii(width * height, pr_call->vert_len);
|
||||
|
||||
DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
|
||||
/* Read back result to main memory. */
|
||||
GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
|
||||
/* Upload back to VBO. */
|
||||
GPU_vertbuf_use(pr_call->vbo);
|
||||
GPU_vertbuf_update_sub(pr_call->vbo,
|
||||
sizeof(float[4]) * g_tf_id_offset,
|
||||
sizeof(float[4]) * max_read_px_len,
|
||||
data);
|
||||
|
||||
g_tf_id_offset += max_read_px_len;
|
||||
pr_call->vert_len -= max_read_px_len;
|
||||
if (g_tf_calls == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
MEM_freeN(pr_call);
|
||||
}
|
||||
/* Search ideal buffer size. */
|
||||
uint max_size = 0;
|
||||
for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) {
|
||||
max_size = max_ii(max_size, pr_call->vert_len);
|
||||
}
|
||||
|
||||
MEM_freeN(data);
|
||||
GPU_framebuffer_free(fb);
|
||||
#else
|
||||
/* Just render the pass when using compute shaders or transform feedback. */
|
||||
DRW_draw_pass(g_tf_pass);
|
||||
if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
|
||||
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
|
||||
/* Create target Texture / Frame-buffer */
|
||||
/* Don't use max size as it can be really heavy and fail.
|
||||
* Do chunks of maximum 2048 * 2048 hair points. */
|
||||
int width = 2048;
|
||||
int height = min_ii(width, 1 + max_size / width);
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPUFrameBuffer *fb = nullptr;
|
||||
GPU_framebuffer_ensure_config(&fb,
|
||||
{
|
||||
GPU_ATTACHMENT_NONE,
|
||||
GPU_ATTACHMENT_TEXTURE(tex),
|
||||
});
|
||||
|
||||
float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer");
|
||||
|
||||
GPU_framebuffer_bind(fb);
|
||||
while (g_tf_calls != nullptr) {
|
||||
ParticleRefineCall *pr_call = g_tf_calls;
|
||||
g_tf_calls = g_tf_calls->next;
|
||||
|
||||
g_tf_id_offset = 0;
|
||||
while (pr_call->vert_len > 0) {
|
||||
int max_read_px_len = min_ii(width * height, pr_call->vert_len);
|
||||
|
||||
DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
|
||||
/* Read back result to main memory. */
|
||||
GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data);
|
||||
/* Upload back to VBO. */
|
||||
GPU_vertbuf_use(pr_call->vbo);
|
||||
GPU_vertbuf_update_sub(pr_call->vbo,
|
||||
sizeof(float[4]) * g_tf_id_offset,
|
||||
sizeof(float[4]) * max_read_px_len,
|
||||
data);
|
||||
|
||||
g_tf_id_offset += max_read_px_len;
|
||||
pr_call->vert_len -= max_read_px_len;
|
||||
}
|
||||
|
||||
MEM_freeN(pr_call);
|
||||
}
|
||||
|
||||
MEM_freeN(data);
|
||||
GPU_framebuffer_free(fb);
|
||||
}
|
||||
else {
|
||||
/* Note(Metal): If compute is not supported, bind a temporary framebuffer to avoid
|
||||
* side-effects from rendering in the active buffer.
|
||||
* We also need to guarantee that a Framebuffer is active to perform any rendering work,
|
||||
* even if there is no output */
|
||||
GPUFrameBuffer *temp_fb = nullptr;
|
||||
GPUFrameBuffer *prev_fb = nullptr;
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) {
|
||||
if (!GPU_compute_shader_support()) {
|
||||
prev_fb = GPU_framebuffer_active_get();
|
||||
char errorOut[256];
|
||||
/* if the framebuffer is invalid we need a dummy framebuffer to be bound. */
|
||||
if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) {
|
||||
int width = 64;
|
||||
int height = 64;
|
||||
GPUTexture *tex = DRW_texture_pool_query_2d(
|
||||
width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update);
|
||||
g_tf_target_height = height;
|
||||
g_tf_target_width = width;
|
||||
|
||||
GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)});
|
||||
|
||||
GPU_framebuffer_bind(temp_fb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Just render the pass when using compute shaders or transform feedback. */
|
||||
DRW_draw_pass(g_tf_pass);
|
||||
if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) {
|
||||
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
|
||||
}
|
||||
|
||||
/* Release temporary framebuffer. */
|
||||
if (temp_fb != nullptr) {
|
||||
GPU_framebuffer_free(temp_fb);
|
||||
}
|
||||
/* Rebind existing framebuffer */
|
||||
if (prev_fb != nullptr) {
|
||||
GPU_framebuffer_bind(prev_fb);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void DRW_hair_free(void)
|
||||
|
||||
@@ -174,7 +174,7 @@ static void extract_edge_fac_finish(const MeshRenderData *mr,
GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf);
MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data);

if (GPU_crappy_amd_driver()) {
if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
/* Some AMD drivers strangely crash with VBO's with a one byte format.
* To workaround we reinitialize the VBO with another format and convert
* all bytes to floats. */
@@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format()
{
static GPUVertFormat format = {0};
if (format.attr_len == 0) {
if (GPU_crappy_amd_driver()) {
if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
}
else {
@@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach

uint offset = subdiv_cache->num_subdiv_loops;
for (int i = 0; i < loose_geom.edge_len; i++) {
if (GPU_crappy_amd_driver()) {
if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) {
float loose_edge_fac[2] = {1.0f, 1.0f};
GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac);
}

@@ -16,6 +16,7 @@ extern "C" {
#endif

int GPU_max_texture_size(void);
int GPU_max_texture_3d_size(void);
int GPU_max_texture_layers(void);
int GPU_max_textures(void);
int GPU_max_textures_vert(void);
@@ -31,6 +32,7 @@ int GPU_max_vertex_attribs(void);
int GPU_max_varying_floats(void);
int GPU_max_shader_storage_buffer_bindings(void);
int GPU_max_compute_shader_storage_blocks(void);
int GPU_max_samplers(void);

int GPU_extensions_len(void);
const char *GPU_extension_get(int i);
@@ -57,6 +59,9 @@ void GPU_mem_stats_get(int *totalmem, int *freemem);
*/
bool GPU_stereo_quadbuffer_support(void);

int GPU_minimum_per_vertex_stride(void);
bool GPU_transform_feedback_support(void);

#ifdef __cplusplus
}
#endif

@@ -40,12 +40,20 @@ extern "C" {

typedef enum {
/* can be extended to support more types */
GPU_USAGE_STREAM,
GPU_USAGE_STATIC, /* do not keep data in memory */
GPU_USAGE_DYNAMIC,
GPU_USAGE_DEVICE_ONLY, /* Do not do host->device data transfers. */
GPU_USAGE_STREAM = 0,
GPU_USAGE_STATIC = 1, /* do not keep data in memory */
GPU_USAGE_DYNAMIC = 2,
GPU_USAGE_DEVICE_ONLY = 3, /* Do not do host->device data transfers. */

/** Extended usage flags. */
/* Flag for vertex buffers used for textures. Skips additional padding/compaction to ensure
* format matches the texture exactly. Can be masked with other properties, and is stripped
* during VertBuf::init. */
GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY = 1 << 3,
} GPUUsageType;

ENUM_OPERATORS(GPUUsageType, GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);

/** Opaque type hiding blender::gpu::VertBuf. */
typedef struct GPUVertBuf GPUVertBuf;

@@ -33,6 +33,11 @@ int GPU_max_texture_size()
return GCaps.max_texture_size;
}

int GPU_max_texture_3d_size(void)
{
return GCaps.max_texture_3d_size;
}

int GPU_texture_size_with_limit(int res)
{
int size = GPU_max_texture_size();
@@ -115,6 +120,11 @@ const char *GPU_extension_get(int i)
return GCaps.extension_get ? GCaps.extension_get(i) : "\0";
}

int GPU_max_samplers()
{
return GCaps.max_samplers;
}

bool GPU_mip_render_workaround()
{
return GCaps.mip_render_workaround;
@@ -176,6 +186,16 @@ int GPU_max_compute_shader_storage_blocks()
return GCaps.max_compute_shader_storage_blocks;
}

int GPU_minimum_per_vertex_stride(void)
{
return GCaps.minimum_per_vertex_stride;
}

bool GPU_transform_feedback_support(void)
{
return GCaps.transform_feedback_support;
}

/** \} */

/* -------------------------------------------------------------------- */

@@ -360,6 +360,13 @@ GPUTexture *GPU_texture_create_compressed_2d(

GPUTexture *GPU_texture_create_from_vertbuf(const char *name, GPUVertBuf *vert)
{
#ifndef NDEBUG
/* Vertex buffers used for texture buffers must be flagged with:
* GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY. */
BLI_assert_msg(unwrap(vert)->extended_usage_ & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY,
"Vertex Buffers used for textures should have usage flag "
"GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY.");
#endif
eGPUTextureFormat tex_format = to_texture_format(GPU_vertbuf_get_format(vert));
Texture *tex = GPUBackend::get()->texture_alloc(name);

@@ -40,10 +40,21 @@ VertBuf::~VertBuf()

void VertBuf::init(const GPUVertFormat *format, GPUUsageType usage)
{
usage_ = usage;
/* Strip extended usage flags. */
usage_ = usage & ~GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY;
#ifndef NDEBUG
/* Store extended usage. */
extended_usage_ = usage;
#endif
flag = GPU_VERTBUF_DATA_DIRTY;
GPU_vertformat_copy(&this->format, format);
if (!format->packed) {
/* Avoid packing vertex formats which are used for texture buffers.
* These cases use singular types and do not need packing. They must
* also not have increased alignment padding to the minimum per-vertex stride. */
if (usage & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY) {
VertexFormat_texture_buffer_pack(&this->format);
}
if (!this->format.packed) {
VertexFormat_pack(&this->format);
}
flag |= GPU_VERTBUF_INIT;
@@ -62,6 +73,10 @@ VertBuf *VertBuf::duplicate()
*dst = *this;
/* Almost full copy... */
dst->handle_refcount_ = 1;
/* Metadata. */
#ifndef NDEBUG
dst->extended_usage_ = extended_usage_;
#endif
/* Duplicate all needed implementation specifics data. */
this->duplicate_data(dst);
return dst;
@@ -192,6 +207,7 @@ void GPU_vertbuf_data_len_set(GPUVertBuf *verts_, uint v_len)
void GPU_vertbuf_attr_set(GPUVertBuf *verts_, uint a_idx, uint v_idx, const void *data)
{
VertBuf *verts = unwrap(verts_);
BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY);
const GPUVertFormat *format = &verts->format;
const GPUVertAttr *a = &format->attrs[a_idx];
BLI_assert(v_idx < verts->vertex_alloc);
@@ -215,6 +231,7 @@ void GPU_vertbuf_attr_fill(GPUVertBuf *verts_, uint a_idx, const void *data)
void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data)
{
VertBuf *verts = unwrap(verts_);
BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY);
const GPUVertFormat *format = &verts->format;
BLI_assert(v_idx < verts->vertex_alloc);
BLI_assert(verts->data != nullptr);
@@ -225,6 +242,7 @@ void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data)
void GPU_vertbuf_attr_fill_stride(GPUVertBuf *verts_, uint a_idx, uint stride, const void *data)
{
VertBuf *verts = unwrap(verts_);
BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY);
const GPUVertFormat *format = &verts->format;
const GPUVertAttr *a = &format->attrs[a_idx];
BLI_assert(a_idx < format->attr_len);

@@ -31,6 +31,11 @@ class VertBuf {
/** NULL indicates data in VRAM (unmapped) */
uchar *data = nullptr;

#ifndef NDEBUG
/** Usage including extended usage flags. */
GPUUsageType extended_usage_ = GPU_USAGE_STATIC;
#endif

protected:
/** Usage hint for GL optimization. */
GPUUsageType usage_ = GPU_USAGE_STATIC;
@@ -83,6 +88,11 @@ class VertBuf {
}
}

GPUUsageType get_usage_type() const
{
return usage_;
}

virtual void update_sub(uint start, uint len, const void *data) = 0;
virtual const void *read() const = 0;
virtual void *unmap(const void *mapped_data) const = 0;

@@ -8,6 +8,8 @@
*/

#include "GPU_vertex_format.h"
#include "GPU_capabilities.h"

#include "gpu_shader_create_info.hh"
#include "gpu_shader_private.hh"
#include "gpu_vertex_format_private.h"
@@ -68,7 +70,7 @@ static uint attr_size(const GPUVertAttr *a)
return a->comp_len * comp_size(static_cast<GPUVertCompType>(a->comp_type));
}

static uint attr_align(const GPUVertAttr *a)
static uint attr_align(const GPUVertAttr *a, uint minimum_stride)
{
if (a->comp_type == GPU_COMP_I10) {
return 4; /* always packed as 10_10_10_2 */
@@ -78,7 +80,10 @@ static uint attr_align(const GPUVertAttr *a)
return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */
}

return c; /* most fetches are ok if components are naturally aligned */
/* Most fetches are ok if components are naturally aligned.
* However, in Metal, the minimum supported per-vertex stride is 4,
* so we must query the GPU and pad out the size accordingly. */
return max_ii(minimum_stride, c);
}

uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len)
@@ -308,7 +313,7 @@ static void show_pack(uint a_idx, uint size, uint pad)
}
#endif

void VertexFormat_pack(GPUVertFormat *format)
static void VertexFormat_pack_impl(GPUVertFormat *format, uint minimum_stride)
{
GPUVertAttr *a0 = &format->attrs[0];
a0->offset = 0;
@@ -320,7 +325,7 @@ void VertexFormat_pack(GPUVertFormat *format)

for (uint a_idx = 1; a_idx < format->attr_len; a_idx++) {
GPUVertAttr *a = &format->attrs[a_idx];
uint mid_padding = padding(offset, attr_align(a));
uint mid_padding = padding(offset, attr_align(a, minimum_stride));
offset += mid_padding;
a->offset = offset;
offset += a->size;
@@ -330,7 +335,7 @@ void VertexFormat_pack(GPUVertFormat *format)
#endif
}

uint end_padding = padding(offset, attr_align(a0));
uint end_padding = padding(offset, attr_align(a0, minimum_stride));

#if PACK_DEBUG
show_pack(0, 0, end_padding);

@@ -16,6 +16,7 @@ extern "C" {
struct GPUVertFormat;

void VertexFormat_pack(struct GPUVertFormat *format);
void VertexFormat_texture_buffer_pack(struct GPUVertFormat *format);
uint padding(uint offset, uint alignment);
uint vertex_buffer_size(const struct GPUVertFormat *format, uint vertex_len);

@@ -386,6 +386,11 @@ static void detect_workarounds()
}
}

/* Disable TF on macOS. */
if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) {
GCaps.transform_feedback_support = false;
}

/* Some Intel drivers have issues with using mips as frame-buffer targets if
* GL_TEXTURE_MAX_LEVEL is higher than the target MIP.
* Only check at the end after all other workarounds because this uses the drawing code.
@@ -431,7 +436,6 @@ static void detect_workarounds()
/** Internal capabilities. */

GLint GLContext::max_cubemap_size = 0;
GLint GLContext::max_texture_3d_size = 0;
GLint GLContext::max_ubo_binds = 0;
GLint GLContext::max_ubo_size = 0;
GLint GLContext::max_ssbo_binds = 0;
@@ -499,6 +503,8 @@ void GLBackend::capabilities_init()
GCaps.shader_draw_parameters_support = epoxy_has_gl_extension("GL_ARB_shader_draw_parameters");
GCaps.compute_shader_support = epoxy_has_gl_extension("GL_ARB_compute_shader") &&
epoxy_gl_version() >= 43;
GCaps.max_samplers = GCaps.max_textures;

if (GCaps.compute_shader_support) {
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &GCaps.max_work_group_count[0]);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &GCaps.max_work_group_count[1]);
@@ -512,8 +518,10 @@ void GLBackend::capabilities_init()
}
GCaps.shader_storage_buffer_objects_support = epoxy_has_gl_extension(
"GL_ARB_shader_storage_buffer_object");
GCaps.transform_feedback_support = true;

/* GL specific capabilities. */
glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size);
glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GCaps.max_texture_3d_size);
glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size);
glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_BLOCKS, &GLContext::max_ubo_binds);
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &GLContext::max_ubo_size);

@@ -40,7 +40,6 @@ class GLContext : public Context {
/** Capabilities. */

static GLint max_cubemap_size;
static GLint max_texture_3d_size;
static GLint max_ubo_size;
static GLint max_ubo_binds;
static GLint max_ssbo_size;

@@ -603,7 +603,7 @@ bool GLTexture::proxy_check(int mip)
{
/* Manual validation first, since some implementation have issues with proxy creation. */
int max_size = GPU_max_texture_size();
int max_3d_size = GLContext::max_texture_3d_size;
int max_3d_size = GPU_max_texture_3d_size();
int max_cube_size = GLContext::max_cubemap_size;
int size[3] = {1, 1, 1};
this->mip_size_get(mip, size);
