Metal: Optimize SSR and texture flags #106221

Closed
Jason Fielder wants to merge 3 commits from Jason-Fielder/blender:MetalOptimizations_SSR_TexFlags_Mar3 into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
17 changed files with 217 additions and 125 deletions

View File

@ -42,8 +42,9 @@ int EEVEE_bloom_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata)
effects->blit_texel_size[0] = 1.0f / (float)blitsize[0];
effects->blit_texel_size[1] = 1.0f / (float)blitsize[1];
effects->bloom_blit = DRW_texture_pool_query_2d(
blitsize[0], blitsize[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->bloom_blit = DRW_texture_pool_query_2d_ex(
blitsize[0], blitsize[1], GPU_R11F_G11F_B10F, usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(
&fbl->bloom_blit_fb, {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(effects->bloom_blit)});
@ -83,8 +84,11 @@ int EEVEE_bloom_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata)
effects->downsamp_texel_size[i][0] = 1.0f / (float)texsize[0];
effects->downsamp_texel_size[i][1] = 1.0f / (float)texsize[1];
effects->bloom_downsample[i] = DRW_texture_pool_query_2d(
texsize[0], texsize[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
eGPUTextureUsage downsample_usage = GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
effects->bloom_downsample[i] = DRW_texture_pool_query_2d_ex(
texsize[0], texsize[1], GPU_R11F_G11F_B10F, downsample_usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(
&fbl->bloom_down_fb[i],
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(effects->bloom_downsample[i])});
@ -99,8 +103,13 @@ int EEVEE_bloom_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *vedata)
texsize[0] = MAX2(texsize[0], 2);
texsize[1] = MAX2(texsize[1], 2);
effects->bloom_upsample[i] = DRW_texture_pool_query_2d(
texsize[0], texsize[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
eGPUTextureUsage upsample_usage = GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->bloom_upsample[i] = DRW_texture_pool_query_2d_ex(
texsize[0], texsize[1], GPU_R11F_G11F_B10F, upsample_usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(
&fbl->bloom_accum_fb[i],
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(effects->bloom_upsample[i])});

View File

@ -574,7 +574,8 @@ static void dof_reduce_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
void *owner = (void *)&EEVEE_depth_of_field_init;
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
fx->dof_downsample_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(quater_res), COLOR_FORMAT, usage, owner);

View File

@ -118,8 +118,13 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata,
});
}
else {
DRW_texture_ensure_2d(
&txl->maxzbuffer, UNPACK2(effects->hiz_size), GPU_DEPTH_COMPONENT24, DRW_TEX_MIPMAP);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
DRW_texture_ensure_2d_ex(&txl->maxzbuffer,
UNPACK2(effects->hiz_size),
GPU_DEPTH_COMPONENT24,
usage,
DRW_TEX_MIPMAP);
GPU_framebuffer_ensure_config(&fbl->maxzbuffer_fb,
{
GPU_ATTACHMENT_TEXTURE(txl->maxzbuffer),
@ -146,10 +151,13 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata,
* Used for SSReflections & SSRefractions.
*/
if ((effects->enabled_effects & EFFECT_RADIANCE_BUFFER) != 0) {
DRW_texture_ensure_2d(&txl->filtered_radiance,
UNPACK2(effects->hiz_size),
GPU_R11F_G11F_B10F,
DRW_TEX_FILTER | DRW_TEX_MIPMAP);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
DRW_texture_ensure_2d_ex(&txl->filtered_radiance,
UNPACK2(effects->hiz_size),
GPU_R11F_G11F_B10F,
usage,
DRW_TEX_FILTER | DRW_TEX_MIPMAP);
GPU_framebuffer_ensure_config(&fbl->radiance_filtered_fb,
{
@ -166,8 +174,9 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata,
* Normal buffer for deferred passes.
*/
if ((effects->enabled_effects & EFFECT_NORMAL_BUFFER) != 0) {
effects->ssr_normal_input = DRW_texture_pool_query_2d(
size_fs[0], size_fs[1], GPU_RG16, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->ssr_normal_input = DRW_texture_pool_query_2d_ex(
size_fs[0], size_fs[1], GPU_RG16, usage, &draw_engine_eevee_type);
GPU_framebuffer_texture_attach(fbl->main_fb, effects->ssr_normal_input, 1, 0);
}
@ -179,8 +188,9 @@ void EEVEE_effects_init(EEVEE_ViewLayerData *sldata,
* Motion vector buffer for correct TAA / motion blur.
*/
if ((effects->enabled_effects & EFFECT_VELOCITY_BUFFER) != 0) {
effects->velocity_tx = DRW_texture_pool_query_2d(
size_fs[0], size_fs[1], GPU_RGBA16, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->velocity_tx = DRW_texture_pool_query_2d_ex(
size_fs[0], size_fs[1], GPU_RGBA16, usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(&fbl->velocity_fb,
{

View File

@ -101,22 +101,28 @@ static void planar_pool_ensure_alloc(EEVEE_Data *vedata, int num_planar_ref)
/* We need an array texture, so allocate it ourselves. */
if (!txl->planar_pool) {
eGPUTextureUsage planar_usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
eGPUTextureUsage planar_usage_depth = GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_SHADER_READ;
if (num_planar_ref > 0) {
txl->planar_pool = DRW_texture_create_2d_array(width,
height,
num_planar_ref,
GPU_R11F_G11F_B10F,
DRW_TEX_FILTER | DRW_TEX_MIPMAP,
NULL);
txl->planar_depth = DRW_texture_create_2d_array(
width, height, num_planar_ref, GPU_DEPTH_COMPONENT24, 0, NULL);
txl->planar_pool = DRW_texture_create_2d_array_ex(width,
height,
num_planar_ref,
GPU_R11F_G11F_B10F,
planar_usage,
DRW_TEX_FILTER | DRW_TEX_MIPMAP,
NULL);
txl->planar_depth = DRW_texture_create_2d_array_ex(
width, height, num_planar_ref, GPU_DEPTH_COMPONENT24, planar_usage_depth, 0, NULL);
}
else if (num_planar_ref == 0) {
/* Makes OpenGL happy: create a placeholder texture that will never be sampled but is still
* bound to the shader. */
txl->planar_pool = DRW_texture_create_2d_array(
1, 1, 1, GPU_RGBA8, DRW_TEX_FILTER | DRW_TEX_MIPMAP, NULL);
txl->planar_depth = DRW_texture_create_2d_array(1, 1, 1, GPU_DEPTH_COMPONENT24, 0, NULL);
txl->planar_pool = DRW_texture_create_2d_array_ex(
1, 1, 1, GPU_RGBA8, planar_usage, DRW_TEX_FILTER | DRW_TEX_MIPMAP, NULL);
txl->planar_depth = DRW_texture_create_2d_array_ex(
1, 1, 1, GPU_DEPTH_COMPONENT24, planar_usage_depth, 0, NULL);
}
}
}
@ -182,8 +188,10 @@ void EEVEE_lightprobes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
/* Placeholder planar pool: used when rendering planar reflections (avoid dependency loop). */
if (!e_data.planar_pool_placeholder) {
e_data.planar_pool_placeholder = DRW_texture_create_2d_array(
1, 1, 1, GPU_RGBA8, DRW_TEX_FILTER, NULL);
eGPUTextureUsage planar_usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
e_data.planar_pool_placeholder = DRW_texture_create_2d_array_ex(
1, 1, 1, GPU_RGBA8, planar_usage, DRW_TEX_FILTER, NULL);
}
}

View File

@ -197,8 +197,9 @@ static void eevee_init_util_texture(void)
texels_layer += 64 * 64;
}
e_data.util_tex = DRW_texture_create_2d_array(
64, 64, layers, GPU_RGBA16F, DRW_TEX_FILTER | DRW_TEX_WRAP, (float *)texels);
eGPUTextureUsage util_usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
e_data.util_tex = DRW_texture_create_2d_array_ex(
64, 64, layers, GPU_RGBA16F, util_usage, DRW_TEX_FILTER | DRW_TEX_WRAP, (float *)texels);
MEM_freeN(texels);
#if RUNTIME_LUT_CREATION

View File

@ -36,8 +36,10 @@ int EEVEE_occlusion_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
const Scene *scene_eval = DEG_get_evaluated_scene(draw_ctx->depsgraph);
if (!e_data.dummy_horizon_tx) {
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
const float pixel[4] = {0.0f, 0.0f, 0.0f, 0.0f};
e_data.dummy_horizon_tx = DRW_texture_create_2d(1, 1, GPU_RGBA8, DRW_TEX_WRAP, pixel);
e_data.dummy_horizon_tx = DRW_texture_create_2d_ex(
1, 1, GPU_RGBA8, usage, DRW_TEX_WRAP, pixel);
}
if (scene_eval->eevee.flag & SCE_EEVEE_GTAO_ENABLED ||
@ -61,8 +63,9 @@ int EEVEE_occlusion_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
common_data->ao_bounce_fac = (scene_eval->eevee.flag & SCE_EEVEE_GTAO_BOUNCE) ? 1.0f : 0.0f;
effects->gtao_horizons_renderpass = DRW_texture_pool_query_2d(
UNPACK2(effects->hiz_size), GPU_RGBA8, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
effects->gtao_horizons_renderpass = DRW_texture_pool_query_2d_ex(
UNPACK2(effects->hiz_size), GPU_RGBA8, usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(
&fbl->gtao_fb,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(effects->gtao_horizons_renderpass)});

View File

@ -72,13 +72,17 @@ int EEVEE_screen_raytrace_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
common_data->ssr_uv_scale[1] = size_fs[1] / ((float)tracing_res[1] * divisor);
/* MRT for the shading pass in order to output needed data for the SSR pass. */
effects->ssr_specrough_input = DRW_texture_pool_query_2d(UNPACK2(size_fs), format, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->ssr_specrough_input = DRW_texture_pool_query_2d_ex(
UNPACK2(size_fs), format, usage, owner);
GPU_framebuffer_texture_attach(fbl->main_fb, effects->ssr_specrough_input, 2, 0);
/* Ray-tracing output. */
effects->ssr_hit_output = DRW_texture_pool_query_2d(UNPACK2(tracing_res), GPU_RGBA16F, owner);
effects->ssr_hit_depth = DRW_texture_pool_query_2d(UNPACK2(tracing_res), GPU_R16F, owner);
effects->ssr_hit_output = DRW_texture_pool_query_2d_ex(
UNPACK2(tracing_res), GPU_RGBA16F, usage, owner);
effects->ssr_hit_depth = DRW_texture_pool_query_2d_ex(
UNPACK2(tracing_res), GPU_R16F, usage, owner);
GPU_framebuffer_ensure_config(&fbl->screen_tracing_fb,
{

View File

@ -213,22 +213,26 @@ void EEVEE_shadows_update(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
linfo->cache_num_cascade_layer = linfo->num_cascade_layer;
}
eGPUTextureUsage shadow_usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
if (!sldata->shadow_cube_pool) {
sldata->shadow_cube_pool = DRW_texture_create_2d_array(linfo->shadow_cube_size,
linfo->shadow_cube_size,
max_ii(1, linfo->num_cube_layer * 6),
shadow_pool_format,
DRW_TEX_FILTER | DRW_TEX_COMPARE,
NULL);
sldata->shadow_cube_pool = DRW_texture_create_2d_array_ex(linfo->shadow_cube_size,
linfo->shadow_cube_size,
max_ii(1, linfo->num_cube_layer * 6),
shadow_pool_format,
shadow_usage,
DRW_TEX_FILTER | DRW_TEX_COMPARE,
NULL);
}
if (!sldata->shadow_cascade_pool) {
sldata->shadow_cascade_pool = DRW_texture_create_2d_array(linfo->shadow_cascade_size,
linfo->shadow_cascade_size,
max_ii(1, linfo->num_cascade_layer),
shadow_pool_format,
DRW_TEX_FILTER | DRW_TEX_COMPARE,
NULL);
sldata->shadow_cascade_pool = DRW_texture_create_2d_array_ex(
linfo->shadow_cascade_size,
linfo->shadow_cascade_size,
max_ii(1, linfo->num_cascade_layer),
shadow_pool_format,
shadow_usage,
DRW_TEX_FILTER | DRW_TEX_COMPARE,
NULL);
}
if (sldata->shadow_fb == NULL) {

View File

@ -38,16 +38,18 @@ void EEVEE_subsurface_draw_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
* as the depth buffer we are sampling from. This could be avoided if the stencil is
* a separate texture but that needs OpenGL 4.4 or ARB_texture_stencil8.
* OR OpenGL 4.3 / ARB_ES3_compatibility if using a render-buffer instead. */
effects->sss_stencil = DRW_texture_pool_query_2d(
fs_size[0], fs_size[1], GPU_DEPTH24_STENCIL8, &draw_engine_eevee_type);
effects->sss_blur = DRW_texture_pool_query_2d(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
effects->sss_irradiance = DRW_texture_pool_query_2d(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
effects->sss_radius = DRW_texture_pool_query_2d(
fs_size[0], fs_size[1], GPU_R16F, &draw_engine_eevee_type);
effects->sss_albedo = DRW_texture_pool_query_2d(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->sss_stencil = DRW_texture_pool_query_2d_ex(
fs_size[0], fs_size[1], GPU_DEPTH24_STENCIL8, usage, &draw_engine_eevee_type);
effects->sss_blur = DRW_texture_pool_query_2d_ex(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, usage, &draw_engine_eevee_type);
effects->sss_irradiance = DRW_texture_pool_query_2d_ex(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, usage, &draw_engine_eevee_type);
effects->sss_radius = DRW_texture_pool_query_2d_ex(
fs_size[0], fs_size[1], GPU_R16F, usage, &draw_engine_eevee_type);
effects->sss_albedo = DRW_texture_pool_query_2d_ex(
fs_size[0], fs_size[1], GPU_R11F_G11F_B10F, usage, &draw_engine_eevee_type);
GPUTexture *stencil_tex = effects->sss_stencil;

View File

@ -192,8 +192,11 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
if (!e_data.dummy_scatter) {
const float scatter[4] = {0.0f, 0.0f, 0.0f, 0.0f};
const float transmit[4] = {1.0f, 1.0f, 1.0f, 1.0f};
e_data.dummy_scatter = DRW_texture_create_3d(1, 1, 1, GPU_RGBA8, DRW_TEX_WRAP, scatter);
e_data.dummy_transmit = DRW_texture_create_3d(1, 1, 1, GPU_RGBA8, DRW_TEX_WRAP, transmit);
eGPUTextureUsage dummy_usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
e_data.dummy_scatter = DRW_texture_create_3d_ex(
1, 1, 1, GPU_RGBA8, dummy_usage, DRW_TEX_WRAP, scatter);
e_data.dummy_transmit = DRW_texture_create_3d_ex(
1, 1, 1, GPU_RGBA8, dummy_usage, DRW_TEX_WRAP, transmit);
}
}
@ -424,31 +427,54 @@ void EEVEE_volumes_draw_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
if (txl->volume_prop_scattering == NULL) {
/* Volume properties: We evaluate all volumetric objects
* and store their final properties into each froxel */
txl->volume_prop_scattering = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_prop_extinction = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_prop_emission = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_prop_phase = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_RG16F, DRW_TEX_FILTER, NULL);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
txl->volume_prop_scattering = DRW_texture_create_3d_ex(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, usage, DRW_TEX_FILTER, NULL);
txl->volume_prop_extinction = DRW_texture_create_3d_ex(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, usage, DRW_TEX_FILTER, NULL);
txl->volume_prop_emission = DRW_texture_create_3d_ex(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, usage, DRW_TEX_FILTER, NULL);
txl->volume_prop_phase = DRW_texture_create_3d_ex(
tex_size[0], tex_size[1], tex_size[2], GPU_RG16F, usage, DRW_TEX_FILTER, NULL);
/* Volume scattering: We compute for each froxel the
* Scattered light towards the view. We also resolve temporal
* super sampling during this stage. */
txl->volume_scatter = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_transmit = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
eGPUTextureUsage usage_write = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_SHADER_WRITE;
txl->volume_scatter = DRW_texture_create_3d_ex(tex_size[0],
tex_size[1],
tex_size[2],
GPU_R11F_G11F_B10F,
usage_write,
DRW_TEX_FILTER,
NULL);
txl->volume_transmit = DRW_texture_create_3d_ex(tex_size[0],
tex_size[1],
tex_size[2],
GPU_R11F_G11F_B10F,
usage_write,
DRW_TEX_FILTER,
NULL);
/* Final integration: We compute for each froxel the
* amount of scattered light and extinction coef at this
* given depth. We use these textures as double buffer
* for the volumetric history. */
txl->volume_scatter_history = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_transmit_history = DRW_texture_create_3d(
tex_size[0], tex_size[1], tex_size[2], GPU_R11F_G11F_B10F, DRW_TEX_FILTER, NULL);
txl->volume_scatter_history = DRW_texture_create_3d_ex(tex_size[0],
tex_size[1],
tex_size[2],
GPU_R11F_G11F_B10F,
usage_write,
DRW_TEX_FILTER,
NULL);
txl->volume_transmit_history = DRW_texture_create_3d_ex(tex_size[0],
tex_size[1],
tex_size[2],
GPU_R11F_G11F_B10F,
usage_write,
DRW_TEX_FILTER,
NULL);
}
GPU_framebuffer_ensure_config(&fbl->volumetric_fb,

View File

@ -103,6 +103,20 @@ void resolve_reflection_sample(int planar_index,
weight_accum += weight;
}
/* NOTE(Metal): For Apple silicon GPUs executing this particular shader, by default, memory read
 * pressure is high while ALU remains low. Packing the sample data into a smaller format balances
 * this trade-off by reducing local shader register pressure and expensive memory look-ups into
 * spilled local shader memory, resulting in an increase in performance of 20% for this shader.
 *
 * Packed layout (Metal only): each offset component is biased by +2 and stored in 3 bits of a
 * single `uchar`, with x in bits 3-5 and y in bits 0-2. Components must therefore stay within
 * [-2, 5]. On other back-ends the offset is kept as a plain `vec2` and unpacking is a no-op.
 *
 * Macro arguments are parenthesized so that compound expressions expand with the intended
 * precedence. */
#ifdef GPU_METAL
#  define SAMPLE_STORAGE_TYPE uchar
#  define pack_sample(x, y) uchar(((uchar((x) + 2)) << uchar(3)) + (uchar((y) + 2)))
#  define unpack_sample(x) vec2((char(x) >> 3) - 2, (char(x) & 7) - 2)
#else
#  define SAMPLE_STORAGE_TYPE vec2
#  define pack_sample(x, y) SAMPLE_STORAGE_TYPE((x), (y))
#  define unpack_sample(x) (x)
#endif
void raytrace_resolve(ClosureInputGlossy cl_in,
inout ClosureEvalGlossy cl_eval,
inout ClosureEvalCommon cl_common,
@ -110,55 +124,55 @@ void raytrace_resolve(ClosureInputGlossy cl_in,
{
/* Note: Reflection samples are declared in function scope to avoid per-thread memory pressure
* on tile-based GPUs, e.g. Apple Silicon. */
const vec2 resolve_sample_offsets[36] = vec2[36](
const SAMPLE_STORAGE_TYPE resolve_sample_offsets[36] = SAMPLE_STORAGE_TYPE[36](
/* Set 1. */
/* First Ring (2x2). */
vec2(0, 0),
pack_sample(0, 0),
/* Second Ring (6x6). */
vec2(-1, 3),
vec2(1, 3),
vec2(-1, 1),
vec2(3, 1),
vec2(-2, 0),
vec2(3, 0),
vec2(2, -1),
vec2(1, -2),
pack_sample(-1, 3),
pack_sample(1, 3),
pack_sample(-1, 1),
pack_sample(3, 1),
pack_sample(-2, 0),
pack_sample(3, 0),
pack_sample(2, -1),
pack_sample(1, -2),
/* Set 2. */
/* First Ring (2x2). */
vec2(1, 1),
pack_sample(1, 1),
/* Second Ring (6x6). */
vec2(-2, 3),
vec2(3, 3),
vec2(0, 2),
vec2(2, 2),
vec2(-2, -1),
vec2(1, -1),
vec2(0, -2),
vec2(3, -2),
pack_sample(-2, 3),
pack_sample(3, 3),
pack_sample(0, 2),
pack_sample(2, 2),
pack_sample(-2, -1),
pack_sample(1, -1),
pack_sample(0, -2),
pack_sample(3, -2),
/* Set 3. */
/* First Ring (2x2). */
vec2(0, 1),
pack_sample(0, 1),
/* Second Ring (6x6). */
vec2(0, 3),
vec2(3, 2),
vec2(-2, 1),
vec2(2, 1),
vec2(-1, 0),
vec2(-2, -2),
vec2(0, -1),
vec2(2, -2),
pack_sample(0, 3),
pack_sample(3, 2),
pack_sample(-2, 1),
pack_sample(2, 1),
pack_sample(-1, 0),
pack_sample(-2, -2),
pack_sample(0, -1),
pack_sample(2, -2),
/* Set 4. */
/* First Ring (2x2). */
vec2(1, 0),
pack_sample(1, 0),
/* Second Ring (6x6). */
vec2(2, 3),
vec2(-2, 2),
vec2(-1, 2),
vec2(1, 2),
vec2(2, 0),
vec2(-1, -1),
vec2(3, -1),
vec2(-1, -2));
pack_sample(2, 3),
pack_sample(-2, 2),
pack_sample(-1, 2),
pack_sample(1, 2),
pack_sample(2, 0),
pack_sample(-1, -1),
pack_sample(3, -1),
pack_sample(-1, -2));
float roughness = cl_in.roughness;
@ -208,7 +222,8 @@ void raytrace_resolve(ClosureInputGlossy cl_in,
int sample_id = sample_pool * resolve_samples_count + i;
vec2 texture_size = vec2(textureSize(hitBuffer, 0));
vec2 sample_texel = texture_size * uvcoordsvar.xy * ssrUvScale;
vec2 sample_uv = (sample_texel + resolve_sample_offsets[sample_id]) / texture_size;
vec2 sample_uv = (sample_texel + unpack_sample(resolve_sample_offsets[sample_id])) /
texture_size;
resolve_reflection_sample(
planar_index, sample_uv, vP, vN, vV, roughness_squared, cone_tan, weight_acc, ssr_accum);

View File

@ -57,7 +57,9 @@ void workbench_engine_init(void *ved)
wpd->dummy_image_tx = txl->dummy_image_tx;
if (OBJECT_ID_PASS_ENABLED(wpd)) {
wpd->object_id_tx = DRW_texture_pool_query_fullscreen(GPU_R16UI, &draw_engine_workbench);
const eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->object_id_tx = DRW_texture_pool_query_fullscreen_ex(
GPU_R16UI, usage, &draw_engine_workbench);
}
else {
/* Don't free because it's a pool texture. */

View File

@ -29,9 +29,10 @@ void workbench_opaque_engine_init(WORKBENCH_Data *data)
/* Reused the same textures format for transparent pipeline to share the textures. */
const eGPUTextureFormat col_tex_format = GPU_RGBA16F;
const eGPUTextureFormat nor_tex_format = NORMAL_ENCODING_ENABLED() ? GPU_RG16F : GPU_RGBA16F;
const eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->material_buffer_tx = DRW_texture_pool_query_fullscreen(col_tex_format, owner);
wpd->normal_buffer_tx = DRW_texture_pool_query_fullscreen(nor_tex_format, owner);
wpd->material_buffer_tx = DRW_texture_pool_query_fullscreen_ex(col_tex_format, usage, owner);
wpd->normal_buffer_tx = DRW_texture_pool_query_fullscreen_ex(nor_tex_format, usage, owner);
GPU_framebuffer_ensure_config(&fbl->opaque_fb,
{

View File

@ -36,8 +36,9 @@ void workbench_transparent_engine_init(WORKBENCH_Data *data)
const eGPUTextureFormat accum_tex_format = GPU_RGBA16F;
const eGPUTextureFormat reveal_tex_format = NORMAL_ENCODING_ENABLED() ? GPU_RG16F : GPU_RGBA32F;
wpd->accum_buffer_tx = DRW_texture_pool_query_fullscreen(accum_tex_format, owner);
wpd->reveal_buffer_tx = DRW_texture_pool_query_fullscreen(reveal_tex_format, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->accum_buffer_tx = DRW_texture_pool_query_fullscreen_ex(accum_tex_format, usage, owner);
wpd->reveal_buffer_tx = DRW_texture_pool_query_fullscreen_ex(reveal_tex_format, usage, owner);
GPU_framebuffer_ensure_config(&fbl->transp_accum_fb,
{

View File

@ -81,7 +81,7 @@ void immDrawPixelsTexScaledFullSize(const IMMDrawPixelsTexState *state,
const int mip_len = use_mipmap ? 9999 : 1;
GPUTexture *tex = GPU_texture_create_2d(
"immDrawPixels", img_w, img_h, mip_len, gpu_format, GPU_TEXTURE_USAGE_GENERAL, NULL);
"immDrawPixels", img_w, img_h, mip_len, gpu_format, GPU_TEXTURE_USAGE_SHADER_READ, NULL);
const bool use_float_data = ELEM(gpu_format, GPU_RGBA16F, GPU_RGB16F, GPU_R16F);
eGPUDataFormat gpu_data_format = (use_float_data) ? GPU_DATA_FLOAT : GPU_DATA_UBYTE;
@ -183,7 +183,7 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
size_t stride = components * ((use_float_data) ? sizeof(float) : sizeof(uchar));
GPUTexture *tex = GPU_texture_create_2d(
"immDrawPixels", tex_w, tex_h, 1, gpu_format, GPU_TEXTURE_USAGE_GENERAL, NULL);
"immDrawPixels", tex_w, tex_h, 1, gpu_format, GPU_TEXTURE_USAGE_SHADER_READ, NULL);
GPU_texture_filter_mode(tex, use_filter);
GPU_texture_wrap_mode(tex, false, true);

View File

@ -239,7 +239,7 @@ static void gpu_material_sky_texture_build(GPUMaterial *mat)
mat->sky_builder->current_layer,
1,
GPU_RGBA32F,
GPU_TEXTURE_USAGE_GENERAL,
GPU_TEXTURE_USAGE_SHADER_READ,
(float *)mat->sky_builder->pixels);
MEM_freeN(mat->sky_builder);

View File

@ -1041,14 +1041,17 @@ void gpu::MTLTexture::update_sub(
if (texture_.storageMode == MTLStorageModeManaged) {
[blit_encoder synchronizeResource:texture_];
}
[blit_encoder optimizeContentsForGPUAccess:texture_];
}
else {
/* Textures which use MTLStorageModeManaged need to have updated contents
* synced back to CPU to avoid an automatic flush overwriting contents. */
blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
if (texture_.storageMode == MTLStorageModeManaged) {
blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder synchronizeResource:texture_];
}
[blit_encoder optimizeContentsForGPUAccess:texture_];
}
/* Decrement texture reference counts. This ensures temporary texture views are released. */
@ -1110,6 +1113,7 @@ void MTLTexture::update_sub(int offset[3],
if (texture_.storageMode == MTLStorageModeManaged) {
[blit_encoder synchronizeResource:texture_];
}
[blit_encoder optimizeContentsForGPUAccess:texture_];
}
else {
BLI_assert(false);
@ -1230,6 +1234,7 @@ void gpu::MTLTexture::copy_to(Texture *dst)
BLI_assert(mt_dst->d_ == d_);
[blit_encoder copyFromTexture:this->get_metal_handle_base()
toTexture:mt_dst->get_metal_handle_base()];
[blit_encoder optimizeContentsForGPUAccess:mt_dst->get_metal_handle_base()];
} break;
default: {
int slice = 0;