Metal: Optimize SSR shader for Apple Silicon #106231

Merged
Jeroen Bakker merged 3 commits from Jason-Fielder/blender:EEVEE_SSR_OptiMetal into main 2023-04-03 08:37:35 +02:00
4 changed files with 47 additions and 42 deletions
Showing only changes of commit 6321311a0d - Show all commits

View File

@ -57,7 +57,9 @@ void workbench_engine_init(void *ved)
wpd->dummy_image_tx = txl->dummy_image_tx;
if (OBJECT_ID_PASS_ENABLED(wpd)) {
wpd->object_id_tx = DRW_texture_pool_query_fullscreen(GPU_R16UI, &draw_engine_workbench);
const eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->object_id_tx = DRW_texture_pool_query_fullscreen_ex(
GPU_R16UI, usage, &draw_engine_workbench);
}
else {
/* Don't free because it's a pool texture. */

View File

@ -29,9 +29,10 @@ void workbench_opaque_engine_init(WORKBENCH_Data *data)
/* Reused the same textures format for transparent pipeline to share the textures. */
const eGPUTextureFormat col_tex_format = GPU_RGBA16F;
const eGPUTextureFormat nor_tex_format = NORMAL_ENCODING_ENABLED() ? GPU_RG16F : GPU_RGBA16F;
const eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->material_buffer_tx = DRW_texture_pool_query_fullscreen(col_tex_format, owner);
wpd->normal_buffer_tx = DRW_texture_pool_query_fullscreen(nor_tex_format, owner);
wpd->material_buffer_tx = DRW_texture_pool_query_fullscreen_ex(col_tex_format, usage, owner);
wpd->normal_buffer_tx = DRW_texture_pool_query_fullscreen_ex(nor_tex_format, usage, owner);
GPU_framebuffer_ensure_config(&fbl->opaque_fb,
{

View File

@ -36,8 +36,9 @@ void workbench_transparent_engine_init(WORKBENCH_Data *data)
const eGPUTextureFormat accum_tex_format = GPU_RGBA16F;
const eGPUTextureFormat reveal_tex_format = NORMAL_ENCODING_ENABLED() ? GPU_RG16F : GPU_RGBA32F;
wpd->accum_buffer_tx = DRW_texture_pool_query_fullscreen(accum_tex_format, owner);
wpd->reveal_buffer_tx = DRW_texture_pool_query_fullscreen(reveal_tex_format, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_SHADER_READ;
wpd->accum_buffer_tx = DRW_texture_pool_query_fullscreen_ex(accum_tex_format, usage, owner);
wpd->reveal_buffer_tx = DRW_texture_pool_query_fullscreen_ex(reveal_tex_format, usage, owner);
GPU_framebuffer_ensure_config(&fbl->transp_accum_fb,
{

View File

@ -225,8 +225,8 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
/* Visit every island in order. */
for (UVAABBIsland *island : islands) {
float dsm_u = island->uv_diagonal.x;
float dsm_v = island->uv_diagonal.y;
const float dsm_u = island->uv_diagonal.x;
const float dsm_v = island->uv_diagonal.y;
bool restart = false;
if (zigzag) {
@ -720,6 +720,15 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
*r_max_v = next_v1;
}
/**
* Pack islands using a mix of other strategies.
* \param islands: The islands to be packed. Will be modified with results.
* \param box_array: Transition storage, will soon be removed.
* \param scale: Scale islands by `scale` before packing.
* \param margin: Add `margin` units around islands before packing.
* \param params: Additional parameters. Scale and margin information is ignored.
* \return Size of square covering the resulting packed UVs. The maximum `u` or `v` co-ordinate.
*/
static float pack_islands_scale_margin(const Span<PackIsland *> islands,
BoxPack *box_array,
const float scale,
@ -830,6 +839,15 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
break;
}
/* Write back box_pack UVs. */
for (int64_t i = 0; i < max_box_pack; i++) {
PackIsland *pack_island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
pack_island->angle = 0.0f;
pack_island->pre_translate.x = (box->x + margin) / scale - pack_island->bounds_rect.xmin;
pack_island->pre_translate.y = (box->y + margin) / scale - pack_island->bounds_rect.ymin;
}
/* At this stage, `max_u` and `max_v` contain the box_pack UVs. */
/* Call Alpaca. */
@ -843,12 +861,22 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
/* Write back Alpaca UVs. */
for (int64_t i = max_box_pack; i < aabbs.size(); i++) {
UVAABBIsland *aabb = aabbs[i];
BoxPack *box = &box_array[i];
box->x = aabb->uv_placement.x;
box->y = aabb->uv_placement.y;
PackIsland *pack_island = islands[aabb->index];
pack_island->angle = aabb->angle;
if (aabb->angle) {
pack_island->pre_translate.x = (aabb->uv_placement.y + margin) / scale /
-pack_island->aspect_y -
pack_island->bounds_rect.xmax;
pack_island->pre_translate.y = (aabb->uv_placement.x + margin) / scale *
pack_island->aspect_y -
pack_island->bounds_rect.ymin;
}
else {
pack_island->pre_translate.x = (aabb->uv_placement.x + margin) / scale -
pack_island->bounds_rect.xmin;
pack_island->pre_translate.y = (aabb->uv_placement.y + margin) / scale -
pack_island->bounds_rect.ymin;
}
}
/* Memory management. */
@ -861,6 +889,9 @@ static float pack_islands_scale_margin(const Span<PackIsland *> islands,
return std::max(max_u, max_v);
}
/** Find the optimal scale to pack islands into the unit square.
* returns largest scale that will pack `islands` into the unit square.
*/
static float pack_islands_margin_fraction(const Span<PackIsland *> &island_vector,
BoxPack *box_array,
const float margin_fraction,
@ -952,7 +983,7 @@ static float pack_islands_margin_fraction(const Span<PackIsland *> &island_vecto
const bool flush = true;
if (flush) {
/* Write back best pack as a side-effect. First get best pack. */
/* Write back best pack as a side-effect. */
if (scale_last != scale_low) {
scale_last = scale_low;
const float max_uv = pack_islands_scale_margin(
@ -961,16 +992,6 @@ static float pack_islands_margin_fraction(const Span<PackIsland *> &island_vecto
UNUSED_VARS(max_uv);
/* TODO (?): `if (max_uv < 1.0f) { scale_last /= max_uv; }` */
}
/* Then expand PackIslands by the correct amount. */
for (const int64_t index : island_vector.index_range()) {
BoxPack *box = &box_array[index];
box->x /= scale_last;
box->y /= scale_last;
PackIsland *island = island_vector[index];
BLI_rctf_pad(
&island->bounds_rect, margin_fraction / scale_last, margin_fraction / scale_last);
}
}
return scale_last;
}
@ -1016,7 +1037,6 @@ static BoxPack *pack_islands_box_array(const Span<PackIsland *> &islands,
const float scale = pack_islands_margin_fraction(islands, box_array, params.margin, params);
r_scale[0] = scale;
r_scale[1] = scale;
/* pack_islands_margin_fraction will pad PackIslands, return early. */
return box_array;
}
@ -1038,10 +1058,6 @@ static BoxPack *pack_islands_box_array(const Span<PackIsland *> &islands,
r_scale[0] = 1.0f / max_uv;
r_scale[1] = r_scale[0];
for (int index = 0; index < islands.size(); index++) {
PackIsland *island = islands[index];
BLI_rctf_pad(&island->bounds_rect, margin, margin);
}
return box_array;
}
@ -1050,21 +1066,6 @@ void pack_islands(const Span<PackIsland *> &islands,
float r_scale[2])
{
BoxPack *box_array = pack_islands_box_array(islands, params, r_scale);
for (int64_t i : islands.index_range()) {
BoxPack *box = box_array + i;
PackIsland *island = islands[box->index];
if (island->angle) {
/* TODO: Apply proper rotation. */
island->pre_translate.x = (-box->y / island->aspect_y) - island->bounds_rect.xmax;
island->pre_translate.y = (box->x * island->aspect_y) - island->bounds_rect.ymin;
}
else {
island->pre_translate.x = box->x - island->bounds_rect.xmin;
island->pre_translate.y = box->y - island->bounds_rect.ymin;
}
}
MEM_freeN(box_array);
}