EEVEE-Next: Resolve compilation errors in Metal #106219

Merged
Jeroen Bakker merged 8 commits from Jason-Fielder/blender:MetalEEVEENext_shaderCompilation into main 2023-04-20 08:03:42 +02:00
18 changed files with 191 additions and 85 deletions

View File

@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx,
* The full pixel neighborhood is gathered. * The full pixel neighborhood is gathered.
* \{ */ * \{ */
void dof_slight_focus_gather(sampler2D depth_tx, void dof_slight_focus_gather(depth2D depth_tx,
sampler2D color_tx, sampler2D color_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */ sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
float radius, float radius,

View File

@ -62,7 +62,7 @@ void main()
int mask_shift = 1; int mask_shift = 1;
#define downsample_level(out_mip__, lod_) \ #define downsample_level(out_mip__, lod_) \
active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \
barrier(); /* Wait for previous writes to finish. */ \ barrier(); /* Wait for previous writes to finish. */ \
if (active_thread) { \ if (active_thread) { \
max_depth = max_v4(load_local_depths(local_px)); \ max_depth = max_v4(load_local_depths(local_px)); \
@ -89,12 +89,12 @@ void main()
} }
finished_tile_counter = 0u; finished_tile_counter = 0u;
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u));
ivec2 image_border = imageSize(out_mip_5) - 1; ivec2 image_border = imageSize(out_mip_5) - 1;
for (int y = 0; y < iter.y; y++) { for (int y = 0; y < iter.y; y++) {
for (int x = 0; x < iter.x; x++) { for (int x = 0; x < iter.x; x++) {
/* Load result of the other work groups. */ /* Load result of the other work groups. */
kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
src_px = ivec2(kernel_origin + local_px) * 2; src_px = ivec2(kernel_origin + local_px) * 2;
vec4 samp; vec4 samp;
samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;

View File

@ -168,13 +168,15 @@ void main()
} }
/* Fallthrough to the hemispheric case. */ /* Fallthrough to the hemispheric case. */
case LIGHT_RECT: case LIGHT_RECT:
case LIGHT_ELLIPSE: case LIGHT_ELLIPSE: {
vec3 v000 = vP - v_right * radius - v_up * radius; vec3 v000 = vP - v_right * radius - v_up * radius;
vec3 v100 = v000 + v_right * (radius * 2.0); vec3 v100 = v000 + v_right * (radius * 2.0);
vec3 v010 = v000 + v_up * (radius * 2.0); vec3 v010 = v000 + v_up * (radius * 2.0);
vec3 v001 = v000 - v_back * radius; vec3 v001 = v000 - v_back * radius;
Box bbox = shape_box(v000, v100, v010, v001); Box bbox = shape_box(v000, v100, v010, v001);
intersect_tile = intersect_tile && intersect(tile, bbox); intersect_tile = intersect_tile && intersect(tile, bbox);
break;
}
default: default:
break; break;
} }

View File

@ -74,8 +74,10 @@ void main()
vec4 max_motion = imageLoad(in_tiles_img, src_tile); vec4 max_motion = imageLoad(in_tiles_img, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile); MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy,
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); uvec2(src_tile));
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
if (true) { if (true) {
/* Rectangular area (in tiles) where the motion vector spreads. */ /* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy); MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
@ -85,17 +87,20 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) { for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y); ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) { if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
/* FIXME: This is a bit weird, but for some reason, we need the store the same vector in /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
* the motion next so that weighting in gather pass is better. */ * the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
} }
} }
} }
} }
if (true) { if (true) {
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
/* Rectangular area (in tiles) where the motion vector spreads. */ /* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw); MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw); MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
@ -104,10 +109,12 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) { for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y); ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) { if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
/* FIXME: This is a bit weird, but for some reason, we need the store the same vector in /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
* the motion next so that weighting in gather pass is better. */ * the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
} }
} }
} }

View File

@ -178,10 +178,10 @@ void main()
vec4 max_motion; vec4 max_motion;
/* Load dilation result from the indirection table. */ /* Load dilation result from the indirection table. */
ivec2 tile_prev; ivec2 tile_prev;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev); motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev);
max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy; max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
ivec2 tile_next; ivec2 tile_next;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next); motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next);
max_motion.zw = imageLoad(in_tiles_img, tile_next).zw; max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
Accumulator accum; Accumulator accum;

View File

@ -242,13 +242,13 @@ void output_aov(vec4 color, float value, uint hash)
#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER) #if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) { for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
if (aov_buf.hash_color[i] == hash) { if (aov_buf.hash_color[i] == hash) {
imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color); imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color);
return; return;
} }
} }
for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) { for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
if (aov_buf.hash_value[i] == hash) { if (aov_buf.hash_value[i] == hash) {
imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value)); imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value));
return; return;
} }
} }

View File

@ -65,7 +65,7 @@ void main()
} }
AABB aabb_tag; AABB aabb_tag;
AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999)); AABB aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999));
/* Directionnal winmat have no correct near/far in the Z dimension at this point. /* Directionnal winmat have no correct near/far in the Z dimension at this point.
* Do not clip in this dimension. */ * Do not clip in this dimension. */
@ -87,7 +87,7 @@ void main()
for (int y = box_min.y; y <= box_max.y; y++) { for (int y = box_min.y; y <= box_max.y; y++) {
for (int x = box_min.x; x <= box_max.x; x++) { for (int x = box_min.x; x <= box_max.x; x++) {
int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod); int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod);
atomicOr(tiles_buf[tile_index], SHADOW_DO_UPDATE); atomicOr(tiles_buf[tile_index], uint(SHADOW_DO_UPDATE));
} }
} }
} }

View File

@ -21,7 +21,7 @@ void shadow_tag_usage_tile(LightData light, ivec2 tile_co, int lod, int tilemap_
tile_co >>= lod; tile_co >>= lod;
int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod); int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod);
atomicOr(tiles_buf[tile_index], SHADOW_IS_USED); atomicOr(tiles_buf[tile_index], uint(SHADOW_IS_USED));
} }
void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius) void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius)

View File

@ -44,7 +44,7 @@ bool is_visible(IsectBox box)
bool intersects_near_plane(IsectBox box) bool intersects_near_plane(IsectBox box)
{ {
vec4 near_plane = drw_view_culling.planes[4]; vec4 near_plane = drw_view_culling.frustum_planes.planes[4];
bool on_positive_side = false; bool on_positive_side = false;
bool on_negative_side = false; bool on_negative_side = false;

View File

@ -21,6 +21,8 @@ typedef struct DispatchCommand DispatchCommand;
typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer;
typedef struct DRWDebugVert DRWDebugVert; typedef struct DRWDebugVert DRWDebugVert;
typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer;
typedef struct FrustumCorners FrustumCorners;
typedef struct FrustumPlanes FrustumPlanes;
/* __cplusplus is true when compiling with MSL. */ /* __cplusplus is true when compiling with MSL. */
# if defined(__cplusplus) && !defined(GPU_SHADER) # if defined(__cplusplus) && !defined(GPU_SHADER)
@ -94,11 +96,27 @@ uint drw_view_id = 0;
# define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK) # define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK)
#endif #endif
/**
 * The eight corner positions of a view frustum, grouped in a struct so that the whole set can be
 * copied or passed to a function as a single value. Embedded in `ViewCullingData` as
 * `frustum_corners`.
 * NOTE(review): `ViewCullingData` documents its corners as a "vec3 array padded to vec4", so the
 * `.w` component is presumably padding -- confirm.
 */
struct FrustumCorners {
float4 corners[8];
};
/* NOTE(review): presumably a compile-time check against a 16 byte alignment requirement -- confirm
 * against the definition of `BLI_STATIC_ASSERT_ALIGN`. */
BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16)
/**
 * The six culling planes of a view frustum, grouped in a struct so that the whole set can be
 * copied or passed to a function as a single value. Embedded in `ViewCullingData` as
 * `frustum_planes`.
 * NOTE(review): the GLSL `planes_from_projmat()` writes the planes in the index order listed
 * below, while `View::frustum_culling_planes_calc()` passes `planes[0], [5], [1], [3], [4], [2]`
 * to the CPU `planes_from_projmat()`. Both agree on index 4, but confirm the other indices.
 */
struct FrustumPlanes {
/* [0] left
* [1] right
* [2] bottom
* [3] top
* [4] near
* [5] far */
float4 planes[6];
};
/* NOTE(review): presumably a compile-time check against a 16 byte alignment requirement -- confirm
 * against the definition of `BLI_STATIC_ASSERT_ALIGN`. */
BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16)
struct ViewCullingData { struct ViewCullingData {
/** \note vec3 array padded to vec4. */ /** \note vec3 array padded to vec4. */
/** Frustum corners. */ /** Frustum corners. */
float4 corners[8]; FrustumCorners frustum_corners;
float4 planes[6]; FrustumPlanes frustum_planes;
float4 bound_sphere; float4 bound_sphere;
}; };
BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16) BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16)

View File

@ -50,7 +50,8 @@ void View::frustum_boundbox_calc(int view_id)
} }
#endif #endif
MutableSpan<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; MutableSpan<float4> corners = {culling_[view_id].frustum_corners.corners,
ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};
float left, right, bottom, top, near, far; float left, right, bottom, top, near, far;
bool is_persp = data_[view_id].winmat[3][3] == 0.0f; bool is_persp = data_[view_id].winmat[3][3] == 0.0f;
@ -89,15 +90,15 @@ void View::frustum_culling_planes_calc(int view_id)
{ {
float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat; float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat;
planes_from_projmat(persmat.ptr(), planes_from_projmat(persmat.ptr(),
culling_[view_id].planes[0], culling_[view_id].frustum_planes.planes[0],
culling_[view_id].planes[5], culling_[view_id].frustum_planes.planes[5],
culling_[view_id].planes[1], culling_[view_id].frustum_planes.planes[1],
culling_[view_id].planes[3], culling_[view_id].frustum_planes.planes[3],
culling_[view_id].planes[4], culling_[view_id].frustum_planes.planes[4],
culling_[view_id].planes[2]); culling_[view_id].frustum_planes.planes[2]);
/* Normalize. */ /* Normalize. */
for (float4 &plane : culling_[view_id].planes) { for (float4 &plane : culling_[view_id].frustum_planes.planes) {
plane.w /= normalize_v3(plane); plane.w /= normalize_v3(plane);
} }
} }
@ -105,7 +106,8 @@ void View::frustum_culling_planes_calc(int view_id)
void View::frustum_culling_sphere_calc(int view_id) void View::frustum_culling_sphere_calc(int view_id)
{ {
BoundSphere &bsphere = *reinterpret_cast<BoundSphere *>(&culling_[view_id].bound_sphere); BoundSphere &bsphere = *reinterpret_cast<BoundSphere *>(&culling_[view_id].bound_sphere);
Span<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; Span<float4> corners = {culling_[view_id].frustum_corners.corners,
ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};
/* Extract Bounding Sphere */ /* Extract Bounding Sphere */
if (data_[view_id].winmat[3][3] != 0.0f) { if (data_[view_id].winmat[3][3] != 0.0f) {

View File

@ -9,6 +9,14 @@ struct AABB {
vec3 min, max; vec3 min, max;
}; };
/**
 * Build an #AABB from its two extreme corners.
 * The members are assigned one by one instead of using the `AABB(min, max)` constructor syntax
 * that call sites used before this helper existed.
 */
AABB shape_aabb(vec3 min, vec3 max)
{
  AABB result;
  result.max = max;
  result.min = min;
  return result;
}
AABB aabb_init_min_max() AABB aabb_init_min_max()
{ {
AABB aabb; AABB aabb;

View File

@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 6; ++p) { for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 5; ++v) { for (int v = 0; v < 5; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0)); float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 5; ++p) { for (int p = 0; p < 5; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) { for (int v = 0; v < 8; ++v) {
float test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); float test = dot(i_pyramid.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -181,7 +182,7 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) { for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) { for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0)); float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -203,7 +204,8 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) { for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) { for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) { for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) { for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0)); float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) { for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false; bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) { for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) { if (test > 0.0) {
is_any_vertex_on_positive_side = true; is_any_vertex_on_positive_side = true;
break; break;
@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere)
bool intersects = true; bool intersects = true;
for (int p = 0; p < 6 && intersects; ++p) { for (int p = 0; p < 6 && intersects; ++p) {
float dist_to_plane = dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0)); float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0));
if (dist_to_plane < -sphere.radius) { if (dist_to_plane < -sphere.radius) {
intersects = false; intersects = false;
} }

View File

@ -18,7 +18,10 @@ struct Circle {
Circle shape_circle(vec2 center, float radius) Circle shape_circle(vec2 center, float radius)
{ {
return Circle(center, radius); Circle circle;
circle.center = center;
circle.radius = radius;
return circle;
} }
/** \} */ /** \} */
@ -34,7 +37,10 @@ struct Sphere {
Sphere shape_sphere(vec3 center, float radius) Sphere shape_sphere(vec3 center, float radius)
{ {
return Sphere(center, radius); Sphere sphere;
sphere.center = center;
sphere.radius = radius;
return sphere;
} }
/** \} */ /** \} */
@ -192,6 +198,14 @@ Frustum shape_frustum(vec3 corners[8])
struct Cone { struct Cone {
vec3 direction; vec3 direction;
float angle_cos; float angle_cos;
#ifdef GPU_METAL
inline Cone() = default;
inline Cone(vec3 in_direction, float in_angle_cos)
: direction(in_direction), angle_cos(in_angle_cos)
{
}
#endif
}; };
Cone shape_cone(vec3 direction, float angle_cosine) Cone shape_cone(vec3 direction, float angle_cosine)

View File

@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat,
} }
} }
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8]) void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners)
{ {
float left, right, bottom, top, near, far; float left, right, bottom, top, near, far;
bool is_persp = winmat[3][3] == 0.0; bool is_persp = winmat[3][3] == 0.0;
projmat_dimensions(winmat, left, right, bottom, top, near, far); projmat_dimensions(winmat, left, right, bottom, top, near, far);
corners[0][2] = corners[3][2] = corners[7][2] = corners[4][2] = -near; frustum_corners.corners[0][2] = frustum_corners.corners[3][2] = frustum_corners.corners[7][2] =
corners[0][0] = corners[3][0] = left; frustum_corners.corners[4][2] = -near;
corners[4][0] = corners[7][0] = right; frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left;
corners[0][1] = corners[4][1] = bottom; frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right;
corners[7][1] = corners[3][1] = top; frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom;
frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top;
/* Get the coordinates of the far plane. */ /* Get the coordinates of the far plane. */
if (is_persp) { if (is_persp) {
@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
top *= sca_far; top *= sca_far;
} }
corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far; frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] =
corners[1][0] = corners[2][0] = left; frustum_corners.corners[5][2] = -far;
corners[6][0] = corners[5][0] = right; frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left;
corners[1][1] = corners[5][1] = bottom; frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right;
corners[2][1] = corners[6][1] = top; frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom;
frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top;
/* Transform into world space. */ /* Transform into world space. */
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
corners[i].xyz = transform_point(viewinv, corners[i].xyz); frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz);
} }
} }
void planes_from_projmat(mat4 mat, void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes)
out vec4 left,
out vec4 right,
out vec4 bottom,
out vec4 top,
out vec4 near,
out vec4 far)
{ {
/* References: /* References:
* *
@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat,
* http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf * http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf
*/ */
mat = transpose(mat); mat = transpose(mat);
left = mat[3] + mat[0]; frustum_planes.planes[0] = mat[3] + mat[0];
right = mat[3] - mat[0]; frustum_planes.planes[1] = mat[3] - mat[0];
bottom = mat[3] + mat[1]; frustum_planes.planes[2] = mat[3] + mat[1];
top = mat[3] - mat[1]; frustum_planes.planes[3] = mat[3] - mat[1];
near = mat[3] + mat[2]; frustum_planes.planes[4] = mat[3] + mat[2];
far = mat[3] - mat[2]; frustum_planes.planes[5] = mat[3] - mat[2];
} }
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6]) void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes)
{ {
mat4 persmat = winmat * viewmat; mat4 persmat = winmat * viewmat;
planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]); planes_from_projmat(persmat, frustum_planes);
/* Normalize. */ /* Normalize. */
for (int p = 0; p < 6; p++) { for (int p = 0; p < 6; p++) {
planes[p] /= length(planes[p].xyz); frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz);
} }
} }
vec4 frustum_culling_sphere_calc(vec4 corners[8]) vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners)
{ {
/* Extract Bounding Sphere */ /* Extract Bounding Sphere */
/* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */ /* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */
vec4 bsphere; vec4 bsphere;
bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5; bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5;
bsphere.w = 0.0; bsphere.w = 0.0;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz)); bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz));
} }
return bsphere; return bsphere;
} }
@ -125,11 +121,15 @@ void main()
return; return;
} }
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners); /* Read frustom_corners from device memory, update, and write back. */
FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners;
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners);
view_culling_buf[drw_view_id].frustum_corners = frustum_corners;
frustum_culling_planes_calc( /* Read frustum_planes from device memory, update, and write back. */
drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes); FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes;
frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes);
view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc( view_culling_buf[drw_view_id].frustum_planes = frustum_planes;
view_culling_buf[drw_view_id].corners); view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners);
} }

View File

@ -34,8 +34,9 @@ void main()
bounds.bounding_corners[1].xyz, bounds.bounding_corners[1].xyz,
bounds.bounding_corners[2].xyz, bounds.bounding_corners[2].xyz,
bounds.bounding_corners[3].xyz); bounds.bounding_corners[3].xyz);
Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz,
bounds._inner_sphere_radius);
for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) { for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) {
if (drw_view_culling.bound_sphere.w == -1.0) { if (drw_view_culling.bound_sphere.w == -1.0) {

View File

@ -101,10 +101,18 @@ struct constexp_uvec3 {
return 0; return 0;
} }
} }
inline operator uint3() const constexpr inline operator uint3() const
{ {
return xyz; return xyz;
} }
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
}; };
constexpr constexp_uvec3 __internal_workgroupsize_get() constexpr constexp_uvec3 __internal_workgroupsize_get()
@ -140,6 +148,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data)
{ {
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
} }
/* Threadgroup memory variant of the GLSL-style `atomicAnd`: forwards to
 * `atomic_fetch_and_explicit` with relaxed memory ordering and returns its result. */
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
  /* Reinterpret the plain storage as its atomic counterpart in the same address space. */
  threadgroup _atomic<T> *atomic_mem = (threadgroup _atomic<T> *)&mem;
  return atomic_fetch_and_explicit(atomic_mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data) template<typename T> T atomicOr(threadgroup T &mem, T data)
{ {
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
@ -152,29 +164,41 @@ template<typename T> T atomicXor(threadgroup T &mem, T data)
/* Device memory. */ /* Device memory. */
template<typename T> T atomicMax(device T &mem, T data) template<typename T> T atomicMax(device T &mem, T data)
{ {
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
template<typename T> T atomicMin(device T &mem, T data) template<typename T> T atomicMin(device T &mem, T data)
{ {
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
template<typename T> T atomicAdd(device T &mem, T data) template<typename T> T atomicAdd(device T &mem, T data)
{ {
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
template<typename T> T atomicSub(device T &mem, T data) template<typename T> T atomicSub(device T &mem, T data)
{ {
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(device T &mem, T data)
{
return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
template<typename T> T atomicOr(device T &mem, T data) template<typename T> T atomicOr(device T &mem, T data)
{ {
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
template<typename T> T atomicXor(device T &mem, T data) template<typename T> T atomicXor(device T &mem, T data)
{ {
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
} }
/* Unblock texture atomic compilation.
 * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single
 * thread. We need to re-visit the solution for this use-case and use a 2D texture buffer instead.
 *
 * Emulation: fetch the red channel of the texel, write back the smaller of that value and `data`,
 * then fence the texture.
 *
 * The expansion is wrapped in `do { ... } while (0)` so that it behaves as ONE statement: it can
 * be used several times in the same scope (the temporary is local to the wrapper) and as the
 * body of a brace-less `if`/`else`. The call site must supply the trailing semicolon.
 * NOTE: `tex`, `coord` and `data` are evaluated more than once, and unlike the GLSL function
 * this statement macro does not yield the previous texel value. */
#define imageAtomicMin(tex, coord, data) \
  do { \
    uint _atomic_min_prev = _texelFetch_internal(tex, coord, 0).r; \
    _texture_write_internal( \
        tex, coord, uint4((_atomic_min_prev < (data)) ? _atomic_min_prev : (data))); \
    (tex).texture->fence(); \
  } while (0)
/* Used to replace 'out' in function parameters with threadlocal reference /* Used to replace 'out' in function parameters with threadlocal reference
* shortened to avoid expanding the glsl source string. */ * shortened to avoid expanding the glsl source string. */
#define THD thread #define THD thread
@ -1126,6 +1150,27 @@ inline float4 uintBitsToFloat(uint4 f)
return as_type<float4>(f); return as_type<float4>(f);
} }
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
#define bitfieldInsert insert_bits
#define bitCount popcount
/* GLSL `findLSB` replacement built on top of `ctz`. */
template<typename T> T findLSB(T x)
{
  /* A zero input has no set bit: filter it out and report -1 to match GLSL functionality. */
  if (x == T(0)) {
    return T(-1);
  }
  /* `ctz` returns the number of trailing zeroes, which is also the index of the lowest set
   * bit. */
  return T(ctz(x));
}
/* GLSL `findMSB` replacement built on top of `clz`.
 * Returns the index of the most significant set bit of `x`, or -1 when `x` is zero.
 * For negative signed input GLSL defines the result as the index of the most significant ZERO
 * bit (and -1 for an input of -1); this is handled by inverting negative values first. */
template<typename T> T findMSB(T x)
{
  /* Never taken for unsigned types. For negative values, the most significant 0 bit of `x` is
   * the most significant 1 bit of `~x`. */
  T v = (x < T(0)) ? T(~x) : x;
  /* clz returns the number of leading zeroes. To fetch the index of the MSB, subtract that count
   * from the bit width (taken here as `clz(T(0))`, assuming clz of zero is the bit width of the
   * type) and offset by 1. The zero case is filtered out to match GLSL functionality. */
  return (v == T(0)) ? T(-1) : (clz(T(0)) - clz(v) - T(1));
}
/* Texture size functions. Add texture types as needed. */ /* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0) #define imageSize(image) textureSize(image, 0)

View File

@ -15,6 +15,12 @@
#define depthCubeArray samplerCubeArray #define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow #define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass by reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
/* Backend Functions. */ /* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask) #define select(A, B, mask) mix(A, B, mask)