EEVEE-Next: Resolve compilation errors in Metal #106219

Merged
Jeroen Bakker merged 8 commits from Jason-Fielder/blender:MetalEEVEENext_shaderCompilation into main 2023-04-20 08:03:42 +02:00
18 changed files with 191 additions and 85 deletions

View File

@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx,
* The full pixel neighborhood is gathered.
* \{ */
void dof_slight_focus_gather(sampler2D depth_tx,
void dof_slight_focus_gather(depth2D depth_tx,
sampler2D color_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
float radius,

View File

@ -62,7 +62,7 @@ void main()
int mask_shift = 1;
#define downsample_level(out_mip__, lod_) \
active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \
barrier(); /* Wait for previous writes to finish. */ \
if (active_thread) { \
max_depth = max_v4(load_local_depths(local_px)); \
@ -89,12 +89,12 @@ void main()
}
finished_tile_counter = 0u;
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u));
ivec2 image_border = imageSize(out_mip_5) - 1;
for (int y = 0; y < iter.y; y++) {
for (int x = 0; x < iter.x; x++) {
/* Load result of the other work groups. */
kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
src_px = ivec2(kernel_origin + local_px) * 2;
vec4 samp;
samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;

View File

@ -168,13 +168,15 @@ void main()
}
/* Fallthrough to the hemispheric case. */
case LIGHT_RECT:
case LIGHT_ELLIPSE:
case LIGHT_ELLIPSE: {
vec3 v000 = vP - v_right * radius - v_up * radius;
vec3 v100 = v000 + v_right * (radius * 2.0);
vec3 v010 = v000 + v_up * (radius * 2.0);
vec3 v001 = v000 - v_back * radius;
Box bbox = shape_box(v000, v100, v010, v001);
intersect_tile = intersect_tile && intersect(tile, bbox);
break;

Codestyle: add break inside the code-block.

Codestyle: add break inside the code-block.
}
default:
break;
}

View File

@ -74,8 +74,10 @@ void main()
vec4 max_motion = imageLoad(in_tiles_img, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy,
uvec2(src_tile));
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
if (true) {
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
@ -85,17 +87,20 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
}
}
}
}
if (true) {
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
@ -104,10 +109,12 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
}
}
}

View File

@ -178,10 +178,10 @@ void main()
vec4 max_motion;
/* Load dilation result from the indirection table. */
ivec2 tile_prev;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev);
max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
ivec2 tile_next;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next);
max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
Accumulator accum;

View File

@ -242,13 +242,13 @@ void output_aov(vec4 color, float value, uint hash)
#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
if (aov_buf.hash_color[i] == hash) {
imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color);
imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color);
return;
}
}
for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
if (aov_buf.hash_value[i] == hash) {
imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value));
imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value));
return;
}
}

View File

@ -65,7 +65,7 @@ void main()
}
AABB aabb_tag;
AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999));
AABB aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999));
/* Directional winmats have no correct near/far in the Z dimension at this point.
* Do not clip in this dimension. */
@ -87,7 +87,7 @@ void main()
for (int y = box_min.y; y <= box_max.y; y++) {
for (int x = box_min.x; x <= box_max.x; x++) {
int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod);
atomicOr(tiles_buf[tile_index], SHADOW_DO_UPDATE);
atomicOr(tiles_buf[tile_index], uint(SHADOW_DO_UPDATE));
}
}
}

View File

@ -21,7 +21,7 @@ void shadow_tag_usage_tile(LightData light, ivec2 tile_co, int lod, int tilemap_
tile_co >>= lod;
int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod);
atomicOr(tiles_buf[tile_index], SHADOW_IS_USED);
atomicOr(tiles_buf[tile_index], uint(SHADOW_IS_USED));
}
void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius)

View File

@ -44,7 +44,7 @@ bool is_visible(IsectBox box)
bool intersects_near_plane(IsectBox box)
{
vec4 near_plane = drw_view_culling.planes[4];
vec4 near_plane = drw_view_culling.frustum_planes.planes[4];
bool on_positive_side = false;
bool on_negative_side = false;

View File

@ -21,6 +21,8 @@ typedef struct DispatchCommand DispatchCommand;
typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer;
typedef struct DRWDebugVert DRWDebugVert;
typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer;
typedef struct FrustumCorners FrustumCorners;
typedef struct FrustumPlanes FrustumPlanes;
/* __cplusplus is true when compiling with MSL. */
# if defined(__cplusplus) && !defined(GPU_SHADER)
@ -94,11 +96,27 @@ uint drw_view_id = 0;
# define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK)
#endif
/** Eight view-frustum corners, one float4 per corner.
 * The culling code visible in this change only reads the `.xyz` part of each entry. */
struct FrustumCorners {
float4 corners[8];
};
BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16)
/** Six view-frustum planes, one float4 per plane.
 * The culling code visible in this change evaluates a point against a plane with
 * `dot(plane, vec4(P, 1.0))`. */
struct FrustumPlanes {
/* [0] left
* [1] right
* [2] bottom
* [3] top
* [4] near
* [5] far */
float4 planes[6];
};
BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16)
struct ViewCullingData {
/** \note vec3 array padded to vec4. */
/** Frustum corners. */
float4 corners[8];
float4 planes[6];
FrustumCorners frustum_corners;
FrustumPlanes frustum_planes;
float4 bound_sphere;
};
BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16)

View File

@ -50,7 +50,8 @@ void View::frustum_boundbox_calc(int view_id)
}
#endif
MutableSpan<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)};
MutableSpan<float4> corners = {culling_[view_id].frustum_corners.corners,
ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};
float left, right, bottom, top, near, far;
bool is_persp = data_[view_id].winmat[3][3] == 0.0f;
@ -89,15 +90,15 @@ void View::frustum_culling_planes_calc(int view_id)
{
float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat;
planes_from_projmat(persmat.ptr(),
culling_[view_id].planes[0],
culling_[view_id].planes[5],
culling_[view_id].planes[1],
culling_[view_id].planes[3],
culling_[view_id].planes[4],
culling_[view_id].planes[2]);
culling_[view_id].frustum_planes.planes[0],
culling_[view_id].frustum_planes.planes[5],
culling_[view_id].frustum_planes.planes[1],
culling_[view_id].frustum_planes.planes[3],
culling_[view_id].frustum_planes.planes[4],
culling_[view_id].frustum_planes.planes[2]);
/* Normalize. */
for (float4 &plane : culling_[view_id].planes) {
for (float4 &plane : culling_[view_id].frustum_planes.planes) {
plane.w /= normalize_v3(plane);
}
}
@ -105,7 +106,8 @@ void View::frustum_culling_planes_calc(int view_id)
void View::frustum_culling_sphere_calc(int view_id)
{
BoundSphere &bsphere = *reinterpret_cast<BoundSphere *>(&culling_[view_id].bound_sphere);
Span<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)};
Span<float4> corners = {culling_[view_id].frustum_corners.corners,
ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};
/* Extract Bounding Sphere */
if (data_[view_id].winmat[3][3] != 0.0f) {

View File

@ -9,6 +9,14 @@ struct AABB {
vec3 min, max;
};

Change caller to use the following syntax instead. Same for Sphere()

  AABB aabb;
  aabb.min = vec3(1.0e30);
  aabb.max = vec3(-1.0e30);
Change caller to use the following syntax instead. Same for `Sphere()` ``` AABB aabb; aabb.min = vec3(1.0e30); aabb.max = vec3(-1.0e30); ```
/* Build an AABB from explicit bounds.
 * The fields are assigned one by one instead of calling a struct constructor. */
AABB shape_aabb(vec3 min, vec3 max)
{
  AABB result;
  result.max = max;
  result.min = min;
  return result;
}
AABB aabb_init_min_max()
{
AABB aabb;

View File

@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 5; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 5; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_pyramid.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -181,7 +182,7 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -203,7 +204,8 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere)
bool intersects = true;
for (int p = 0; p < 6 && intersects; ++p) {
float dist_to_plane = dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0));
float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0));
if (dist_to_plane < -sphere.radius) {
intersects = false;
}

View File

@ -18,7 +18,10 @@ struct Circle {
Circle shape_circle(vec2 center, float radius)
{
return Circle(center, radius);
Circle circle;
circle.center = center;
circle.radius = radius;
return circle;
}
/** \} */
@ -34,7 +37,10 @@ struct Sphere {
Sphere shape_sphere(vec3 center, float radius)
{
return Sphere(center, radius);
Sphere sphere;
sphere.center = center;
sphere.radius = radius;
return sphere;
}
/** \} */
@ -192,6 +198,14 @@ Frustum shape_frustum(vec3 corners[8])
/* Cone shape: a direction vector plus `angle_cos`.
 * NOTE(review): `angle_cos` is presumably the cosine of the cone angle; whether that is the half
 * or the full aperture is not visible here -- confirm against callers. */
struct Cone {
vec3 direction;
float angle_cos;
#ifdef GPU_METAL
/* Metal-only constructors: a defaulted one and a member-wise (direction, angle_cos) one.
 * NOTE(review): presumably present so `Cone(direction, angle_cos)` keeps compiling under MSL --
 * confirm. */
inline Cone() = default;
inline Cone(vec3 in_direction, float in_angle_cos)
: direction(in_direction), angle_cos(in_angle_cos)
{
}
#endif
};
Cone shape_cone(vec3 direction, float angle_cosine)

View File

@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat,
}
}
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners)
{
float left, right, bottom, top, near, far;
bool is_persp = winmat[3][3] == 0.0;
projmat_dimensions(winmat, left, right, bottom, top, near, far);
corners[0][2] = corners[3][2] = corners[7][2] = corners[4][2] = -near;
corners[0][0] = corners[3][0] = left;
corners[4][0] = corners[7][0] = right;
corners[0][1] = corners[4][1] = bottom;
corners[7][1] = corners[3][1] = top;
frustum_corners.corners[0][2] = frustum_corners.corners[3][2] = frustum_corners.corners[7][2] =
frustum_corners.corners[4][2] = -near;
frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left;
frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right;
frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom;
frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top;
/* Get the coordinates of the far plane. */
if (is_persp) {
@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
top *= sca_far;
}
corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far;
corners[1][0] = corners[2][0] = left;
corners[6][0] = corners[5][0] = right;
corners[1][1] = corners[5][1] = bottom;
corners[2][1] = corners[6][1] = top;
frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] =
frustum_corners.corners[5][2] = -far;
frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left;
frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right;
frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom;
frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top;
/* Transform into world space. */
for (int i = 0; i < 8; i++) {
corners[i].xyz = transform_point(viewinv, corners[i].xyz);
frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz);
}
}

We should avoid that. Change the caller to not pass device memory references (and do some copy instead, hoping the compiler optimizes it). This makes the caller code uglier, but it is better than having a function that can only take device memory as an argument.

We should avoid that. Change the caller to not pass device memory references (and do some copy instead, hoping the compiler optimizes it). This makes the caller code uglier, but it is better than having a function that can only take device memory as an argument.

Yep, I can make this change for this case, I can't guarantee that it won't incur additional overhead, though most likely the device read needs to happen anyway, it's just a question of whether the read-write is more or less the same.

The slightly more awkward case here is that it is used in combination with the current DEVICE_OUT_ARRAY in:
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))

as this routine also then updates the planes stored in device memory.

I'm happy to refactor the code around this function to first pull the data, update locally and then update device memory if this is suitable, even if it makes the code a little longer?

Yep, I can make this change for this case, I can't guarantee that it won't incur additional overhead, though most likely the device read needs to happen anyway, it's just a question of whether the read-write is more or less the same. The slightly more awkward case here is that it is used in combination with the current `DEVICE_OUT_ARRAY` in: `void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))` as this routine also then updates the planes stored in device memory. I'm happy to refactor the code around this function to first pull the data, update locally and then update device memory if this is suitable, even if it makes the code a little longer?

The proposed way implemented in the latest version of this PR is suitable.

The proposed way implemented in the latest version of this PR is suitable.
void planes_from_projmat(mat4 mat,
out vec4 left,
out vec4 right,
out vec4 bottom,
out vec4 top,
out vec4 near,
out vec4 far)
void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes)
{
/* References:
*
@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat,
* http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf
*/
mat = transpose(mat);
left = mat[3] + mat[0];
right = mat[3] - mat[0];
bottom = mat[3] + mat[1];
top = mat[3] - mat[1];
near = mat[3] + mat[2];
far = mat[3] - mat[2];
frustum_planes.planes[0] = mat[3] + mat[0];
frustum_planes.planes[1] = mat[3] - mat[0];
frustum_planes.planes[2] = mat[3] + mat[1];
frustum_planes.planes[3] = mat[3] - mat[1];
frustum_planes.planes[4] = mat[3] + mat[2];
frustum_planes.planes[5] = mat[3] - mat[2];
}
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes)
{
mat4 persmat = winmat * viewmat;
planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]);
planes_from_projmat(persmat, frustum_planes);
/* Normalize. */
for (int p = 0; p < 6; p++) {
planes[p] /= length(planes[p].xyz);
frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz);
}
}
vec4 frustum_culling_sphere_calc(vec4 corners[8])
vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners)
{
/* Extract Bounding Sphere */
/* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */
vec4 bsphere;
bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5;
bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5;
bsphere.w = 0.0;
for (int i = 0; i < 8; i++) {
bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz));
bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz));
}
return bsphere;
}
@ -125,11 +121,15 @@ void main()
return;
}
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners);
/* Read frustum_corners from device memory, update, and write back. */
FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners;
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners);
view_culling_buf[drw_view_id].frustum_corners = frustum_corners;
frustum_culling_planes_calc(
drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes);
/* Read frustum_planes from device memory, update, and write back. */
FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes;
frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes);
view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(
view_culling_buf[drw_view_id].corners);
view_culling_buf[drw_view_id].frustum_planes = frustum_planes;
view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners);
}

View File

@ -34,8 +34,9 @@ void main()
bounds.bounding_corners[1].xyz,
bounds.bounding_corners[2].xyz,
bounds.bounding_corners[3].xyz);
Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius);
Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz,
bounds._inner_sphere_radius);
for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) {
if (drw_view_culling.bound_sphere.w == -1.0) {

View File

@ -101,10 +101,18 @@ struct constexp_uvec3 {
return 0;
}
}
inline operator uint3() const
constexpr inline operator uint3() const
{
return xyz;
}
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
};
constexpr constexp_uvec3 __internal_workgroupsize_get()
@ -140,6 +148,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* GLSL-style atomicAnd for threadgroup memory.
 * Reinterprets `mem` as an atomic of the same type and forwards to atomic_fetch_and_explicit with
 * relaxed memory ordering; the result of that call is returned unchanged. */
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
  threadgroup _atomic<T> *atomic_mem = (threadgroup _atomic<T> *)&mem;
  return atomic_fetch_and_explicit(atomic_mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
@ -152,29 +164,41 @@ template<typename T> T atomicXor(threadgroup T &mem, T data)
/* Device memory. */
template<typename T> T atomicMax(device T &mem, T data)
{
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(device T &mem, T data)
{
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(device T &mem, T data)
{
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(device T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* GLSL-style atomicAnd for device memory.
 * Reinterprets `mem` as an atomic of the same type and forwards to atomic_fetch_and_explicit with
 * relaxed memory ordering; the result of that call is returned unchanged. */
template<typename T> T atomicAnd(device T &mem, T data)
{
  device _atomic<T> *atomic_mem = (device _atomic<T> *)&mem;
  return atomic_fetch_and_explicit(atomic_mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(device T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(device T &mem, T data)
{
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
/* Unblock texture atomic compilation.
 * TODO(Metal): This is a plain read / compare / write sequence followed by a texture fence. It is
 * not correct for global atomic behavior, but will be safe within a single thread. We need to
 * re-visit the solution for this use-case and use a 2D texture buffer instead.
 *
 * The expansion is wrapped in `do { } while (0)` so that it behaves as one statement: the
 * temporary does not leak into (or get redeclared in) the calling scope, and an unbraced
 * `if (...) imageAtomicMin(...);` guards the whole sequence. Unlike the GLSL built-in, this macro
 * does not yield the previous texel value. `data` is evaluated more than once. */
#define imageAtomicMin(tex, coord, data) \
  do { \
    uint _atomic_min_prev = _texelFetch_internal(tex, coord, 0).r; \
    _texture_write_internal( \
        tex, coord, uint4((_atomic_min_prev < (data)) ? _atomic_min_prev : (data))); \
    (tex).texture->fence(); \
  } while (0)
/* Used to replace 'out' in function parameters with threadlocal reference
* shortened to avoid expanding the glsl source string. */
#define THD thread
@ -1126,6 +1150,27 @@ inline float4 uintBitsToFloat(uint4 f)
return as_type<float4>(f);
}
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
#define bitfieldInsert insert_bits
#define bitCount popcount
/* GLSL-compatible findLSB: index of the least significant set bit, or -1 when no bit is set. */
template<typename T> T findLSB(T x)
{
  /* GLSL defines the result for a zero input as -1, so handle that case before using ctz. */
  if (x == T(0)) {
    return T(-1);
  }
  /* The count of trailing zeroes equals the index of the lowest set bit. */
  return T(ctz(x));
}
/* GLSL-compatible findMSB.
 * - Unsigned or non-negative input: index of the most significant set bit.
 * - Negative input: index of the most significant cleared bit.
 * - Inputs 0 and -1: returns -1. */
template<typename T> T findMSB(T x)
{
  /* Complementing a negative value turns "highest cleared bit" into "highest set bit", so both
   * cases share the same computation below. For unsigned types the comparison is never true. */
  T bits = (x < T(0)) ? T(~x) : x;
  /* clz returns the number of leading zeroes; `clz(T(0))` is used as the bit width of T, so
   * `width - clz(bits) - 1` is the index of the highest set bit. The zero case has to be filtered
   * out explicitly to match GLSL functionality. */
  return (bits == T(0)) ? T(-1) : T(clz(T(0)) - clz(bits) - T(1));
}
/* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0)

View File

@ -15,6 +15,12 @@
#define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass by reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)