EEVEE-Next: Resolve compilation errors in Metal #106219

Merged
Jeroen Bakker merged 8 commits from Jason-Fielder/blender:MetalEEVEENext_shaderCompilation into main 2023-04-20 08:03:42 +02:00
10 changed files with 85 additions and 77 deletions
Showing only changes of commit 39ceab2071 - Show all commits

View File

@ -65,7 +65,7 @@ void main()
}
AABB aabb_tag;
AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999));
AABB aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999));
/* Directional winmats have no correct near/far in the Z dimension at this point.
* Do not clip in this dimension. */

View File

@ -44,7 +44,7 @@ bool is_visible(IsectBox box)
bool intersects_near_plane(IsectBox box)
{
vec4 near_plane = drw_view_culling.planes[4];
vec4 near_plane = drw_view_culling.frustum_planes.planes[4];
bool on_positive_side = false;
bool on_negative_side = false;

View File

@ -94,11 +94,27 @@ uint drw_view_id = 0;
# define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK)
#endif
/**
 * The eight corner points of a view frustum, one `float4` per corner.
 *
 * `frustum_boundbox_calc()` writes indices 0, 3, 4, 7 at depth `-near` and indices 1, 2, 5, 6 at
 * depth `-far`, then transforms the `.xyz` part of every corner by `viewinv`.
 * The intersection tests and the bounding-sphere computation only read `.xyz`.
 */
struct FrustumCorners {
float4 corners[8];
};
/* NOTE(review): presumably enforces the 16-byte size/alignment rule for data shared with the
 * GPU -- confirm against the macro definition. */
BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16)
/**
 * The six clipping planes of a view frustum, one `float4` per plane.
 *
 * `frustum_culling_planes_calc()` extracts them from `winmat * viewmat` and divides each plane by
 * the length of its `.xyz` part. Intersection tests evaluate `dot(plane, vec4(point, 1.0))` and
 * treat a result greater than zero as the positive side of the plane.
 */
struct FrustumPlanes {
/* [0] left
* [1] right
* [2] bottom
* [3] top
* [4] near
* [5] far */
float4 planes[6];
};
/* NOTE(review): presumably enforces the 16-byte size/alignment rule for data shared with the
 * GPU -- confirm against the macro definition. */
BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16)
struct ViewCullingData {
/** \note vec3 array padded to vec4. */
/** Frustum corners. */
float4 corners[8];
float4 planes[6];
FrustumCorners frustum_corners;
FrustumPlanes frustum_planes;
float4 bound_sphere;
};
BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16)

View File

@ -7,15 +7,16 @@
struct AABB {
vec3 min, max;
#ifdef GPU_METAL
inline AABB() = default;
inline AABB(vec3 _min, vec3 _max) : min(_min), max(_max)
{
}
#endif
};

Change caller to use the following syntax instead. Same for Sphere()

  AABB aabb;
  aabb.min = vec3(1.0e30);
  aabb.max = vec3(-1.0e30);
Change caller to use the following syntax instead. Same for `Sphere()`. ``` AABB aabb; aabb.min = vec3(1.0e30); aabb.max = vec3(-1.0e30); ```
/**
 * Build an `AABB` from explicit bounds.
 *
 * The members are assigned one by one instead of going through constructor syntax.
 *
 * \param min: value stored in `AABB.min`.
 * \param max: value stored in `AABB.max`.
 * \return the populated `AABB`.
 */
AABB shape_aabb(vec3 min, vec3 max)
{
  AABB result;
  result.max = max;
  result.min = min;
  return result;
}
AABB aabb_init_min_max()
{
AABB aabb;

View File

@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 5; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid)
for (int p = 0; p < 5; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_pyramid.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -181,7 +182,7 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -203,7 +204,8 @@ bool intersect_view(Box box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0));
float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box)
for (int p = 0; p < 6; ++p) {
bool is_any_vertex_on_positive_side = false;
for (int v = 0; v < 8; ++v) {
float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
float test = dot(i_box.planes[p],
vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
if (test > 0.0) {
is_any_vertex_on_positive_side = true;
break;
@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere)
bool intersects = true;
for (int p = 0; p < 6 && intersects; ++p) {
float dist_to_plane = dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0));
float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0));
if (dist_to_plane < -sphere.radius) {
intersects = false;
}

View File

@ -14,18 +14,14 @@
struct Circle {
vec2 center;
float radius;
#ifdef GPU_METAL
inline Circle() = default;
inline Circle(vec2 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Circle shape_circle(vec2 center, float radius)
{
return Circle(center, radius);
Circle circle;
circle.center = center;
circle.radius = radius;
return circle;
}
/** \} */
@ -37,18 +33,14 @@ Circle shape_circle(vec2 center, float radius)
struct Sphere {
vec3 center;
float radius;
#ifdef GPU_METAL
inline Sphere() = default;
inline Sphere(vec3 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Sphere shape_sphere(vec3 center, float radius)
{
return Sphere(center, radius);
Sphere sphere;
sphere.center = center;
sphere.radius = radius;
return sphere;
}
/** \} */

View File

@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat,
}
}
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, corners, 8))
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners)
{
float left, right, bottom, top, near, far;
bool is_persp = winmat[3][3] == 0.0;
projmat_dimensions(winmat, left, right, bottom, top, near, far);
corners[0][2] = corners[3][2] = corners[7][2] = corners[4][2] = -near;
corners[0][0] = corners[3][0] = left;
corners[4][0] = corners[7][0] = right;
corners[0][1] = corners[4][1] = bottom;
corners[7][1] = corners[3][1] = top;
frustum_corners.corners[0][2] = frustum_corners.corners[3][2] = frustum_corners.corners[7][2] =
frustum_corners.corners[4][2] = -near;
frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left;
frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right;
frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom;
frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top;
/* Get the coordinates of the far plane. */
if (is_persp) {
@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, cor
top *= sca_far;
}
corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far;
corners[1][0] = corners[2][0] = left;
corners[6][0] = corners[5][0] = right;
corners[1][1] = corners[5][1] = bottom;
corners[2][1] = corners[6][1] = top;
frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] =
frustum_corners.corners[5][2] = -far;
frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left;
frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right;
frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom;
frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top;
/* Transform into world space. */
for (int i = 0; i < 8; i++) {
corners[i].xyz = transform_point(viewinv, corners[i].xyz);
frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz);
}
}

We should avoid that. Change the caller to not pass device memory references (and do some copy instead, hoping the compiler optimizes it). This makes the caller code uglier, but it is better than having a function that can only take device memory as an argument.

We should avoid that. Change the caller to not pass device memory references (and do some copy instead, hoping the compiler optimizes it). This makes the caller code uglier, but it is better than having a function that can only take device memory as an argument.

Yep, I can make this change for this case. I can't guarantee that it won't incur additional overhead, though most likely the device read needs to happen anyway; it's just a question of whether the read-write is more or less the same.

The slightly more awkward case here is that it is used in combination with the current DEVICE_OUT_ARRAY in:
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))

as this routine also then updates the planes stored in device memory.

I'm happy to refactor the code around this function to first pull the data, update locally and then update device memory if this is suitable, even if it makes the code a little longer?

Yep, I can make this change for this case. I can't guarantee that it won't incur additional overhead, though most likely the device read needs to happen anyway; it's just a question of whether the read-write is more or less the same. The slightly more awkward case here is that it is used in combination with the current `DEVICE_OUT_ARRAY` in: `void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))` as this routine also then updates the planes stored in device memory. I'm happy to refactor the code around this function to first pull the data, update locally and then update device memory if this is suitable, even if it makes the code a little longer?

The proposed way implemented in the latest version of this PR is suitable.

The proposed way implemented in the latest version of this PR is suitable.
void planes_from_projmat(mat4 mat,
DEVICE_OUT(vec4, left),
DEVICE_OUT(vec4, right),
DEVICE_OUT(vec4, bottom),
DEVICE_OUT(vec4, top),
DEVICE_OUT(vec4, near),
DEVICE_OUT(vec4, far))
void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes)
{
/* References:
*
@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat,
* http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf
*/
mat = transpose(mat);
left = mat[3] + mat[0];
right = mat[3] - mat[0];
bottom = mat[3] + mat[1];
top = mat[3] - mat[1];
near = mat[3] + mat[2];
far = mat[3] - mat[2];
frustum_planes.planes[0] = mat[3] + mat[0];
frustum_planes.planes[1] = mat[3] - mat[0];
frustum_planes.planes[2] = mat[3] + mat[1];
frustum_planes.planes[3] = mat[3] - mat[1];
frustum_planes.planes[4] = mat[3] + mat[2];
frustum_planes.planes[5] = mat[3] - mat[2];
}
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes)
{
mat4 persmat = winmat * viewmat;
planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]);
planes_from_projmat(persmat, frustum_planes);
/* Normalize. */
for (int p = 0; p < 6; p++) {
planes[p] /= length(planes[p].xyz);
frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz);
}
}
vec4 frustum_culling_sphere_calc(device vec4 corners[8])
vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners)
{
/* Extract Bounding Sphere */
/* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */
vec4 bsphere;
bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5;
bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5;
bsphere.w = 0.0;
for (int i = 0; i < 8; i++) {
bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz));
bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz));
}
return bsphere;
}
@ -125,11 +121,15 @@ void main()
return;
}
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners);
/* Read frustum_corners from device memory, update, and write back. */
FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners;
frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners);
view_culling_buf[drw_view_id].frustum_corners = frustum_corners;
frustum_culling_planes_calc(
drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes);
/* Read frustum_planes from device memory, update, and write back. */
FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes;
frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes);
view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(
view_culling_buf[drw_view_id].corners);
view_culling_buf[drw_view_id].frustum_planes = frustum_planes;
view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners);
}

View File

@ -34,8 +34,9 @@ void main()
bounds.bounding_corners[1].xyz,
bounds.bounding_corners[2].xyz,
bounds.bounding_corners[3].xyz);
Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius);
Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz,
bounds._inner_sphere_radius);
for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) {
if (drw_view_culling.bound_sphere.w == -1.0) {

View File

@ -203,9 +203,6 @@ template<typename T> T atomicXor(device T &mem, T data)
* shortened to avoid expanding the glsl source string. */
#define THD thread
#define OUT(type, name, array) thread type(&name)[array]
#define THREADGROUP_OUT_ARRAY(type, name, array) threadgroup type(&name)[array]
#define DEVICE_OUT_ARRAY(type, name, array) device type(&name)[array]
#define DEVICE_OUT(type, name) device type &name
/* Generate wrapper structs for combined texture and sampler type. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS

View File

@ -20,8 +20,6 @@
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
#define DEVICE_OUT_ARRAY(type, name, array_len) out type name[array_len]
#define DEVICE_OUT(type, name) out type
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)