From d9a52b7bb6fceb90a1e4e181bb27ef96bda62a28 Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Tue, 28 Mar 2023 14:06:32 +0100 Subject: [PATCH 1/4] EEVEE-Next: Resolve compilation errors in Metal. Shader source requires explicit conversions and shader address space qualifiers in certain places in order to compile for Metal. We also require constructors for a number of default struct types. Authored by Apple: Michael Parkin-White Ref #96261 --- .../eevee_depth_of_field_accumulator_lib.glsl | 2 +- .../shaders/eevee_hiz_update_comp.glsl | 6 +- .../eevee_light_culling_tile_comp.glsl | 3 +- .../eevee_motion_blur_dilate_comp.glsl | 21 ++++--- .../eevee_motion_blur_gather_comp.glsl | 4 +- .../shaders/eevee_nodetree_lib.glsl | 4 +- .../shaders/eevee_shadow_tag_update_comp.glsl | 2 +- .../shaders/eevee_shadow_tag_usage_lib.glsl | 2 +- .../draw/intern/shaders/common_aabb_lib.glsl | 7 +++ .../draw/intern/shaders/common_shape_lib.glsl | 22 +++++++ .../shaders/draw_view_finalize_comp.glsl | 18 +++--- .../gpu/shaders/metal/mtl_shader_defines.msl | 62 ++++++++++++++++--- .../shaders/opengl/glsl_shader_defines.glsl | 8 +++ 13 files changed, 127 insertions(+), 34 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl index 1da741d7609..957c9b01a2a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl @@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx, * The full pixel neighborhood is gathered. * \{ */ -void dof_slight_focus_gather(sampler2D depth_tx, +void dof_slight_focus_gather(depth2D depth_tx, sampler2D color_tx, sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. 
*/ float radius, diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl index 479a6b590b0..cea25ef7ce0 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -62,7 +62,7 @@ void main() int mask_shift = 1; #define downsample_level(out_mip__, lod_) \ - active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ + active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \ barrier(); /* Wait for previous writes to finish. */ \ if (active_thread) { \ max_depth = max_v4(load_local_depths(local_px)); \ @@ -89,12 +89,12 @@ void main() } finished_tile_counter = 0u; - ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); + ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u)); ivec2 image_border = imageSize(out_mip_5) - 1; for (int y = 0; y < iter.y; y++) { for (int x = 0; x < iter.x; x++) { /* Load result of the other work groups. */ - kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); + kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y); src_px = ivec2(kernel_origin + local_px) * 2; vec4 samp; samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl index 37705e22b22..6479f4f98ff 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -168,13 +168,14 @@ void main() } /* Fallthrough to the hemispheric case. 
*/ case LIGHT_RECT: - case LIGHT_ELLIPSE: + case LIGHT_ELLIPSE: { vec3 v000 = vP - v_right * radius - v_up * radius; vec3 v100 = v000 + v_right * (radius * 2.0); vec3 v010 = v000 + v_up * (radius * 2.0); vec3 v001 = v000 - v_back * radius; Box bbox = shape_box(v000, v100, v010, v001); intersect_tile = intersect_tile && intersect(tile, bbox); + } break; default: break; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl index 07139ea6a09..e365da53d2b 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl @@ -74,8 +74,10 @@ void main() vec4 max_motion = imageLoad(in_tiles_img, src_tile); - MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile); - MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, + uvec2(src_tile)); + MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, + uvec2(src_tile)); if (true) { /* Rectangular area (in tiles) where the motion vector spreads. */ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy); @@ -85,17 +87,20 @@ void main() for (int y = 0; y < motion_rect.extent.y; y++) { ivec2 tile = motion_rect.bottom_left + ivec2(x, y); if (is_inside_motion_line(tile, motion_line)) { - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv); /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in * the motion next so that weighting in gather pass is better. 
*/ - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt); } } } } if (true) { - MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, + uvec2(src_tile)); /* Rectangular area (in tiles) where the motion vector spreads. */ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw); MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw); @@ -104,10 +109,12 @@ void main() for (int y = 0; y < motion_rect.extent.y; y++) { ivec2 tile = motion_rect.bottom_left + ivec2(x, y); if (is_inside_motion_line(tile, motion_line)) { - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt); /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in * the motion next so that weighting in gather pass is better. */ - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv); } } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl index 5249e6637b6..1408f28e585 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl @@ -178,10 +178,10 @@ void main() vec4 max_motion; /* Load dilation result from the indirection table. 
*/ ivec2 tile_prev; - motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev); + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev); max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy; ivec2 tile_next; - motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next); + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next); max_motion.zw = imageLoad(in_tiles_img, tile_next).zw; Accumulator accum; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl index db38baab6a4..6d802a6d79a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl @@ -242,13 +242,13 @@ void output_aov(vec4 color, float value, uint hash) #if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER) for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) { if (aov_buf.hash_color[i] == hash) { - imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color); + imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color); return; } } for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) { if (aov_buf.hash_value[i] == hash) { - imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value)); + imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value)); return; } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl index 475d456db7a..6a780a91f76 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl @@ -87,7 +87,7 @@ void main() for (int y = box_min.y; y <= box_max.y; y++) { for (int x = 
box_min.x; x <= box_max.x; x++) { int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod); - atomicOr(tiles_buf[tile_index], SHADOW_DO_UPDATE); + atomicOr(tiles_buf[tile_index], uint(SHADOW_DO_UPDATE)); } } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl index bb18f56ec74..172fe9488f4 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl @@ -21,7 +21,7 @@ void shadow_tag_usage_tile(LightData light, ivec2 tile_co, int lod, int tilemap_ tile_co >>= lod; int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod); - atomicOr(tiles_buf[tile_index], SHADOW_IS_USED); + atomicOr(tiles_buf[tile_index], uint(SHADOW_IS_USED)); } void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius) diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl index b5f664a6779..8564c6648ed 100644 --- a/source/blender/draw/intern/shaders/common_aabb_lib.glsl +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -7,6 +7,13 @@ struct AABB { vec3 min, max; + +#ifdef GPU_METAL + inline AABB() = default; + inline AABB(vec3 _min, vec3 _max) : min(_min), max(_max) + { + } +#endif }; AABB aabb_init_min_max() diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl index 56722c417aa..016ef944859 100644 --- a/source/blender/draw/intern/shaders/common_shape_lib.glsl +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -14,6 +14,13 @@ struct Circle { vec2 center; float radius; + +#ifdef GPU_METAL + inline Circle() = default; + inline Circle(vec2 in_center, float in_radius) : center(in_center), radius(in_radius) + { + } 
+#endif }; Circle shape_circle(vec2 center, float radius) @@ -30,6 +37,13 @@ Circle shape_circle(vec2 center, float radius) struct Sphere { vec3 center; float radius; + +#ifdef GPU_METAL + inline Sphere() = default; + inline Sphere(vec3 in_center, float in_radius) : center(in_center), radius(in_radius) + { + } +#endif }; Sphere shape_sphere(vec3 center, float radius) @@ -192,6 +206,14 @@ Frustum shape_frustum(vec3 corners[8]) struct Cone { vec3 direction; float angle_cos; + +#ifdef GPU_METAL + inline Cone() = default; + inline Cone(vec3 in_direction, float in_angle_cos) + : direction(in_direction), angle_cos(in_angle_cos) + { + } +#endif }; Cone shape_cone(vec3 direction, float angle_cosine) diff --git a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl index f3af010a47c..c7917357b2c 100644 --- a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl +++ b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl @@ -33,7 +33,7 @@ void projmat_dimensions(mat4 winmat, } } -void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8]) +void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, corners, 8)) { float left, right, bottom, top, near, far; bool is_persp = winmat[3][3] == 0.0; @@ -68,12 +68,12 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8]) } void planes_from_projmat(mat4 mat, - out vec4 left, - out vec4 right, - out vec4 bottom, - out vec4 top, - out vec4 near, - out vec4 far) + DEVICE_OUT(vec4, left), + DEVICE_OUT(vec4, right), + DEVICE_OUT(vec4, bottom), + DEVICE_OUT(vec4, top), + DEVICE_OUT(vec4, near), + DEVICE_OUT(vec4, far)) { /* References: * @@ -89,7 +89,7 @@ void planes_from_projmat(mat4 mat, far = mat[3] - mat[2]; } -void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6]) +void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6)) { 
mat4 persmat = winmat * viewmat; planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]); @@ -100,7 +100,7 @@ void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6]) } } -vec4 frustum_culling_sphere_calc(vec4 corners[8]) +vec4 frustum_culling_sphere_calc(device vec4 corners[8]) { /* Extract Bounding Sphere */ /* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */ diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl index a192e51a0ec..94b7bae302a 100644 --- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -101,10 +101,18 @@ struct constexp_uvec3 { return 0; } } - inline operator uint3() const + constexpr inline operator uint3() const { return xyz; } + constexpr inline operator uint2() const + { + return xy; + } + constexpr inline operator uint() const + { + return x; + } }; constexpr constexp_uvec3 __internal_workgroupsize_get() @@ -140,6 +148,10 @@ template T atomicSub(threadgroup T &mem, T data) { return atomic_fetch_sub_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); } +template T atomicAnd(threadgroup T &mem, T data) +{ + return atomic_fetch_and_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); +} template T atomicOr(threadgroup T &mem, T data) { return atomic_fetch_or_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); @@ -152,33 +164,48 @@ template T atomicXor(threadgroup T &mem, T data) /* Device memory. 
*/ template T atomicMax(device T &mem, T data) { - return atomic_fetch_max_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_max_explicit((device _atomic *)&mem, data, memory_order_relaxed); } template T atomicMin(device T &mem, T data) { - return atomic_fetch_min_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_min_explicit((device _atomic *)&mem, data, memory_order_relaxed); } template T atomicAdd(device T &mem, T data) { - return atomic_fetch_add_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_add_explicit((device _atomic *)&mem, data, memory_order_relaxed); } template T atomicSub(device T &mem, T data) { - return atomic_fetch_sub_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_sub_explicit((device _atomic *)&mem, data, memory_order_relaxed); +} +template T atomicAnd(device T &mem, T data) +{ + return atomic_fetch_and_explicit((device _atomic *)&mem, data, memory_order_relaxed); } template T atomicOr(device T &mem, T data) { - return atomic_fetch_or_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_or_explicit((device _atomic *)&mem, data, memory_order_relaxed); } template T atomicXor(device T &mem, T data) { - return atomic_fetch_xor_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed); + return atomic_fetch_xor_explicit((device _atomic *)&mem, data, memory_order_relaxed); } +/* Unblock texture atomic compilation. + * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single thread. + * We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */ +#define imageAtomicMin(tex, coord, data) \ + uint val = _texelFetch_internal(tex, coord, 0).r;\ + _texture_write_internal(tex, coord, uint4((val < data) ? 
val : data));\ + tex.texture->fence(); + /* Used to replace 'out' in function parameters with threadlocal reference * shortened to avoid expanding the glsl source string. */ #define THD thread #define OUT(type, name, array) thread type(&name)[array] +#define THREADGROUP_OUT_ARRAY(type, name, array) threadgroup type(&name)[array] +#define DEVICE_OUT_ARRAY(type, name, array) device type(&name)[array] +#define DEVICE_OUT(type, name) device type &name /* Generate wrapper structs for combined texture and sampler type. */ #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS @@ -1126,6 +1153,27 @@ inline float4 uintBitsToFloat(uint4 f) return as_type(f); } +#define bitfieldReverse reverse_bits +#define bitfieldExtract extract_bits +#define bitfieldInsert insert_bits +#define bitCount popcount + +template T findLSB(T x) +{ + /* ctz returns the number of trailing zeroes. To fetch the index of the LSB, we can also use this + * value as index, however need to filter out the case where the input value is zero to match + * GLSL functionality. */ + return (x == T(0)) ? T(-1) : T(ctz(x)); +} + +template T findMSB(T x) +{ + /* clz returns the number of leading zeroes. To fetch the index of the MSB, we can also use this + * value as index when offset by 1. however need to filter out the case where the input value is + * zero to match GLSL functionality. 000000010*/ + return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1)); +} + /* Texture size functions. Add texture types as needed. */ #define imageSize(image) textureSize(image, 0) diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl index f2d972ea574..e8119c57d6c 100644 --- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl +++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl @@ -15,6 +15,14 @@ #define depthCubeArray samplerCubeArray #define depth2DArrayShadow sampler2DArrayShadow +/* Memory scope and pass by reference types. 
+ * NOTE: These are required by Metal, but are not required in all cases by GLSL. */ +#define device +#define threadgroup +#define OUT(type, name, array_len) out type name[array_len] +#define DEVICE_OUT_ARRAY(type, name, array_len) out type name[array_len] +#define DEVICE_OUT(type, name) out type + /* Backend Functions. */ #define select(A, B, mask) mix(A, B, mask) -- 2.30.2 From 39ceab2071adbe46468855dbb0c9b6f48d9bfe0f Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Mon, 17 Apr 2023 15:11:14 +0100 Subject: [PATCH 2/4] Address feedback. Remove Metal constructors and use struct member population for shapes. Remove DEVICE_OUT and DEVICE_OUT_ARRAY, refactor ViewCullingData to allow full struct copy to avoid any additional overhead from removing device pointer modification, as device data now copied into local variable for modification, before writing back to device memory. --- .../shaders/eevee_shadow_tag_update_comp.glsl | 2 +- .../workbench_shadow_visibility_comp.glsl | 2 +- .../blender/draw/intern/draw_shader_shared.h | 20 +++++- .../draw/intern/shaders/common_aabb_lib.glsl | 15 ++-- .../intern/shaders/common_intersect_lib.glsl | 17 +++-- .../draw/intern/shaders/common_shape_lib.glsl | 24 +++---- .../shaders/draw_view_finalize_comp.glsl | 72 +++++++++---------- .../intern/shaders/draw_visibility_comp.glsl | 5 +- .../gpu/shaders/metal/mtl_shader_defines.msl | 3 - .../shaders/opengl/glsl_shader_defines.glsl | 2 - 10 files changed, 85 insertions(+), 77 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl index 6a780a91f76..9f9a4c88f9c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl @@ -65,7 +65,7 @@ void main() } AABB aabb_tag; - AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999)); + AABB 
aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999)); /* Directionnal winmat have no correct near/far in the Z dimension at this point. * Do not clip in this dimension. */ diff --git a/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl b/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl index aef73672a8a..346e10d7083 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl @@ -44,7 +44,7 @@ bool is_visible(IsectBox box) bool intersects_near_plane(IsectBox box) { - vec4 near_plane = drw_view_culling.planes[4]; + vec4 near_plane = drw_view_culling.frustum_planes.planes[4]; bool on_positive_side = false; bool on_negative_side = false; diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 28090ef2b46..22ea36c45d2 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -94,11 +94,27 @@ uint drw_view_id = 0; # define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK) #endif +struct FrustumCorners { + float4 corners[8]; +}; +BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16) + +struct FrustumPlanes { + /* [0] left + * [1] right + * [2] bottom + * [3] top + * [4] near + * [5] far */ + float4 planes[6]; +}; +BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16) + struct ViewCullingData { /** \note vec3 array padded to vec4. */ /** Frustum corners. 
*/ - float4 corners[8]; - float4 planes[6]; + FrustumCorners frustum_corners; + FrustumPlanes frustum_planes; float4 bound_sphere; }; BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16) diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl index 8564c6648ed..5adcdec4a3e 100644 --- a/source/blender/draw/intern/shaders/common_aabb_lib.glsl +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -7,15 +7,16 @@ struct AABB { vec3 min, max; - -#ifdef GPU_METAL - inline AABB() = default; - inline AABB(vec3 _min, vec3 _max) : min(_min), max(_max) - { - } -#endif }; +AABB shape_aabb(vec3 min, vec3 max) +{ + AABB aabb; + aabb.min = min; + aabb.max = max; + return aabb; +} + AABB aabb_init_min_max() { AABB aabb; diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl index e23216ec2e2..252298022e3 100644 --- a/source/blender/draw/intern/shaders/common_intersect_lib.glsl +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 5; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid) for (int p = 0; p < 5; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_pyramid.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -181,7 +182,7 @@ bool intersect_view(Box box) for (int p = 0; p < 6; ++p) { bool 
is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -203,7 +204,8 @@ bool intersect_view(Box box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_box.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_box.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere) bool intersects = true; for (int p = 0; p < 6 && intersects; ++p) { - float dist_to_plane = dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0)); + float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0)); if (dist_to_plane < -sphere.radius) { intersects = false; } diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl index 
016ef944859..25a2781d729 100644 --- a/source/blender/draw/intern/shaders/common_shape_lib.glsl +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -14,18 +14,14 @@ struct Circle { vec2 center; float radius; - -#ifdef GPU_METAL - inline Circle() = default; - inline Circle(vec2 in_center, float in_radius) : center(in_center), radius(in_radius) - { - } -#endif }; Circle shape_circle(vec2 center, float radius) { - return Circle(center, radius); + Circle circle; + circle.center = center; + circle.radius = radius; + return circle; } /** \} */ @@ -37,18 +33,14 @@ Circle shape_circle(vec2 center, float radius) struct Sphere { vec3 center; float radius; - -#ifdef GPU_METAL - inline Sphere() = default; - inline Sphere(vec3 in_center, float in_radius) : center(in_center), radius(in_radius) - { - } -#endif }; Sphere shape_sphere(vec3 center, float radius) { - return Sphere(center, radius); + Sphere sphere; + sphere.center = center; + sphere.radius = radius; + return sphere; } /** \} */ diff --git a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl index c7917357b2c..6fc34af815d 100644 --- a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl +++ b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl @@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat, } } -void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, corners, 8)) +void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners) { float left, right, bottom, top, near, far; bool is_persp = winmat[3][3] == 0.0; projmat_dimensions(winmat, left, right, bottom, top, near, far); - corners[0][2] = corners[3][2] = corners[7][2] = corners[4][2] = -near; - corners[0][0] = corners[3][0] = left; - corners[4][0] = corners[7][0] = right; - corners[0][1] = corners[4][1] = bottom; - corners[7][1] = corners[3][1] = top; + frustum_corners.corners[0][2] = 
frustum_corners.corners[3][2] = frustum_corners.corners[7][2] = + frustum_corners.corners[4][2] = -near; + frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left; + frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right; + frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom; + frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top; /* Get the coordinates of the far plane. */ if (is_persp) { @@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, cor top *= sca_far; } - corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far; - corners[1][0] = corners[2][0] = left; - corners[6][0] = corners[5][0] = right; - corners[1][1] = corners[5][1] = bottom; - corners[2][1] = corners[6][1] = top; + frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] = + frustum_corners.corners[5][2] = -far; + frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left; + frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right; + frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom; + frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top; /* Transform into world space. 
*/ for (int i = 0; i < 8; i++) { - corners[i].xyz = transform_point(viewinv, corners[i].xyz); + frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz); } } -void planes_from_projmat(mat4 mat, - DEVICE_OUT(vec4, left), - DEVICE_OUT(vec4, right), - DEVICE_OUT(vec4, bottom), - DEVICE_OUT(vec4, top), - DEVICE_OUT(vec4, near), - DEVICE_OUT(vec4, far)) +void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes) { /* References: * @@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat, * http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf */ mat = transpose(mat); - left = mat[3] + mat[0]; - right = mat[3] - mat[0]; - bottom = mat[3] + mat[1]; - top = mat[3] - mat[1]; - near = mat[3] + mat[2]; - far = mat[3] - mat[2]; + frustum_planes.planes[0] = mat[3] + mat[0]; + frustum_planes.planes[1] = mat[3] - mat[0]; + frustum_planes.planes[2] = mat[3] + mat[1]; + frustum_planes.planes[3] = mat[3] - mat[1]; + frustum_planes.planes[4] = mat[3] + mat[2]; + frustum_planes.planes[5] = mat[3] - mat[2]; } -void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6)) +void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes) { mat4 persmat = winmat * viewmat; - planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]); + planes_from_projmat(persmat, frustum_planes); /* Normalize. */ for (int p = 0; p < 6; p++) { - planes[p] /= length(planes[p].xyz); + frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz); } } -vec4 frustum_culling_sphere_calc(device vec4 corners[8]) +vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners) { /* Extract Bounding Sphere */ /* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. 
*/ vec4 bsphere; - bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5; + bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5; bsphere.w = 0.0; for (int i = 0; i < 8; i++) { - bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz)); + bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz)); } return bsphere; } @@ -125,11 +121,15 @@ void main() return; } - frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners); + /* Read frustum_corners from device memory, update, and write back. */ FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners; + frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners); + view_culling_buf[drw_view_id].frustum_corners = frustum_corners; - frustum_culling_planes_calc( - drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes); + /* Read frustum_planes from device memory, update, and write back. 
*/ + FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes; + frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes); - view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc( - view_culling_buf[drw_view_id].corners); + view_culling_buf[drw_view_id].frustum_planes = frustum_planes; + view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners); } diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl index f3ca51dbf6b..0d2717aea64 100644 --- a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -34,8 +34,9 @@ void main() bounds.bounding_corners[1].xyz, bounds.bounding_corners[2].xyz, bounds.bounding_corners[3].xyz); - Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); - Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz, + bounds._inner_sphere_radius); for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) { if (drw_view_culling.bound_sphere.w == -1.0) { diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl index 94b7bae302a..dfb74a3e76b 100644 --- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -203,9 +203,6 @@ template T atomicXor(device T &mem, T data) * shortened to avoid expanding the glsl source string. 
*/ #define THD thread #define OUT(type, name, array) thread type(&name)[array] -#define THREADGROUP_OUT_ARRAY(type, name, array) threadgroup type(&name)[array] -#define DEVICE_OUT_ARRAY(type, name, array) device type(&name)[array] -#define DEVICE_OUT(type, name) device type &name /* Generate wrapper structs for combined texture and sampler type. */ #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl index e8119c57d6c..eb09f580391 100644 --- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl +++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl @@ -20,8 +20,6 @@ #define device #define threadgroup #define OUT(type, name, array_len) out type name[array_len] -#define DEVICE_OUT_ARRAY(type, name, array_len) out type name[array_len] -#define DEVICE_OUT(type, name) out type /* Backend Functions. */ #define select(A, B, mask) mix(A, B, mask) -- 2.30.2 From d647aa9791fc03b25a3735e22d9474f34b5df0dc Mon Sep 17 00:00:00 2001 From: Michael Parkin-White Date: Mon, 17 Apr 2023 16:50:48 +0100 Subject: [PATCH 3/4] Move break inside code block --- .../eevee_next/shaders/eevee_light_culling_tile_comp.glsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl index 6479f4f98ff..1f012a44acf 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -175,7 +175,8 @@ void main() vec3 v001 = v000 - v_back * radius; Box bbox = shape_box(v000, v100, v010, v001); intersect_tile = intersect_tile && intersect(tile, bbox); - } break; + break; + } default: break; } -- 2.30.2 From 6aa0011068045c457006b43fdd38088bc255fb46 Mon Sep 17 00:00:00 2001 From: 
Michael Parkin-White Date: Mon, 17 Apr 2023 21:59:16 +0100 Subject: [PATCH 4/4] Host changes for ViewCullingData refactor --- .../blender/draw/intern/draw_shader_shared.h | 2 ++ source/blender/draw/intern/draw_view.cc | 20 ++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 22ea36c45d2..3ad1e11df28 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -21,6 +21,8 @@ typedef struct DispatchCommand DispatchCommand; typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; typedef struct DRWDebugVert DRWDebugVert; typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; +typedef struct FrustumCorners FrustumCorners; +typedef struct FrustumPlanes FrustumPlanes; /* __cplusplus is true when compiling with MSL. */ # if defined(__cplusplus) && !defined(GPU_SHADER) diff --git a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc index 30417ff6420..c6b7ac11017 100644 --- a/source/blender/draw/intern/draw_view.cc +++ b/source/blender/draw/intern/draw_view.cc @@ -50,7 +50,8 @@ void View::frustum_boundbox_calc(int view_id) } #endif - MutableSpan corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; + MutableSpan corners = {culling_[view_id].frustum_corners.corners, + ARRAY_SIZE(culling_[view_id].frustum_corners.corners)}; float left, right, bottom, top, near, far; bool is_persp = data_[view_id].winmat[3][3] == 0.0f; @@ -89,15 +90,15 @@ void View::frustum_culling_planes_calc(int view_id) { float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat; planes_from_projmat(persmat.ptr(), - culling_[view_id].planes[0], - culling_[view_id].planes[5], - culling_[view_id].planes[1], - culling_[view_id].planes[3], - culling_[view_id].planes[4], - culling_[view_id].planes[2]); + culling_[view_id].frustum_planes.planes[0], + 
culling_[view_id].frustum_planes.planes[5], + culling_[view_id].frustum_planes.planes[1], + culling_[view_id].frustum_planes.planes[3], + culling_[view_id].frustum_planes.planes[4], + culling_[view_id].frustum_planes.planes[2]); /* Normalize. */ - for (float4 &plane : culling_[view_id].planes) { + for (float4 &plane : culling_[view_id].frustum_planes.planes) { plane.w /= normalize_v3(plane); } } @@ -105,7 +106,8 @@ void View::frustum_culling_planes_calc(int view_id) void View::frustum_culling_sphere_calc(int view_id) { BoundSphere &bsphere = *reinterpret_cast(&culling_[view_id].bound_sphere); - Span corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; + Span corners = {culling_[view_id].frustum_corners.corners, + ARRAY_SIZE(culling_[view_id].frustum_corners.corners)}; /* Extract Bounding Sphere */ if (data_[view_id].winmat[3][3] != 0.0f) { -- 2.30.2