EEVEE-Next: Shadow Rendering Refactor #110979

Merged
Clément Foucault merged 34 commits from fclem/blender:eevee-next-shadow-opti into main 2023-08-17 17:35:25 +02:00
25 changed files with 666 additions and 445 deletions

View File

@ -56,16 +56,20 @@
SHADOW_TILEMAP_LOD3_LEN + SHADOW_TILEMAP_LOD4_LEN + SHADOW_TILEMAP_LOD5_LEN)
#define SHADOW_PAGE_CLEAR_GROUP_SIZE 32
#define SHADOW_PAGE_RES 256
#define SHADOW_PAGE_LOD 8 /* LOG2(SHADOW_PAGE_RES) */
#define SHADOW_DEPTH_SCAN_GROUP_SIZE 8
#define SHADOW_AABB_TAG_GROUP_SIZE 64
#define SHADOW_MAX_TILEMAP 4096
#define SHADOW_MAX_TILE (SHADOW_MAX_TILEMAP * SHADOW_TILEDATA_PER_TILEMAP)
#define SHADOW_MAX_PAGE 4096
#define SHADOW_PAGE_PER_ROW 64
#define SHADOW_ATLAS_SLOT 5
#define SHADOW_BOUNDS_GROUP_SIZE 64
#define SHADOW_CLIPMAP_GROUP_SIZE 64
#define SHADOW_VIEW_MAX 64 /* Must match DRW_VIEW_MAX. */
#define SHADOW_RENDER_MAP_SIZE (SHADOW_VIEW_MAX * SHADOW_TILEMAP_LOD0_LEN)
#define SHADOW_ATOMIC 1
#define SHADOW_PAGE_PER_ROW 4
#define SHADOW_PAGE_PER_COL 4
#define SHADOW_PAGE_PER_LAYER (SHADOW_PAGE_PER_ROW * SHADOW_PAGE_PER_COL)
/* Ray-tracing. */
#define RAYTRACE_GROUP_SIZE 8
@ -131,8 +135,6 @@
#define REFLECTION_PROBE_TEX_SLOT 8
#define VOLUME_SCATTERING_TEX_SLOT 9
#define VOLUME_TRANSMITTANCE_TEX_SLOT 10
/* Only during shadow rendering. */
#define SHADOW_RENDER_MAP_SLOT 4
/* Images. */
#define RBUFS_COLOR_SLOT 0
@ -145,6 +147,8 @@
#define VOLUME_PROP_EXTINCTION_IMG_SLOT 1
#define VOLUME_PROP_EMISSION_IMG_SLOT 2
#define VOLUME_PROP_PHASE_IMG_SLOT 3
/* Only during shadow rendering. */
#define SHADOW_ATLAS_IMG_SLOT 4
/* Uniform Buffers. */
/* Slot 0 is GPU_NODE_TREE_UBO_SLOT. */
@ -157,6 +161,7 @@
#define VOLUMES_INFO_BUF_SLOT 6
/* SLOT 6 is used by render shaders (Film, DoF and Motion Blur). Need to check if it should be
* assigned a different slot. */
/* TODO(fclem): This is above the limit of slot 6 for engines. Keep it lower by merging others. */
#define REFLECTION_PROBE_BUF_SLOT 7
/* Only during pre-pass. */
#define VELOCITY_CAMERA_PREV_BUF 3
@ -169,14 +174,16 @@
#define LIGHT_ZBIN_BUF_SLOT 2
#define LIGHT_TILE_BUF_SLOT 3
#define IRRADIANCE_BRICK_BUF_SLOT 4
#define SAMPLING_BUF_SLOT 6
#define CRYPTOMATTE_BUF_SLOT 7
/* Only during surface capture. */
#define SURFEL_BUF_SLOT 4
/* Only during surface capture. */
#define CAPTURE_BUF_SLOT 5
/* Only during shadow rendering. */
#define SHADOW_RENDER_MAP_BUF_SLOT 3
#define SHADOW_PAGE_INFO_SLOT 4
#define SAMPLING_BUF_SLOT 6
#define CRYPTOMATTE_BUF_SLOT 7
#define SHADOW_VIEWPORT_INDEX_BUF_SLOT 5
/* Only during pre-pass. */
#define VELOCITY_OBJ_PREV_BUF_SLOT 0

View File

@ -149,17 +149,14 @@ void WorldVolumePipeline::render(View &view)
void ShadowPipeline::sync()
{
surface_ps_.init();
/* TODO(fclem): Add state for rendering to empty framebuffer without depth test.
* For now this is only here for avoiding the rasterizer discard state. */
surface_ps_.state_set(DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS);
surface_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
surface_ps_.bind_texture(SHADOW_RENDER_MAP_SLOT, &inst_.shadows.render_map_tx_);
surface_ps_.bind_image(SHADOW_ATLAS_SLOT, &inst_.shadows.atlas_tx_);
surface_ps_.bind_image(SHADOW_ATLAS_IMG_SLOT, inst_.shadows.atlas_tx_);
surface_ps_.bind_ubo(CAMERA_BUF_SLOT, inst_.camera.ubo_get());
surface_ps_.bind_ssbo(SHADOW_RENDER_MAP_BUF_SLOT, &inst_.shadows.render_map_buf_);
surface_ps_.bind_ssbo(SHADOW_VIEWPORT_INDEX_BUF_SLOT, &inst_.shadows.viewport_index_buf_);
surface_ps_.bind_ssbo(SHADOW_PAGE_INFO_SLOT, &inst_.shadows.pages_infos_data_);
inst_.sampling.bind_resources(&surface_ps_);
surface_ps_.framebuffer_set(&inst_.shadows.render_fb_);
}
PassMain::Sub *ShadowPipeline::surface_material_add(GPUMaterial *gpumat)

View File

@ -797,7 +797,9 @@ static inline int2 shadow_cascade_grid_offset(int2 base_offset, int level_relati
*/
struct ShadowTileMapData {
/** Cached, used for rendering. */
float4x4 viewmat, winmat;
float4x4 viewmat;
/** Precomputed matrix, not used for rendering but for tagging. */
float4x4 winmat;
/** Punctual : Corners of the frustum. (vec3 padded to vec4) */
float4 corners[4];
/** Integer offset of the center of the 16x16 tiles from the origin of the tile space. */
@ -812,6 +814,16 @@ struct ShadowTileMapData {
int clip_data_index;
/** Bias LOD to tag for usage to lower the amount of tiles used. */
float lod_bias;
int _pad0;
int _pad1;
int _pad2;
/** Near and far clip distances for punctual. */
float clip_near;
float clip_far;
/** Half of the tilemap size in world units. Used to compute directional window matrix. */
float half_size;
/** Offset in local space to the tilemap center in world units. Used for directional winmat. */
float2 center_offset;
};
BLI_STATIC_ASSERT_ALIGN(ShadowTileMapData, 16)
@ -823,6 +835,7 @@ struct ShadowTileMapClip {
float clip_near_stored;
float clip_far_stored;
/** Near and far clip distances for directional. Float stored as int for atomic operations. */
/** NOTE: These are positive just like camera parameters. */
int clip_near;
int clip_far;
};
@ -839,12 +852,10 @@ struct ShadowPagesInfoData {
uint page_cached_start;
/** Index of the last page in the buffer since the last defrag. */
uint page_cached_end;
/** Number of views to be rendered during the shadow update pass. */
int view_count;
/** Physical page size in pixel. Pages are all squares. */
int page_size;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(ShadowPagesInfoData, 16)
@ -854,13 +865,17 @@ struct ShadowStatistics {
int page_update_count;
int page_allocated_count;
int page_rendered_count;
int view_needed_count;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(ShadowStatistics, 16)
/** Decoded tile data structure. */
fclem marked this conversation as resolved Outdated

Possible uint3 alignment issue sneaking in. Not sure if these are also host-resident, in which case we may need to use uint3_packed.

These are not part of a buffer; this is only decoded data, which is stored as a single uint in the SSBOs. So I think it is safe as is.

Ah no worries then :) Thanks!
struct ShadowTileData {
/** Page inside the virtual shadow map atlas. */
uint2 page;
uint3 page;
/** Page index inside pages_cached_buf. Only valid if `is_cached` is true. */
uint cache_index;
/** LOD pointed to LOD 0 tile page. (cube-map only). */
@ -888,12 +903,29 @@ enum eShadowFlag : uint32_t {
SHADOW_IS_USED = (1u << 31u)
};
static inline uint shadow_page_pack(uint3 page)
{
  /* Encode a page coordinate into a single uint: X starts at bit 0, Y at bit 2 and Z (the layer)
   * at bit 4.
   * NOTE: The components are deliberately not masked. The input is trusted to be in valid range,
   * and leaving it unmasked lets the invalid page `uint3(-1)`, which is sometimes passed here,
   * encode to `uint(-1)` as required. */
  uint packed = page.x;
  packed |= page.y << 2u;
  packed |= page.z << 4u;
  return packed;
}
static inline uint3 shadow_page_unpack(uint data)
{
  /* Decode a page coordinate from its packed form (inverse of `shadow_page_pack`).
   * The shift amounts are tweaked for SHADOW_PAGE_PER_ROW = 4: X and Y each take 2 bits, and
   * everything above bit 4 is the Z (layer) component. */
  uint column_mask = uint(SHADOW_PAGE_PER_ROW - 1);
  uint row_mask = uint(SHADOW_PAGE_PER_COL - 1);

  uint3 page;
  page.x = data & column_mask;
  page.y = (data >> 2u) & row_mask;
  page.z = data >> 4u;
  return page;
}
static inline ShadowTileData shadow_tile_unpack(ShadowTileDataPacked data)
{
ShadowTileData tile;
/* Tweaked for SHADOW_PAGE_PER_ROW = 64. */
tile.page.x = data & 63u;
tile.page.y = (data >> 6u) & 63u;
/* Tweaked for SHADOW_MAX_PAGE = 4096. */
tile.page = shadow_page_unpack(data & uint(SHADOW_MAX_PAGE - 1));
/* -- 12 bits -- */
/* Tweaked for SHADOW_TILEMAP_LOD < 8. */
tile.lod = (data >> 12u) & 7u;
@ -911,9 +943,7 @@ static inline ShadowTileData shadow_tile_unpack(ShadowTileDataPacked data)
static inline ShadowTileDataPacked shadow_tile_pack(ShadowTileData tile)
{
uint data;
data = (tile.page.x & 63u);
data |= (tile.page.y & 63u) << 6u;
uint data = shadow_page_pack(tile.page) & uint(SHADOW_MAX_PAGE - 1);
data |= (tile.lod & 7u) << 12u;
data |= (tile.cache_index & 4095u) << 15u;
data |= (tile.is_used ? uint(SHADOW_IS_USED) : 0);

View File

@ -54,13 +54,13 @@ void ShadowTileMap::sync_orthographic(const float4x4 &object_mat_,
* inverse in this particular case. */
viewmat = math::transpose(object_mat);
float half_size = ShadowDirectional::coverage_get(level) / 2.0f;
float2 win_offset = float2(grid_offset) * tile_size;
half_size = ShadowDirectional::coverage_get(level) / 2.0f;
center_offset = float2(grid_offset) * tile_size;
orthographic_m4(winmat.ptr(),
-half_size + win_offset.x,
half_size + win_offset.x,
-half_size + win_offset.y,
half_size + win_offset.y,
-half_size + center_offset.x,
half_size + center_offset.x,
-half_size + center_offset.y,
half_size + center_offset.y,
/* Near/far is computed on GPU using casters bounds. */
-1.0,
1.0);
@ -69,15 +69,15 @@ void ShadowTileMap::sync_orthographic(const float4x4 &object_mat_,
void ShadowTileMap::sync_cubeface(
const float4x4 &object_mat_, float near_, float far_, eCubeFace face, float lod_bias_)
{
if (projection_type != SHADOW_PROJECTION_CUBEFACE || (cubeface != face) || (near != near_) ||
(far != far_))
if (projection_type != SHADOW_PROJECTION_CUBEFACE || (cubeface != face) ||
(clip_near != near_) || (clip_far != far_))
{
set_dirty();
}
projection_type = SHADOW_PROJECTION_CUBEFACE;
cubeface = face;
near = near_;
far = far_;
clip_near = near_;
clip_far = far_;
lod_bias = lod_bias_;
grid_offset = int2(0);
@ -86,11 +86,13 @@ void ShadowTileMap::sync_cubeface(
set_dirty();
}
perspective_m4(winmat.ptr(), -near, near, -near, near, near, far);
winmat = math::projection::perspective(
-clip_near, clip_near, -clip_near, clip_near, clip_near, clip_far);
viewmat = float4x4(shadow_face_mat[cubeface]) * math::invert(object_mat);
/* Update corners. */
float4x4 viewinv = object_mat;
float far = clip_far;
corners[0] = float4(viewinv.location(), 0.0f);
corners[1] = float4(math::transform_point(viewinv, float3(-far, -far, -far)), 0.0f);
corners[2] = float4(math::transform_point(viewinv, float3(far, -far, -far)), 0.0f);
@ -605,8 +607,8 @@ void ShadowDirectional::end_sync(Light &light, const Camera &camera, float lod_b
}
light.tilemap_index = tilemap_pool.tilemaps_data.size();
light.clip_near = int(0xFF7FFFFFu ^ 0x7FFFFFFFu); /* floatBitsToOrderedInt(-FLT_MAX) */
light.clip_far = 0x7F7FFFFF; /* floatBitsToOrderedInt(FLT_MAX) */
light.clip_near = 0x7F7FFFFF; /* floatBitsToOrderedInt(FLT_MAX) */
light.clip_far = int(0xFF7FFFFFu ^ 0x7FFFFFFFu); /* floatBitsToOrderedInt(-FLT_MAX) */
if (directional_distribution_type_get(camera) == SHADOW_PROJECTION_CASCADE) {
cascade_tilemaps_distribution(light, camera);
@ -644,8 +646,11 @@ void ShadowModule::init()
}
}
int pool_size = enabled_ ? scene.eevee.shadow_pool_size : 0;
shadow_page_len_ = clamp_i(pool_size * 4, SHADOW_PAGE_PER_ROW, SHADOW_MAX_PAGE);
/* Pool size is in MBytes. */
const size_t pool_byte_size = enabled_ ? scene.eevee.shadow_pool_size * square_i(1024) : 1;
const size_t page_byte_size = square_i(shadow_page_size_) * sizeof(int);
shadow_page_len_ = int(divide_ceil_ul(pool_byte_size, page_byte_size));
shadow_page_len_ = min_ii(shadow_page_len_, SHADOW_MAX_PAGE);
float simplify_shadows = 1.0f;
if (scene.r.mode & R_SIMPLIFY) {
@ -654,18 +659,18 @@ void ShadowModule::init()
}
lod_bias_ = math::interpolate(float(SHADOW_TILEMAP_LOD), 0.0f, simplify_shadows);
int2 atlas_extent = shadow_page_size_ *
int2(SHADOW_PAGE_PER_ROW, shadow_page_len_ / SHADOW_PAGE_PER_ROW);
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
if (atlas_tx_.ensure_2d(atlas_type, atlas_extent, tex_usage)) {
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
/* Global update. */
do_full_update = true;
}
/* Make allocation safe. Avoids crash later on. */
if (!atlas_tx_.is_valid()) {
atlas_tx_.ensure_2d(atlas_type, int2(1));
atlas_tx_.ensure_2d_array(atlas_type, int2(1), 1);
inst_.info = "Error: Could not allocate shadow atlas. Most likely out of GPU memory.";
}
@ -689,11 +694,24 @@ void ShadowModule::init()
<< stats.page_used_count << " / " << shadow_page_len_ << ")\n";
inst_.info = ss.str();
}
if (stats.view_needed_count > SHADOW_VIEW_MAX && enabled_) {
std::stringstream ss;
ss << "Error: Too many shadow updates, some shadow might be incorrect.\n";
inst_.info = ss.str();
}
}
atlas_tx_.filter_mode(false);
render_map_tx_.ensure_mip_views();
/* Create different viewports to support different update region sizes. The most fitting viewport
* is then selected during the tilemap finalize stage in `viewport_select`. */
for (int i = 0; i < multi_viewports_.size(); i++) {
int size_in_tile = min_ii(1 << i, SHADOW_TILEMAP_RES);
multi_viewports_[i][0] = 0;
multi_viewports_[i][1] = 0;
multi_viewports_[i][2] = size_in_tile * shadow_page_size_;
multi_viewports_[i][3] = size_in_tile * shadow_page_size_;
}
}
void ShadowModule::begin_sync()
@ -856,13 +874,19 @@ void ShadowModule::end_sync()
do_full_update = false;
/* Put all pages in the free heap. */
for (uint i : IndexRange(shadow_page_len_)) {
uint2 page = {i % SHADOW_PAGE_PER_ROW, i / SHADOW_PAGE_PER_ROW};
pages_free_data_[i] = page.x | (page.y << 16u);
uint3 page = {i % SHADOW_PAGE_PER_ROW,
(i / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
i / SHADOW_PAGE_PER_LAYER};
pages_free_data_[i] = shadow_page_pack(page);
}
for (uint i : IndexRange(shadow_page_len_, SHADOW_MAX_PAGE - shadow_page_len_)) {
pages_free_data_[i] = 0xFFFFFFFFu;
}
pages_free_data_.push_update();
/* Clear tiles to not reference any page. */
tilemap_pool.tiles_data.clear_to_zero();
tilemap_pool.tilemaps_clip.clear_to_zero();
/* Clear cached page buffer. */
GPU_storagebuf_clear(pages_cached_data_, -1);
@ -873,7 +897,6 @@ void ShadowModule::end_sync()
pages_infos_data_.page_cached_next = 0u;
pages_infos_data_.page_cached_start = 0u;
pages_infos_data_.page_cached_end = 0u;
pages_infos_data_.page_size = shadow_page_size_;
pages_infos_data_.push_update();
}
@ -1012,15 +1035,11 @@ void ShadowModule::end_sync()
sub.bind_ssbo("view_infos_buf", &shadow_multi_view_.matrices_ubo_get());
sub.bind_ssbo("statistics_buf", statistics_buf_.current());
sub.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf_);
sub.bind_ssbo("clear_page_buf", clear_page_buf_);
sub.bind_ssbo("clear_list_buf", clear_list_buf_);
sub.bind_ssbo("render_map_buf", render_map_buf_);
sub.bind_ssbo("viewport_index_buf", viewport_index_buf_);
sub.bind_ssbo("pages_infos_buf", pages_infos_data_);
sub.bind_image("tilemaps_img", tilemap_pool.tilemap_tx);
sub.bind_image("render_map_lod0_img", render_map_tx_.mip_view(0));
sub.bind_image("render_map_lod1_img", render_map_tx_.mip_view(1));
sub.bind_image("render_map_lod2_img", render_map_tx_.mip_view(2));
sub.bind_image("render_map_lod3_img", render_map_tx_.mip_view(3));
sub.bind_image("render_map_lod4_img", render_map_tx_.mip_view(4));
sub.bind_image("render_map_lod5_img", render_map_tx_.mip_view(5));
sub.dispatch(int3(1, 1, tilemap_pool.tilemaps_data.size()));
sub.barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_UNIFORM | GPU_BARRIER_TEXTURE_FETCH |
GPU_BARRIER_SHADER_IMAGE_ACCESS);
@ -1028,10 +1047,12 @@ void ShadowModule::end_sync()
{
/** Clear pages that need to be rendered. */
PassSimple::Sub &sub = pass.sub("RenderClear");
sub.framebuffer_set(&render_fb_);
sub.state_set(DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
sub.shader_set(inst_.shaders.static_shader_get(SHADOW_PAGE_CLEAR));
sub.bind_ssbo("pages_infos_buf", pages_infos_data_);
sub.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf_);
sub.bind_image("atlas_img", atlas_tx_);
sub.bind_ssbo("clear_list_buf", clear_list_buf_);
sub.bind_image("shadow_atlas_img", atlas_tx_);
sub.dispatch(clear_dispatch_buf_);
sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
@ -1133,6 +1154,9 @@ void ShadowModule::set_view(View &view)
usage_tag_fb.ensure(usage_tag_fb_resolution_);
render_fb_.ensure(int2(SHADOW_TILEMAP_RES * shadow_page_size_));
GPU_framebuffer_bind(render_fb_);
GPU_framebuffer_multi_viewports_set(render_fb_,
reinterpret_cast<int(*)[4]>(multi_viewports_.data()));
inst_.hiz_buffer.update();
@ -1151,6 +1175,8 @@ void ShadowModule::set_view(View &view)
shadow_multi_view_.compute_procedural_bounds();
inst_.pipelines.shadow.render(shadow_multi_view_);
GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);
}
DRW_stats_group_end();

View File

@ -71,8 +71,6 @@ struct ShadowTileMap : public ShadowTileMapData {
eCubeFace cubeface = Z_NEG;
/** Cached, used for detecting updates. */
float4x4 object_mat;
/** Near and far clip distances. For clip-map, computed on the GPU using casters BBoxes. */
float near, far;
public:
ShadowTileMap(int tiles_index_)
@ -209,9 +207,13 @@ class ShadowModule {
StorageVectorBuffer<uint, 128> curr_casters_ = {"CurrCasters"};
/** Indirect arguments for page clearing. */
StorageBuffer<DispatchCommand> clear_dispatch_buf_;
/** Pages to clear. */
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_page_buf_ = {"clear_page_buf"};
DispatchIndirectBuf clear_dispatch_buf_;
/** Array containing a compact stream of tiles to clear. */
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE, true> clear_list_buf_ = {"clear_list_buf"};
/** Tile to pages mapping. */
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE, true> render_map_buf_ = {"render_map_buf"};
/** View to viewport index mapping. */
StorageArrayBuffer<uint, SHADOW_VIEW_MAX, true> viewport_index_buf_ = {"viewport_index_buf"};
int3 dispatch_depth_scan_size_;
/* Ratio between tile-map pixel world "radius" and film pixel world "radius". */
@ -254,17 +256,10 @@ class ShadowModule {
/** Multi-View containing a maximum of 64 views to be rendered with the shadow pipeline. */
View shadow_multi_view_ = {"ShadowMultiView", SHADOW_VIEW_MAX, true};
/** Tile to physical page mapping. This is an array texture with one layer per view. */
Texture render_map_tx_ = {"ShadowRenderMap",
GPU_R32UI,
GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW,
int2(SHADOW_TILEMAP_RES),
64,
nullptr,
SHADOW_TILEMAP_LOD + 1};
/** An empty frame-buffer (no attachment) the size of a whole tile-map. */
/** Framebuffer with the atlas_tx attached. */
Framebuffer render_fb_;
/** Array of viewports to render each tile to. */
std::array<int4, 16> multi_viewports_;
/** \} */

View File

@ -11,7 +11,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@ -9,7 +9,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@ -9,7 +9,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@ -11,7 +11,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@ -5,9 +5,7 @@ void main()
{
int index = int(gl_GlobalInvocationID.x);
if (index < tilemaps_clip_buf_len) {
tilemaps_clip_buf[index].clip_near_stored = 0;
tilemaps_clip_buf[index].clip_far_stored = 0;
tilemaps_clip_buf[index].clip_near = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[index].clip_far = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[index].clip_far = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[index].clip_near = floatBitsToOrderedInt(FLT_MAX);
}
}

View File

@ -3,7 +3,7 @@
/** \a unormalized_uv is the uv coordinates for the whole tilemap [0..SHADOW_TILEMAP_RES]. */
vec2 shadow_page_uv_transform(
vec2 atlas_size, uvec2 page, uint lod, vec2 unormalized_uv, ivec2 tile_lod0_coord)
vec2 atlas_size, uvec3 page, uint lod, vec2 unormalized_uv, ivec2 tile_lod0_coord)
{
/* Bias uv sample for LODs since custom raster aligns LOD pixels instead of centering them. */
if (lod != 0) {
@ -13,7 +13,7 @@ vec2 shadow_page_uv_transform(
vec2 target_tile = vec2(tile_lod0_coord >> lod);
vec2 page_uv = unormalized_uv * lod_scaling - target_tile;
/* Assumes atlas is square. */
vec2 atlas_uv = (vec2(page) + min(page_uv, 0.99999)) * float(SHADOW_PAGE_RES) / atlas_size;
vec2 atlas_uv = (vec2(page.xy) + min(page_uv, 0.99999)) * float(SHADOW_PAGE_RES) / atlas_size;
return atlas_uv;
}
@ -84,8 +84,6 @@ float shadow_slope_bias_get(vec2 atlas_size, LightData light, vec3 lNg, vec3 lP,
{
/* Compute coordinate inside the pixel we are sampling. */
vec2 uv_subpixel_coord = fract(uv * atlas_size);
/* Bias uv sample for LODs since custom raster aligns LOD pixels instead of centering them. */
uv_subpixel_coord += (lod > 0) ? -exp2(-1.0 - float(lod)) : 0.0;
/* Compute delta to the texel center (where the sample is). */
vec2 ndc_texel_center_delta = uv_subpixel_coord * 2.0 - 1.0;
/* Create a normal plane equation and go through the normal projection matrix. */
@ -98,7 +96,7 @@ float shadow_slope_bias_get(vec2 atlas_size, LightData light, vec3 lNg, vec3 lP,
/* Compute slope to where the receiver should be by extending the plane to the texel center. */
float bias = dot(ndc_slope, ndc_texel_center_delta);
/* Bias for 1 pixel of the sampled LOD. */
bias /= ((SHADOW_TILEMAP_RES * SHADOW_PAGE_RES) >> lod);
bias /= float((SHADOW_TILEMAP_RES * SHADOW_PAGE_RES) >> lod);
return bias;
}
@ -117,14 +115,16 @@ struct ShadowSample {
ShadowTileData tile;
};
float shadow_tile_depth_get(usampler2D atlas_tx, ShadowTileData tile, vec2 atlas_uv)
float shadow_tile_depth_get(usampler2DArray atlas_tx, ShadowTileData tile, vec2 atlas_uv)
{
if (!tile.is_allocated) {
/* Far plane distance but with a bias to make sure there will be no shadowing.
* But also not FLT_MAX since it can cause issues with projection. */
return 1.1;
}
return uintBitsToFloat(texture(atlas_tx, atlas_uv).r);
uint raw_bits = texture(atlas_tx, vec3(atlas_uv, float(tile.page.z))).r;
float depth = uintBitsToFloat(raw_bits);
return depth;
}
vec2 shadow_punctual_linear_depth(vec2 z, float near, float far)
@ -137,11 +137,11 @@ vec2 shadow_punctual_linear_depth(vec2 z, float near, float far)
float shadow_directional_linear_depth(float z, float near, float far)
{
return z * (near - far) - near;
return z * (far - near) + near;
}
ShadowSample shadow_punctual_sample_get(
usampler2D atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 lP, vec3 lNg)
usampler2DArray atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 lP, vec3 lNg)
{
int face_id = shadow_punctual_face_index_get(lP);
lNg = shadow_punctual_local_position_to_face_local(face_id, lNg);
@ -176,7 +176,7 @@ ShadowSample shadow_punctual_sample_get(
}
ShadowSample shadow_directional_sample_get(
usampler2D atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 P, vec3 lNg)
usampler2DArray atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 P, vec3 lNg)
{
vec3 lP = shadow_world_to_local(light, P);
ShadowCoordinates coord = shadow_directional_coordinates(light, lP);
@ -198,13 +198,13 @@ ShadowSample shadow_directional_sample_get(
/* Receiver distance needs to also be increasing.
* Negate since Z distance follows blender camera convention of -Z as forward. */
float receiver_dist = -lP.z;
samp.bias *= near - far;
samp.bias *= far - near;
samp.occluder_delta = samp.occluder_dist - receiver_dist;
return samp;
}
ShadowSample shadow_sample(const bool is_directional,
usampler2D atlas_tx,
usampler2DArray atlas_tx,
usampler2D tilemaps_tx,
LightData light,
vec3 lL,

View File

@ -23,6 +23,7 @@ void main()
ShadowTileData tile = shadow_tile_unpack(tiles_buf[tile_index]);
if (tile.is_used && !tile.is_allocated) {
shadow_page_alloc(tile);
tile.lod = lod;
tiles_buf[tile_index] = shadow_tile_pack(tile);
}

View File

@ -9,9 +9,10 @@
void main()
{
uvec2 page_co = unpackUvec2x16(clear_page_buf[gl_GlobalInvocationID.z]);
uvec2 page_texel = page_co * pages_infos_buf.page_size + gl_GlobalInvocationID.xy;
uint page_packed = clear_list_buf[gl_GlobalInvocationID.z];
uvec3 page_co = shadow_page_unpack(page_packed);
page_co.xy = page_co.xy * SHADOW_PAGE_RES + gl_GlobalInvocationID.xy;
/* Clear to FLT_MAX instead of 1 so the far plane doesn't cast shadows onto farther objects. */
imageStore(atlas_img, ivec2(page_texel), uvec4(floatBitsToUint(FLT_MAX)));
imageStore(shadow_atlas_img, ivec3(page_co), uvec4(floatBitsToUint(FLT_MAX)));
}

View File

@ -106,13 +106,13 @@ void main()
pages_infos_buf.page_cached_start = src;
pages_infos_buf.page_cached_end = end;
pages_infos_buf.page_alloc_count = 0;
pages_infos_buf.view_count = 0;
/* Stats. */
statistics_buf.page_used_count = 0;
statistics_buf.page_update_count = 0;
statistics_buf.page_allocated_count = 0;
statistics_buf.page_rendered_count = 0;
statistics_buf.view_needed_count = 0;
/* Wrap the cursor to avoid unsigned overflow. We do not do modulo arithmetic because it would
* produce a 0 length buffer if the buffer is full. */
@ -123,7 +123,7 @@ void main()
}
/* Reset clear command indirect buffer. */
clear_dispatch_buf.num_groups_x = pages_infos_buf.page_size / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_y = pages_infos_buf.page_size / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_x = SHADOW_PAGE_RES / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_y = SHADOW_PAGE_RES / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_z = 0;
}

View File

@ -37,9 +37,9 @@ void shadow_page_free(inout ShadowTileData tile)
int index = atomicAdd(pages_infos_buf.page_free_count, 1);
assert(index < SHADOW_MAX_PAGE);
/* Insert in heap. */
pages_free_buf[index] = packUvec2x16(tile.page);
pages_free_buf[index] = shadow_page_pack(tile.page);
/* Remove from tile. */
tile.page = uvec2(-1);
tile.page = uvec3(-1);
tile.is_cached = false;
tile.is_allocated = false;
}
@ -55,7 +55,7 @@ void shadow_page_alloc(inout ShadowTileData tile)
return;
}
/* Insert in tile. */
tile.page = unpackUvec2x16(pages_free_buf[index]);
tile.page = shadow_page_unpack(pages_free_buf[index]);
tile.is_allocated = true;
tile.do_update = true;
/* Remove from heap. */
@ -70,9 +70,9 @@ void shadow_page_cache_append(inout ShadowTileData tile, uint tile_index)
/* The page_cached_next is also wrapped in the defrag phase to avoid unsigned overflow. */
uint index = atomicAdd(pages_infos_buf.page_cached_next, 1u) % uint(SHADOW_MAX_PAGE);
/* Insert in heap. */
pages_cached_buf[index] = uvec2(packUvec2x16(tile.page), tile_index);
pages_cached_buf[index] = uvec2(shadow_page_pack(tile.page), tile_index);
/* Remove from tile. */
tile.page = uvec2(-1);
tile.page = uvec3(-1);
tile.cache_index = index;
tile.is_cached = true;
tile.is_allocated = false;
@ -86,7 +86,7 @@ void shadow_page_cache_remove(inout ShadowTileData tile)
uint index = tile.cache_index;
/* Insert in tile. */
tile.page = unpackUvec2x16(pages_cached_buf[index].x);
tile.page = shadow_page_unpack(pages_cached_buf[index].x);
tile.cache_index = uint(-1);
tile.is_cached = false;
tile.is_allocated = true;

View File

@ -35,7 +35,7 @@ void main()
float local_min = FLT_MAX;
float local_max = -FLT_MAX;
for (int i = 0; i < 8; i++) {
float z = dot(box.corners[i].xyz, light._back);
float z = dot(box.corners[i].xyz, -light._back);
local_min = min(local_min, z);
local_max = max(local_max, z);
}
@ -59,14 +59,14 @@ void main()
if (gl_LocalInvocationID.x == 0) {
/* Final result. Min/Max of the whole dispatch. */
atomicMin(light_buf[l_idx].clip_far, global_min);
atomicMax(light_buf[l_idx].clip_near, global_max);
atomicMin(light_buf[l_idx].clip_near, global_min);
atomicMax(light_buf[l_idx].clip_far, global_max);
/* TODO(fclem): This feels unnecessary but we currently have no indexing from
* tilemap to lights. This is because the lights are selected by the culling phase. */
for (int i = light.tilemap_index; i <= light_tilemap_max_get(light); i++) {
int index = tilemaps_buf[i].clip_data_index;
atomicMin(tilemaps_clip_buf[index].clip_far, global_min);
atomicMax(tilemaps_clip_buf[index].clip_near, global_max);
atomicMin(tilemaps_clip_buf[index].clip_near, global_min);
atomicMax(tilemaps_clip_buf[index].clip_far, global_max);
}
}

View File

@ -8,171 +8,171 @@
*/
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_matrix_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
shared uint tile_updates_count;
shared ivec2 rect_min;
shared ivec2 rect_max;
shared int view_index;
void page_clear_buf_append(uint page_packed)
/**
* Select the smallest viewport that can contain the given rect of tiles to render.
* Returns the viewport index.
*/
int viewport_select(ivec2 rect_size)
{
uint clear_page_index = atomicAdd(clear_dispatch_buf.num_groups_z, 1u);
clear_page_buf[clear_page_index] = page_packed;
/* TODO(fclem): Experiment with non squared viewports. */
int max_dim = max(rect_size.x, rect_size.y);
/* Assumes max_dim is non-zero. */
int power_of_two = int(findMSB(uint(max_dim)));
if ((1 << power_of_two) != max_dim) {
power_of_two += 1;
}
return power_of_two;
}
void page_tag_as_rendered(ivec2 tile_co, int tiles_index, int lod)
/**
* Get the size of the viewport with the given index (as returned by `viewport_select`).
* Returns the viewport size in tiles.
*/
ivec2 viewport_size_get(int viewport_index)
{
int tile_index = shadow_tile_offset(tile_co, tiles_index, lod);
tiles_buf[tile_index] |= SHADOW_IS_RENDERED;
atomicAdd(statistics_buf.page_rendered_count, 1);
/* TODO(fclem): Experiment with non squared viewports. */
return ivec2(1 << viewport_index);
}
void main()
{
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
tile_updates_count = uint(0);
}
barrier();
int tilemap_index = int(gl_GlobalInvocationID.z);
ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy);
ivec2 atlas_texel = shadow_tile_coord_in_atlas(tile_co, tilemap_index);
ShadowTileMapData tilemap_data = tilemaps_buf[tilemap_index];
int lod_max = (tilemap_data.projection_type == SHADOW_PROJECTION_CUBEFACE) ? SHADOW_TILEMAP_LOD :
0;
int lod_valid = 0;
/* One bit per lod. */
int do_lod_update = 0;
/* Packed page (packUvec2x16) to render per LOD. */
uint updated_lod_page[SHADOW_TILEMAP_LOD + 1];
uvec2 page_valid;
bool is_cubemap = (tilemap_data.projection_type == SHADOW_PROJECTION_CUBEFACE);
int lod_max = is_cubemap ? SHADOW_TILEMAP_LOD : 0;
int valid_tile_index = -1;
/* With all threads (LOD0 size dispatch) load each lod tile from the highest lod
* to the lowest, keeping track of the lowest one allocated which will be use for shadowing.
* Also save which page are to be updated. */
for (int lod = SHADOW_TILEMAP_LOD; lod >= 0; lod--) {
if (lod > lod_max) {
updated_lod_page[lod] = 0xFFFFFFFFu;
continue;
}
int tile_index = shadow_tile_offset(tile_co >> lod, tilemap_data.tiles_index, lod);
* This guarantees an O(1) lookup time.
* Add one render view per LOD that has tiles to be rendered. */
for (int lod = lod_max; lod >= 0; lod--) {
ivec2 tile_co_lod = tile_co >> lod;
int tile_index = shadow_tile_offset(tile_co_lod, tilemap_data.tiles_index, lod);
ShadowTileData tile = shadow_tile_unpack(tiles_buf[tile_index]);
if (tile.is_used && tile.do_update) {
do_lod_update = 1 << lod;
updated_lod_page[lod] = packUvec2x16(tile.page);
}
else {
updated_lod_page[lod] = 0xFFFFFFFFu;
/* Compute update area. */
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
rect_min = ivec2(SHADOW_TILEMAP_RES);
rect_max = ivec2(0);
view_index = -1;
}
/* Save highest lod for this thread. */
if (tile.is_used && lod > 0) {
/* Reload the page in case there was an allocation in the valid thread. */
page_valid = tile.page;
lod_valid = lod;
}
else if (lod == 0 && lod_valid != 0 && !tile.is_allocated) {
/* If the tile is not used, store the valid LOD level in LOD0. */
tile.page = page_valid;
tile.lod = lod_valid;
/* This is not a real ownership. It is just a tag so that the shadowing is deemed correct. */
tile.is_allocated = true;
barrier();
bool lod_valid_thread = all(equal(tile_co, tile_co_lod << lod));
bool do_page_render = tile.is_used && tile.do_update && lod_valid_thread;
if (do_page_render) {
atomicMin(rect_min.x, tile_co_lod.x);
atomicMin(rect_min.y, tile_co_lod.y);
atomicMax(rect_max.x, tile_co_lod.x + 1);
atomicMax(rect_max.y, tile_co_lod.y + 1);
}
if (lod == 0) {
imageStore(tilemaps_img, atlas_texel, uvec4(shadow_tile_pack(tile)));
}
}
barrier();
if (do_lod_update > 0) {
atomicAdd(tile_updates_count, 1u);
}
int viewport_index = viewport_select(rect_max - rect_min);
ivec2 viewport_size = viewport_size_get(viewport_index);
barrier();
/* Issue one view if there is an update in the LOD. */
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
bool lod_has_update = rect_min.x < rect_max.x;
if (lod_has_update) {
view_index = atomicAdd(statistics_buf.view_needed_count, 1);
if (view_index < SHADOW_VIEW_MAX) {
/* Setup the view. */
viewport_index_buf[view_index] = viewport_index;
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
/* No update by default. */
view_index = 64;
view_infos_buf[view_index].viewmat = tilemap_data.viewmat;
view_infos_buf[view_index].viewinv = inverse(tilemap_data.viewmat);
if (tile_updates_count > 0) {
view_index = atomicAdd(pages_infos_buf.view_count, 1);
if (view_index < 64) {
view_infos_buf[view_index].viewmat = tilemap_data.viewmat;
view_infos_buf[view_index].viewinv = inverse(tilemap_data.viewmat);
float lod_res = float(SHADOW_TILEMAP_RES >> lod);
/* TODO(fclem): These should be the culling planes. */
// vec2 cull_region_start = (vec2(rect_min) / lod_res) * 2.0 - 1.0;
// vec2 cull_region_end = (vec2(rect_max) / lod_res) * 2.0 - 1.0;
vec2 view_start = (vec2(rect_min) / lod_res) * 2.0 - 1.0;
vec2 view_end = (vec2(rect_min + viewport_size) / lod_res) * 2.0 - 1.0;
if (tilemap_data.projection_type != SHADOW_PROJECTION_CUBEFACE) {
int clip_index = tilemap_data.clip_data_index;
/* For directional, we need to modify winmat to encompass all casters. */
float clip_far = -tilemaps_clip_buf[clip_index].clip_far_stored;
float clip_near = -tilemaps_clip_buf[clip_index].clip_near_stored;
tilemap_data.winmat[2][2] = -2.0 / (clip_far - clip_near);
tilemap_data.winmat[3][2] = -(clip_far + clip_near) / (clip_far - clip_near);
float clip_far = tilemaps_clip_buf[clip_index].clip_far_stored;
float clip_near = tilemaps_clip_buf[clip_index].clip_near_stored;
mat4x4 winmat;
if (tilemap_data.projection_type != SHADOW_PROJECTION_CUBEFACE) {
view_start *= tilemap_data.half_size;
view_end *= tilemap_data.half_size;
view_start += tilemap_data.center_offset;
view_end += tilemap_data.center_offset;
winmat = projection_orthographic(
view_start.x, view_end.x, view_start.y, view_end.y, clip_near, clip_far);
}
else {
view_start *= clip_near;
view_end *= clip_near;
winmat = projection_perspective(
view_start.x, view_end.x, view_start.y, view_end.y, clip_near, clip_far);
}
view_infos_buf[view_index].winmat = winmat;
view_infos_buf[view_index].wininv = inverse(winmat);
}
view_infos_buf[view_index].winmat = tilemap_data.winmat;
view_infos_buf[view_index].wininv = inverse(tilemap_data.winmat);
}
}
barrier();
bool lod_is_rendered = (view_index >= 0) && (view_index < SHADOW_VIEW_MAX);
if (lod_is_rendered && lod_valid_thread) {
/* Tile coordinate relative to chosen viewport origin. */
ivec2 viewport_tile_co = tile_co_lod - rect_min;
/* We need to add page indirection to the render map for the whole viewport even if this one
* might extend outside of the shadowmap range. To this end, we need to wrap the threads to
* always cover the whole mip. This is because the viewport cannot be bigger than the mip
* level itself. */
int lod_res = SHADOW_TILEMAP_RES >> lod;
ivec2 relative_tile_co = (viewport_tile_co + lod_res) % lod_res;
if (all(lessThan(relative_tile_co, viewport_size))) {
uint page_packed = shadow_page_pack(tile.page);
/* Add page to render map. */
int render_page_index = shadow_render_page_index_get(view_index, relative_tile_co);
render_map_buf[render_page_index] = do_page_render ? page_packed : 0xFFFFFFFFu;
if (do_page_render) {
/* Tag tile as rendered. There is a barrier after the read. So it is safe. */
tiles_buf[tile_index] |= SHADOW_IS_RENDERED;
/* Add page to clear list. */
uint clear_page_index = atomicAdd(clear_dispatch_buf.num_groups_z, 1u);
clear_list_buf[clear_page_index] = page_packed;
/* Statistics. */
atomicAdd(statistics_buf.page_rendered_count, 1);
}
}
}
if (tile.is_used && tile.is_allocated && (!tile.do_update || lod_is_rendered)) {
/* Save highest lod for this thread. */
valid_tile_index = tile_index;
}
}
barrier();
if (view_index < 64) {
ivec3 render_map_texel = ivec3(tile_co, view_index);
/* Store page indirection for rendering. Update every texel in the view array level. */
if (true) {
imageStore(render_map_lod0_img, render_map_texel, uvec4(updated_lod_page[0]));
if (updated_lod_page[0] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[0]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 0);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 1u))) {
imageStore(render_map_lod1_img, render_map_texel, uvec4(updated_lod_page[1]));
if (updated_lod_page[1] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[1]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 1);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 2u))) {
imageStore(render_map_lod2_img, render_map_texel, uvec4(updated_lod_page[2]));
if (updated_lod_page[2] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[2]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 2);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 3u))) {
imageStore(render_map_lod3_img, render_map_texel, uvec4(updated_lod_page[3]));
if (updated_lod_page[3] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[3]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 3);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 4u))) {
imageStore(render_map_lod4_img, render_map_texel, uvec4(updated_lod_page[4]));
if (updated_lod_page[4] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[4]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 4);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 5u))) {
imageStore(render_map_lod5_img, render_map_texel, uvec4(updated_lod_page[5]));
if (updated_lod_page[5] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[5]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 5);
}
}
}
/* Store the highest LOD valid page for rendering. */
uint tile_packed = (valid_tile_index != -1) ? tiles_buf[valid_tile_index] : SHADOW_NO_DATA;
imageStore(tilemaps_img, atlas_texel, uvec4(tile_packed));
if (all(equal(gl_GlobalInvocationID, uvec3(0)))) {
/* Clamp it as it can underflow if there are too many tiles present on screen. */

View File

@ -11,7 +11,7 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
shared int directional_range_changed;
shared bool directional_range_changed;
ShadowTileDataPacked init_tile_data(ShadowTileDataPacked tile, bool do_update)
{
@ -36,6 +36,8 @@ void main()
/* Reset shift to not tag for update more than once per sync cycle. */
tilemaps_buf[tilemap_index].grid_shift = ivec2(0);
directional_range_changed = false;
int clip_index = tilemap.clip_data_index;
if (clip_index == -1) {
/* Noop. This is the case for unused tilemaps that are getting pushed to the free heap. */
@ -46,13 +48,18 @@ void main()
float clip_far_new = orderedIntBitsToFloat(clip_data.clip_far);
bool near_changed = clip_near_new != clip_data.clip_near_stored;
bool far_changed = clip_far_new != clip_data.clip_far_stored;
directional_range_changed = int(near_changed || far_changed);
directional_range_changed = near_changed || far_changed;
/* NOTE(fclem): This assumes clip near/far are computed each time the init phase runs. */
tilemaps_clip_buf[clip_index].clip_near_stored = clip_near_new;
tilemaps_clip_buf[clip_index].clip_far_stored = clip_far_new;
/* Reset for next update. */
tilemaps_clip_buf[clip_index].clip_near = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[clip_index].clip_far = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[clip_index].clip_near = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[clip_index].clip_far = floatBitsToOrderedInt(-FLT_MAX);
}
else {
/* For cubefaces, simply use the light near and far distances. */
tilemaps_clip_buf[clip_index].clip_near_stored = tilemap.clip_near;
tilemaps_clip_buf[clip_index].clip_far_stored = tilemap.clip_far;
}
}
@ -68,7 +75,7 @@ void main()
bool do_update = !in_range_inclusive(tile_shifted, ivec2(0), ivec2(SHADOW_TILEMAP_RES - 1));
/* TODO(fclem): Might be better to resize the depth stored instead of a full render update. */
if (tilemap.projection_type != SHADOW_PROJECTION_CUBEFACE && directional_range_changed != 0) {
if (directional_range_changed) {
do_update = true;
}

View File

@ -246,3 +246,18 @@ Pyramid shadow_tilemap_cubeface_bounds(ShadowTileMapData tilemap,
}
/** \} */
/* ---------------------------------------------------------------------- */
/** \name Render map layout.
*
* Since a view can cover at most the number of tiles contained in LOD0,
* index every LOD like they were LOD0.
* \{ */
/**
 * Return the flat index inside the render map of the page indirection for a given tile of
 * a given view.
 *
 * Each view owns a slab of `SHADOW_TILEMAP_LOD0_LEN` entries, addressed row by row with a
 * row stride of `SHADOW_TILEMAP_RES`, whatever the LOD of the view is.
 */
int shadow_render_page_index_get(int view_index, ivec2 tile_coordinate_in_lod)
{
  /* Start of the slab reserved for this view. */
  int view_offset = view_index * SHADOW_TILEMAP_LOD0_LEN;
  /* Start of the tile row inside the slab. */
  int row_offset = tile_coordinate_in_lod.y * SHADOW_TILEMAP_RES;
  return view_offset + row_offset + tile_coordinate_in_lod.x;
}
/** \} */

View File

@ -124,3 +124,21 @@ void init_interface()
drw_ResourceID_iface.resource_index = resource_id;
#endif
}
#ifdef GPU_VERTEX_SHADER
/**
 * Select the output viewport and framebuffer layer for the current vertex.
 *
 * \param view_id: Index of the shadow view, forwarded through the layer index.
 * \param lod: Value written to the viewport index.
 */
void shadow_viewport_layer_set(int view_id, int lod)
{
  gpu_ViewportIndex = lod;
  /* Metal + Tile Based Renderer still renders to a layered framebuffer and needs correct
   * depth buffering, so views must not overlap each other. Other platforms do not care
   * much, so the layer doubles as the channel used to pass the view id along. */
  gpu_Layer = view_id;
}
#endif
#ifdef GPU_FRAGMENT_SHADER
/**
 * Return the shadow view id of the current fragment.
 * The id is read back from the layer index (see `shadow_viewport_layer_set()`).
 */
int shadow_view_id_get()
{
  int view_id = gpu_Layer;
  return view_id;
}
#endif

View File

@ -15,40 +15,7 @@
#pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_transparency_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
/**
 * Write `depth` into the shadow atlas page that backs the given LOD of the tile, using an
 * atomic min so that the closest depth is kept.
 *
 * \param texel_co: Texel coordinate of the fragment in LOD0.
 * \param lod: LOD level to write to.
 * \param tile_co: Tile coordinate of the fragment in LOD0.
 * \param depth: Depth value to store (already biased by the caller).
 */
void write_depth(ivec2 texel_co, const int lod, ivec2 tile_co, float depth)
{
  /* Only one LOD0 texel per LOD texel is allowed to write: the one sitting half a LOD stride
   * away from the LOD texel corner (top right pixel nearest to the LOD pixel). This keeps the
   * offset from the center of any LOD below half a LOD0 pixel.
   * This offset is taken into account during sampling. */
  const int half_stride = (1 << lod) / 2;
  ivec2 lod_texel = texel_co >> lod;
  ivec2 writer_texel = (lod_texel << lod) + half_stride;
  if (any(notEqual(writer_texel, texel_co))) {
    return;
  }

  /* Look up the page backing this tile for the current view. */
  ivec3 render_map_coord = ivec3(tile_co >> lod, shadow_interp.view_id);
  uint packed_page = texelFetch(shadow_render_map_tx, render_map_coord, lod).r;
  if (packed_page == 0xFFFFFFFFu) {
    /* No valid page for this tile at this LOD. */
    return;
  }

  /* Destination texel inside the atlas. */
  ivec2 page_co = ivec2(unpackUvec2x16(packed_page));
  ivec2 in_page_texel = lod_texel % pages_infos_buf.page_size;
  ivec2 atlas_texel = page_co * pages_infos_buf.page_size + in_page_texel;

  /* Quantization bias. Equivalent to nextafter in C without all the safety. 1 is not enough. */
  uint u_depth = floatBitsToUint(depth) + 2u;
  /* TODO(Metal): For Metal, textures will need to be viewed as buffers to workaround missing
   * image atomics support. */
  imageAtomicMin(shadow_atlas_img, atlas_texel, u_depth);
}
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
void main()
{
@ -67,27 +34,26 @@ void main()
}
#endif
drw_view_id = shadow_interp.view_id;
#ifdef USE_ATOMIC
ivec2 texel_co = ivec2(gl_FragCoord.xy);
ivec2 tile_co = texel_co / pages_infos_buf.page_size;
float depth = gl_FragCoord.z;
float slope_bias = fwidth(depth);
write_depth(texel_co, 0, tile_co, depth + slope_bias);
/* Using bitwise ops is way faster than integer ops. */
const int page_shift = SHADOW_PAGE_LOD;
const int page_mask = ~(0xFFFFFFFF << SHADOW_PAGE_LOD);
/* Only needed for local lights. */
bool is_persp = (drw_view.winmat[3][3] == 0.0);
if (is_persp) {
/* Note that even if texel center is offset, we store unmodified depth.
* We increase bias instead at sampling time. */
#if SHADOW_TILEMAP_LOD != 5
# error This needs to be adjusted
ivec2 tile_co = texel_co >> page_shift;
ivec2 texel_page = texel_co & page_mask;
int view_index = shadow_view_id_get();
int render_page_index = shadow_render_page_index_get(view_index, tile_co);
uint page_packed = render_map_buf[render_page_index];
ivec3 page = ivec3(shadow_page_unpack(page_packed));
ivec3 out_texel = ivec3((page.xy << page_shift) | texel_page, page.z);
uint u_depth = floatBitsToUint(gl_FragCoord.z + fwidth(gl_FragCoord.z));
/* Quantization bias. Equivalent to `nextafter()` in C without all the safety. */
u_depth += 2;
imageAtomicMin(shadow_atlas_img, out_texel, u_depth);
#endif
write_depth(texel_co, 1, tile_co, depth + slope_bias * 2.0);
write_depth(texel_co, 2, tile_co, depth + slope_bias * 4.0);
write_depth(texel_co, 3, tile_co, depth + slope_bias * 8.0);
write_depth(texel_co, 4, tile_co, depth + slope_bias * 16.0);
write_depth(texel_co, 5, tile_co, depth + slope_bias * 32.0);
}
}

View File

@ -187,21 +187,26 @@ GPU_SHADER_CREATE_INFO(eevee_surf_world)
"eevee_camera",
"eevee_utility_texture");
GPU_SHADER_INTERFACE_INFO(eevee_shadow_iface, "shadow_interp").flat(Type::UINT, "view_id");
GPU_SHADER_CREATE_INFO(eevee_surf_shadow)
.define("DRW_VIEW_LEN", "64")
.define("MAT_SHADOW")
.define("USE_ATOMIC")
.vertex_out(eevee_surf_iface)
.vertex_out(eevee_surf_flat_iface)
.vertex_out(eevee_shadow_iface)
.sampler(SHADOW_RENDER_MAP_SLOT, ImageType::UINT_2D_ARRAY, "shadow_render_map_tx")
.image(SHADOW_ATLAS_SLOT,
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,
Qualifier::READ,
"uint",
"render_map_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(SHADOW_VIEWPORT_INDEX_BUF_SLOT,
Qualifier::READ,
"uint",
"viewport_index_buf[SHADOW_VIEW_MAX]")
.storage_buf(SHADOW_PAGE_INFO_SLOT, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.image(SHADOW_ATLAS_IMG_SLOT,
GPU_R32UI,
Qualifier::READ_WRITE,
ImageType::UINT_2D,
ImageType::UINT_2D_ARRAY,
"shadow_atlas_img")
.storage_buf(SHADOW_PAGE_INFO_SLOT, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.fragment_source("eevee_surf_shadow_frag.glsl")
.additional_info("eevee_camera", "eevee_utility_texture", "eevee_sampling_data");

View File

@ -14,6 +14,7 @@
* explicitly as uint for code generation, as the MSLShaderGenerator needs to be able to
* distinguish between classes and fundamental types during code generation. */
#define SHADOW_TILE_DATA_PACKED "uint"
#define SHADOW_PAGE_PACKED "uint"
GPU_SHADER_CREATE_INFO(eevee_shadow_clipmap_clear)
.do_static_compilation(true)
@ -160,21 +161,17 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_tilemap_finalize)
.do_static_compilation(true)
.typedef_source("draw_shader_shared.h")
.local_group_size(SHADOW_TILEMAP_RES, SHADOW_TILEMAP_RES)
.storage_buf(0, Qualifier::READ_WRITE, "ShadowTileMapData", "tilemaps_buf[]")
.storage_buf(0, Qualifier::READ, "ShadowTileMapData", "tilemaps_buf[]")
.storage_buf(1, Qualifier::READ_WRITE, SHADOW_TILE_DATA_PACKED, "tiles_buf[]")
.storage_buf(2, Qualifier::READ_WRITE, "ShadowPagesInfoData", "pages_infos_buf")
.storage_buf(3, Qualifier::WRITE, "ViewMatrices", "view_infos_buf[64]")
.storage_buf(3, Qualifier::WRITE, "ViewMatrices", "view_infos_buf[SHADOW_VIEW_MAX]")
.storage_buf(4, Qualifier::READ_WRITE, "ShadowStatistics", "statistics_buf")
.storage_buf(5, Qualifier::READ_WRITE, "DispatchCommand", "clear_dispatch_buf")
.storage_buf(6, Qualifier::READ_WRITE, "uint", "clear_page_buf[]")
.storage_buf(7, Qualifier::READ_WRITE, "ShadowTileMapClip", "tilemaps_clip_buf[]")
.storage_buf(6, Qualifier::WRITE, SHADOW_PAGE_PACKED, "clear_list_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(7, Qualifier::WRITE, SHADOW_PAGE_PACKED, "render_map_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(8, Qualifier::WRITE, "uint", "viewport_index_buf[SHADOW_VIEW_MAX]")
.storage_buf(9, Qualifier::READ, "ShadowTileMapClip", "tilemaps_clip_buf[]")
.image(0, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D, "tilemaps_img")
.image(1, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod0_img")
.image(2, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod1_img")
.image(3, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod2_img")
.image(4, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod3_img")
.image(5, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod4_img")
.image(6, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod5_img")
.additional_info("eevee_shared")
.compute_source("eevee_shadow_tilemap_finalize_comp.glsl");
@ -182,8 +179,12 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_page_clear)
.do_static_compilation(true)
.local_group_size(SHADOW_PAGE_CLEAR_GROUP_SIZE, SHADOW_PAGE_CLEAR_GROUP_SIZE)
.storage_buf(2, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.storage_buf(6, Qualifier::READ, "uint", "clear_page_buf[]")
.image(0, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D, "atlas_img")
.storage_buf(6, Qualifier::READ, SHADOW_PAGE_PACKED, "clear_list_buf[SHADOW_RENDER_MAP_SIZE]")
.image(SHADOW_ATLAS_IMG_SLOT,
GPU_R32UI,
Qualifier::READ_WRITE,
ImageType::UINT_2D_ARRAY,
"shadow_atlas_img")
.additional_info("eevee_shared")
.compute_source("eevee_shadow_page_clear_comp.glsl");
@ -194,7 +195,7 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_page_clear)
* \{ */
GPU_SHADER_CREATE_INFO(eevee_shadow_data)
.sampler(SHADOW_ATLAS_TEX_SLOT, ImageType::UINT_2D, "shadow_atlas_tx")
.sampler(SHADOW_ATLAS_TEX_SLOT, ImageType::UINT_2D_ARRAY, "shadow_atlas_tx")
.sampler(SHADOW_TILEMAPS_TEX_SLOT, ImageType::UINT_2D, "shadow_tilemaps_tx");
/** \} */

View File

@ -630,6 +630,7 @@ class Texture : NonCopyable {
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(extent, layers, 0, mip_len, format, usage, data, true, false);
}
@ -657,6 +658,7 @@ class Texture : NonCopyable {
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(UNPACK2(extent), layers, mip_len, format, usage, data, true, false);
}

View File

@ -50,12 +50,12 @@ static void test_eevee_shadow_shift_clear()
{
ShadowTileData tile;
tile.page = uint2(1, 2);
tile.page = uint3(1, 2, 0);
tile.is_used = true;
tile.do_update = true;
tiles_data[tile_lod0] = shadow_tile_pack(tile);
tile.page = uint2(3, 4);
tile.page = uint3(3, 2, 4);
tile.is_used = false;
tile.do_update = false;
tiles_data[tile_lod1] = shadow_tile_pack(tile);
@ -72,19 +72,19 @@ static void test_eevee_shadow_shift_clear()
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data_);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tilemaps_data.read();
tiles_data.read();
EXPECT_EQ(tilemaps_data[0].grid_offset, int2(0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).page, uint2(1, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).page, uint3(1, 2, 0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).is_used, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).page, uint2(3, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).page, uint3(3, 2, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).is_used, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).do_update, true);
@ -99,9 +99,27 @@ static void test_eevee_shadow_shift()
GPU_render_begin();
ShadowTileMapDataBuf tilemaps_data = {"tilemaps_data"};
ShadowTileDataBuf tiles_data = {"tiles_data"};
ShadowTileMapClipBuf tilemaps_clip = {"tilemaps_clip"};
ShadowPageCacheBuf pages_cached_data_ = {"pages_cached_data_"};
StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP> tilemaps_clip = {"tilemaps_clip"};
ShadowPageCacheBuf pages_cached_data = {"pages_cached_data"};
auto tile_co_to_page = [](int2 co) {
int page = co.x + co.y * SHADOW_TILEMAP_RES;
return uint3((page % SHADOW_PAGE_PER_ROW),
(page / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
(page / SHADOW_PAGE_PER_LAYER));
};
{
ShadowTileMapClip clip = {};
clip.clip_near_stored = 0.0;
clip.clip_far_stored = 1.0;
clip.clip_near = 0x00000000; /* floatBitsToOrderedInt(0.0) */
clip.clip_far = 0x3F800000; /* floatBitsToOrderedInt(1.0) */
tilemaps_clip[0] = clip;
tilemaps_clip.push_update();
}
{
ShadowTileMapData tilemap = {};
tilemap.tiles_index = 0;
@ -114,7 +132,6 @@ static void test_eevee_shadow_shift()
tilemaps_data.push_update();
}
{
ShadowTileData tile = shadow_tile_unpack(ShadowTileDataPacked(SHADOW_NO_DATA));
for (auto x : IndexRange(SHADOW_TILEMAP_RES)) {
@ -122,7 +139,7 @@ static void test_eevee_shadow_shift()
tile.is_allocated = true;
tile.is_rendered = true;
tile.do_update = true;
tile.page = uint2(x, y);
tile.page = tile_co_to_page(int2(x, y));
tiles_data[x + y * SHADOW_TILEMAP_RES] = shadow_tile_pack(tile);
}
}
@ -137,31 +154,33 @@ static void test_eevee_shadow_shift()
pass.bind_ssbo("tilemaps_buf", tilemaps_data);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data_);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tilemaps_data.read();
tiles_data.read();
EXPECT_EQ(tilemaps_data[0].grid_offset, int2(0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).page, uint2(SHADOW_TILEMAP_RES - 1, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).page,
tile_co_to_page(int2(SHADOW_TILEMAP_RES - 1, 2)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).page, uint2(0, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).page, tile_co_to_page(int2(0, 2)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).do_update, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).page,
uint2(SHADOW_TILEMAP_RES - 1, 4));
tile_co_to_page(int2(SHADOW_TILEMAP_RES - 1, 4)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).page, uint2(0, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).page,
tile_co_to_page(int2(0, 4)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).do_update, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).is_allocated, true);
@ -234,9 +253,9 @@ static void test_eevee_shadow_tag_update()
pass.bind_ssbo("bounds_buf", &manager.bounds_buf.current());
pass.bind_ssbo("resource_ids_buf", curr_casters_updated);
pass.dispatch(int3(curr_casters_updated.size(), 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
@ -358,8 +377,10 @@ static void test_eevee_shadow_free()
int page_free_count = SHADOW_MAX_PAGE - 6;
for (uint i : IndexRange(2, page_free_count)) {
uint2 page = {i % SHADOW_PAGE_PER_ROW, i / SHADOW_PAGE_PER_ROW};
pages_free_data[i] = page.x | (page.y << 16u);
uint3 page = uint3((i % SHADOW_PAGE_PER_ROW),
(i / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
(i / SHADOW_PAGE_PER_LAYER));
pages_free_data[i] = shadow_page_pack(page);
}
pages_free_data.push_update();
@ -380,6 +401,9 @@ static void test_eevee_shadow_free()
{
ShadowTileData tile;
tiles_data.clear_to_zero();
tiles_data.read();
/* is_orphaned = true */
tile.is_used = false;
tile.do_update = true;
@ -436,10 +460,10 @@ static void test_eevee_shadow_free()
pass.bind_ssbo("pages_free_buf", pages_free_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_infos_data.read();
@ -475,7 +499,7 @@ class TestDefrag {
ShadowPageHeapBuf pages_free_data = {"PagesFreeBuf"};
ShadowPageCacheBuf pages_cached_data = {"PagesCachedBuf"};
ShadowPagesInfoDataBuf pages_infos_data = {"PagesInfosBuf"};
StorageBuffer<DispatchCommand> clear_dispatch_buf;
StorageBuffer<DispatchCommand> clear_draw_buf;
ShadowStatisticsBuf statistics_buf = {"statistics_buf"};
public:
@ -546,12 +570,12 @@ class TestDefrag {
pass.bind_ssbo("pages_free_buf", pages_free_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_draw_buf", clear_draw_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_cached_data.read();
@ -631,10 +655,11 @@ class TestAlloc {
pages_infos_data.page_cached_next = 0u;
pages_infos_data.page_cached_start = 0u;
pages_infos_data.page_cached_end = 0u;
pages_infos_data.view_count = 0u;
pages_infos_data.page_size = 256u;
pages_infos_data.push_update();
statistics_buf.view_needed_count = 0;
statistics_buf.push_update();
int tile_allocated = tiles_index * SHADOW_TILEDATA_PER_TILEMAP + 5;
int tile_free = tiles_index * SHADOW_TILEDATA_PER_TILEMAP + 6;
@ -672,10 +697,10 @@ class TestAlloc {
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_infos_data.read();
@ -711,7 +736,8 @@ static void test_eevee_shadow_finalize()
ShadowPageCacheBuf pages_cached_data = {"PagesCachedBuf"};
ShadowPagesInfoDataBuf pages_infos_data = {"PagesInfosBuf"};
ShadowStatisticsBuf statistics_buf = {"statistics_buf"};
ShadowTileMapClipBuf tilemaps_clip = {"tilemaps_clip"};
StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP, false> tilemaps_clip = {
"tilemaps_clip"};
const uint lod0_len = SHADOW_TILEMAP_LOD0_LEN;
const uint lod1_len = SHADOW_TILEMAP_LOD1_LEN;
@ -727,7 +753,7 @@ static void test_eevee_shadow_finalize()
const uint lod5_ofs = lod4_ofs + lod4_len;
for (auto i : IndexRange(SHADOW_TILEDATA_PER_TILEMAP)) {
tiles_data[i] = 0;
tiles_data[i] = SHADOW_NO_DATA;
}
{
@ -735,52 +761,71 @@ static void test_eevee_shadow_finalize()
tile.is_used = true;
tile.is_allocated = true;
tile.page = uint2(1, 0);
tile.page = uint3(1, 0, 0);
tile.do_update = false;
tiles_data[lod0_ofs] = shadow_tile_pack(tile);
tile.page = uint2(2, 0);
tile.page = uint3(2, 0, 0);
tile.do_update = false;
tiles_data[lod1_ofs] = shadow_tile_pack(tile);
tile.page = uint2(3, 0);
tile.page = uint3(3, 0, 0);
tile.do_update = true;
tiles_data[lod2_ofs] = shadow_tile_pack(tile);
tile.page = uint2(4, 0);
tile.do_update = false;
tile.page = uint3(0, 1, 0);
tile.do_update = true;
tiles_data[lod3_ofs] = shadow_tile_pack(tile);
tile.page = uint2(5, 0);
tile.page = uint3(1, 1, 0);
tile.do_update = true;
tiles_data[lod4_ofs] = shadow_tile_pack(tile);
tile.page = uint2(6, 0);
tile.page = uint3(2, 1, 0);
tile.do_update = true;
tiles_data[lod5_ofs] = shadow_tile_pack(tile);
tile.page = uint2(7, 0);
tile.page = uint3(3, 1, 0);
tile.do_update = true;
tiles_data[lod0_ofs + 8] = shadow_tile_pack(tile);
tiles_data[lod0_ofs + 31] = shadow_tile_pack(tile);
tile.page = uint3(0, 2, 0);
tile.do_update = true;
tiles_data[lod3_ofs + 8] = shadow_tile_pack(tile);
tile.page = uint3(1, 2, 0);
tile.do_update = true;
tiles_data[lod0_ofs + 32 * 16 - 8] = shadow_tile_pack(tile);
tiles_data.push_update();
}
{
ShadowTileMapData tilemap = {};
tilemap.viewmat = float4x4::identity();
tilemap.tiles_index = 0;
tilemap.clip_data_index = 0;
tilemap.projection_type = SHADOW_PROJECTION_CUBEFACE;
tilemaps_data.append(tilemap);
tilemaps_data.push_update();
}
{
ShadowTileMapClip clip = {};
clip.clip_far_stored = 10.0f;
clip.clip_near_stored = 1.0f;
tilemaps_clip[0] = clip;
tilemaps_clip.push_update();
}
{
statistics_buf.view_needed_count = 0;
statistics_buf.push_update();
}
{
pages_infos_data.page_free_count = -5;
pages_infos_data.page_alloc_count = 0;
pages_infos_data.page_cached_next = 0u;
pages_infos_data.page_cached_start = 0u;
pages_infos_data.page_cached_end = 0u;
pages_infos_data.view_count = 0u;
pages_infos_data.page_size = 256u;
pages_infos_data.push_update();
}
@ -791,44 +836,55 @@ static void test_eevee_shadow_finalize()
GPU_TEXTURE_USAGE_SHADER_WRITE);
tilemap_tx.clear(uint4(0));
Texture render_map_tx = {"ShadowRenderMap",
GPU_R32UI,
GPU_TEXTURE_USAGE_HOST_READ | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_SHADER_WRITE | GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW,
int2(SHADOW_TILEMAP_RES),
1, /* Only one layer for the test. */
nullptr,
SHADOW_TILEMAP_LOD + 1};
render_map_tx.ensure_mip_views();
View shadow_multi_view = {"ShadowMultiView", 64, true};
StorageArrayBuffer<ViewMatrices, DRW_VIEW_MAX> shadow_multi_view_buf = {"ShadowMultiView"};
StorageBuffer<DispatchCommand> clear_dispatch_buf;
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_page_buf = {"clear_page_buf"};
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_list_buf = {"clear_list_buf"};
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE> render_map_buf = {"render_map_buf"};
StorageArrayBuffer<uint, SHADOW_VIEW_MAX> viewport_index_buf = {"viewport_index_buf"};
render_map_buf.clear_to_zero();
GPUShader *sh = GPU_shader_create_from_info_name("eevee_shadow_tilemap_finalize");
PassSimple pass("Test");
pass.shader_set(sh);
pass.bind_ssbo("tilemaps_buf", tilemaps_data);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("view_infos_buf", shadow_multi_view_buf);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_list_buf", clear_list_buf);
pass.bind_ssbo("render_map_buf", render_map_buf);
pass.bind_ssbo("viewport_index_buf", viewport_index_buf);
pass.bind_ssbo("pages_infos_buf", pages_infos_data);
pass.bind_image("tilemaps_img", tilemap_tx);
pass.bind_ssbo("view_infos_buf", shadow_multi_view.matrices_ubo_get());
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_page_buf", clear_page_buf);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_image("render_map_lod0_img", render_map_tx.mip_view(0));
pass.bind_image("render_map_lod1_img", render_map_tx.mip_view(1));
pass.bind_image("render_map_lod2_img", render_map_tx.mip_view(2));
pass.bind_image("render_map_lod3_img", render_map_tx.mip_view(3));
pass.bind_image("render_map_lod4_img", render_map_tx.mip_view(4));
pass.bind_image("render_map_lod5_img", render_map_tx.mip_view(5));
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE | GPU_BARRIER_TEXTURE_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE | GPU_BARRIER_TEXTURE_UPDATE);
{
/* Check output views. */
shadow_multi_view_buf.read();
for (auto i : IndexRange(5)) {
EXPECT_EQ(shadow_multi_view_buf[i].viewmat, float4x4::identity());
EXPECT_EQ(shadow_multi_view_buf[i].viewinv, float4x4::identity());
}
EXPECT_EQ(shadow_multi_view_buf[0].winmat,
math::projection::perspective(-1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[1].winmat,
math::projection::perspective(-1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[2].winmat,
math::projection::perspective(-1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[3].winmat,
math::projection::perspective(-1.0f, -0.75f, -1.0f, -0.75f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[4].winmat,
math::projection::perspective(0.5f, 1.5f, -1.0f, 0.0f, 1.0f, 10.0f));
}
{
uint *pixels = tilemap_tx.read<uint32_t>(GPU_DATA_UINT);
@ -836,7 +892,8 @@ static void test_eevee_shadow_finalize()
std::string result = "";
for (auto y : IndexRange(SHADOW_TILEMAP_RES)) {
for (auto x : IndexRange(SHADOW_TILEMAP_RES)) {
result += std::to_string(shadow_tile_unpack(pixels[y * SHADOW_TILEMAP_RES + x]).page.x);
ShadowTileData tile = shadow_tile_unpack(pixels[y * SHADOW_TILEMAP_RES + x]);
result += std::to_string(tile.page.x + tile.page.y * SHADOW_PAGE_PER_ROW);
}
}
@ -844,7 +901,7 @@ static void test_eevee_shadow_finalize()
/** The layout of these expected strings is Y down. */
StringRefNull expected_pages =
"12334444755555556666666666666666"
"12334444555555556666666666666667"
"22334444555555556666666666666666"
"33334444555555556666666666666666"
"33334444555555556666666666666666"
@ -859,15 +916,15 @@ static void test_eevee_shadow_finalize()
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"55555555555555556666666696666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
@ -881,17 +938,17 @@ static void test_eevee_shadow_finalize()
}
{
auto stringify_lod = [](Span<uint> data) -> std::string {
auto stringify_view = [](Span<uint> data) -> std::string {
std::string result = "";
for (auto x : data) {
result += (x == 0xFFFFFFFFu) ? '-' : '0' + (x % 10);
result += (x == 0u) ? '-' : ((x == 0xFFFFFFFFu) ? 'x' : '0' + (x % 10));
}
return result;
};
/** The layout of these expected strings is Y down. */
StringRefNull expected_lod0 =
"--------7-----------------------"
StringRefNull expected_view0 =
"6-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
@ -924,71 +981,166 @@ static void test_eevee_shadow_finalize()
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod1 =
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------";
StringRefNull expected_view1 =
"5-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod2 =
"3-------"
"--------"
"--------"
"--------"
"--------"
"--------"
"--------"
"--------";
StringRefNull expected_view2 =
"4xxx----------------------------"
"xxxx----------------------------"
"8xxx----------------------------"
"xxxx----------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod3 =
"----"
"----"
"----"
"----";
StringRefNull expected_view3 =
"3-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod4 =
"5-"
"--";
StringRefNull expected_view4 =
"xxxxxxx7xxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"9xxxxxxxxxxxxxxx----------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod5 = "6";
render_map_buf.read();
uint *pixels_lod0 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 0);
uint *pixels_lod1 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 1);
uint *pixels_lod2 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 2);
uint *pixels_lod3 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 3);
uint *pixels_lod4 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 4);
uint *pixels_lod5 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 5);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod0, lod0_len)), expected_lod0);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod1, lod1_len)), expected_lod1);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod2, lod2_len)), expected_lod2);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod3, lod3_len)), expected_lod3);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod4, lod4_len)), expected_lod4);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod5, 1)), expected_lod5);
MEM_SAFE_FREE(pixels_lod0);
MEM_SAFE_FREE(pixels_lod1);
MEM_SAFE_FREE(pixels_lod2);
MEM_SAFE_FREE(pixels_lod3);
MEM_SAFE_FREE(pixels_lod4);
MEM_SAFE_FREE(pixels_lod5);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 0],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view0);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 1],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view1);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 2],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view2);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 3],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view3);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 4],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view4);
}
pages_infos_data.read();
EXPECT_EQ(pages_infos_data.page_free_count, 0);
EXPECT_EQ(pages_infos_data.view_count, 1);
statistics_buf.read();
EXPECT_EQ(statistics_buf.view_needed_count, 5);
GPU_shader_free(sh);
DRW_shaders_free();