Eevee: Shadows: Transition to individual face rendering.

This removes the need for a geometry shader and instancing.

Both are pretty slow compared to the new method.
The only case where the old method could be better is when the scene is
filled with lots of objects and most of the objects in the shadow map
appear on every layer.

But even then, we could optimize the culling and minimize the overhead.
This commit is contained in:
2018-04-15 22:22:50 +02:00
parent dd6fcd2f21
commit d8d1f637b1
5 changed files with 92 additions and 144 deletions

View File

@@ -106,7 +106,7 @@ void EEVEE_lights_init(EEVEE_ViewLayerData *sldata)
if (!e_data.shadow_sh) {
e_data.shadow_sh = DRW_shader_create(
datatoc_shadow_vert_glsl, datatoc_shadow_geom_glsl, datatoc_shadow_frag_glsl, NULL);
datatoc_shadow_vert_glsl, NULL, datatoc_shadow_frag_glsl, NULL);
}
if (!sldata->lamps) {
@@ -287,8 +287,7 @@ void EEVEE_lights_cache_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS;
psl->shadow_pass = DRW_pass_create("Shadow Pass", state);
DRWShadingGroup *grp = stl->g_data->shadow_shgrp = DRW_shgroup_create(e_data.shadow_sh, psl->shadow_pass);
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
stl->g_data->shadow_shgrp = DRW_shgroup_create(e_data.shadow_sh, psl->shadow_pass);
}
}
@@ -387,30 +386,26 @@ void EEVEE_lights_cache_add(EEVEE_ViewLayerData *sldata, Object *ob)
/* Add a shadow caster to the shadowpasses */
void EEVEE_lights_cache_shcaster_add(
EEVEE_ViewLayerData *sldata, EEVEE_StorageList *stl, struct Gwn_Batch *geom, Object *ob)
EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_StorageList *stl, struct Gwn_Batch *geom, Object *ob)
{
DRW_shgroup_call_object_instances_add(
DRW_shgroup_call_object_add(
stl->g_data->shadow_shgrp,
geom, ob,
&sldata->lamps->shadow_instance_count);
geom, ob);
}
void EEVEE_lights_cache_shcaster_material_add(
EEVEE_ViewLayerData *sldata, EEVEE_PassList *psl, struct GPUMaterial *gpumat,
struct Gwn_Batch *geom, struct Object *ob, float (*obmat)[4], float *alpha_threshold)
EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_PassList *psl, struct GPUMaterial *gpumat,
struct Gwn_Batch *geom, struct Object *ob, float *alpha_threshold)
{
/* TODO / PERF : reuse the same shading group for objects with the same material */
DRWShadingGroup *grp = DRW_shgroup_material_create(gpumat, psl->shadow_pass);
if (grp == NULL) return;
DRW_shgroup_uniform_block(grp, "shadow_render_block", sldata->shadow_render_ubo);
DRW_shgroup_uniform_mat4(grp, "ShadowModelMatrix", (float *)obmat);
if (alpha_threshold != NULL)
DRW_shgroup_uniform_float(grp, "alphaThreshold", alpha_threshold, 1);
DRW_shgroup_call_object_instances_add(grp, geom, ob, &sldata->lamps->shadow_instance_count);
DRW_shgroup_call_object_add(grp, geom, ob);
}
/* Make that object update shadow casting lamps inside its influence bounding box. */
@@ -719,7 +714,6 @@ static void eevee_shadow_cascade_setup(Object *ob, EEVEE_LampsInfo *linfo, EEVEE
}
/* Lamps Matrices */
float (*viewmat)[4], projmat[4][4];
int sh_nbr = 1; /* TODO : MSM */
int cascade_nbr = la->cascade_count;
@@ -730,10 +724,10 @@ static void eevee_shadow_cascade_setup(Object *ob, EEVEE_LampsInfo *linfo, EEVEE
/* obmat = Object Space > World Space */
/* viewmat = World Space > View Space */
invert_m4_m4(sh_data->clipmat.mat[DRW_MAT_VIEW], ob->obmat);
viewmat = sh_data->clipmat.mat[DRW_MAT_VIEW];
float (*viewmat)[4] = sh_data->viewmat;
invert_m4_m4(viewmat, ob->obmat);
normalize_m4(viewmat);
invert_m4_m4(sh_data->clipmat.mat[DRW_MAT_VIEWINV], viewmat);
invert_m4_m4(sh_data->viewinv, viewmat);
/* The technique consists into splitting
* the view frustum into several sub-frustum
@@ -833,8 +827,8 @@ static void eevee_shadow_cascade_setup(Object *ob, EEVEE_LampsInfo *linfo, EEVEE
cascade_data->split_start[0] = LERP(la->cascade_fade, cascade_data->split_end[cascade_nbr - 1], prev_split);
/* For each cascade */
rctf rect_clip, rect_cascade;
for (int c = 0; c < cascade_nbr; ++c) {
float (*projmat)[4] = sh_data->projmat[c];
/* Given 8 frustum corners */
float corners[8][3] = {
/* Near Cap */
@@ -877,32 +871,17 @@ static void eevee_shadow_cascade_setup(Object *ob, EEVEE_LampsInfo *linfo, EEVEE
add_v2_v2(center, shadow_texco);
/* Expand the projection to cover frustum range */
rctf rect_cascade;
BLI_rctf_init_pt_radius(&rect_cascade, center, sh_data->radius[c]);
orthographic_m4(projmat,
rect_cascade.xmin, rect_cascade.xmax,
rect_cascade.ymin, rect_cascade.ymax,
la->clipsta, la->clipend);
if (c == 0) {
memcpy(&rect_clip, &rect_cascade, sizeof(rect_clip));
}
else {
BLI_rctf_union(&rect_clip, &rect_cascade);
}
mul_m4_m4m4(sh_data->viewprojmat[c], projmat, viewmat);
mul_m4_m4m4(cascade_data->shadowmat[c], texcomat, sh_data->viewprojmat[c]);
}
/* Clipping mats */
orthographic_m4(sh_data->clipmat.mat[DRW_MAT_WIN],
rect_clip.xmin, rect_clip.xmax,
rect_clip.ymin, rect_clip.ymax,
la->clipsta, la->clipend);
mul_m4_m4m4(sh_data->clipmat.mat[DRW_MAT_PERS], sh_data->clipmat.mat[DRW_MAT_WIN], viewmat);
invert_m4_m4(sh_data->clipmat.mat[DRW_MAT_WININV], sh_data->clipmat.mat[DRW_MAT_WIN]);
invert_m4_m4(sh_data->clipmat.mat[DRW_MAT_PERSINV], sh_data->clipmat.mat[DRW_MAT_PERS]);
ubo_data->bias = 0.05f * la->bias;
ubo_data->near = la->clipsta;
ubo_data->far = la->clipend;
@@ -1004,37 +983,6 @@ void EEVEE_lights_update(EEVEE_ViewLayerData *sldata)
}
}
/* Override the six viewport matrices (win, view, pers and their inverses)
 * with an orthographic volume built from `srd`:
 * - the projection spans [-clip_far, clip_far] on every axis;
 * - the view matrix is an identity rotation translated by -srd->position.
 * NOTE(review): judging by the name, the overridden matrices are meant to
 * drive culling for cube shadow maps -- confirm against the draw manager. */
static void eevee_shadows_cube_culling_frustum(EEVEE_ShadowRender *srd)
{
float persmat[4][4], persinv[4][4];
float viewmat[4][4], viewinv[4][4];
float winmat[4][4], wininv[4][4];
/* Projection: symmetric orthographic box of half-extent clip_far. */
orthographic_m4(winmat, -srd->clip_far, srd->clip_far, -srd->clip_far, srd->clip_far, -srd->clip_far, srd->clip_far);
DRW_viewport_matrix_override_set(winmat, DRW_MAT_WIN);
invert_m4_m4(wininv, winmat);
DRW_viewport_matrix_override_set(wininv, DRW_MAT_WININV);
/* View: pure translation by the negated position, no rotation. */
unit_m4(viewmat);
negate_v3_v3(viewmat[3], srd->position);
DRW_viewport_matrix_override_set(viewmat, DRW_MAT_VIEW);
/* The inverse of a pure translation is built directly instead of inverting. */
unit_m4(viewinv);
copy_v3_v3(viewinv[3], srd->position);
DRW_viewport_matrix_override_set(viewinv, DRW_MAT_VIEWINV);
/* Combined projection * view and its inverse. */
mul_m4_m4m4(persmat, winmat, viewmat);
DRW_viewport_matrix_override_set(persmat, DRW_MAT_PERS);
invert_m4_m4(persinv, persmat);
DRW_viewport_matrix_override_set(persinv, DRW_MAT_PERSINV);
}
/* Override all viewport matrices at once with the matrix state stored in
 * `evscd->clipmat`. */
static void eevee_shadows_cascade_culling_frustum(EEVEE_ShadowCascadeData *evscd)
{
DRW_viewport_matrix_override_set_all(&evscd->clipmat);
}
/* this refresh lamps shadow buffers */
void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_PassList *psl)
{
@@ -1058,32 +1006,45 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_PassList *psl)
continue;
}
DRWMatrixState render_mats;
float (*winmat)[4] = render_mats.mat[DRW_MAT_WIN];
float (*viewmat)[4] = render_mats.mat[DRW_MAT_VIEW];
float (*persmat)[4] = render_mats.mat[DRW_MAT_PERS];
EEVEE_ShadowRender *srd = &linfo->shadow_render_data;
EEVEE_ShadowCubeData *evscd = &led->data.scd;
float cube_projmat[4][4];
float cube_viewmat[4][4];
perspective_m4(cube_projmat, -la->clipsta, la->clipsta, -la->clipsta, la->clipsta, la->clipsta, la->clipend);
unit_m4(cube_viewmat);
perspective_m4(winmat, -la->clipsta, la->clipsta, -la->clipsta, la->clipsta, la->clipsta, la->clipend);
srd->clip_near = la->clipsta;
srd->clip_far = la->clipend;
copy_v3_v3(srd->position, ob->obmat[3]);
negate_v3_v3(cube_viewmat[3], srd->position);
for (int j = 0; j < 6; j++) {
mul_m4_m4m4(srd->viewmat[j], cubefacemat[j], cube_viewmat);
mul_m4_m4m4(srd->shadowmat[j], cube_projmat, srd->viewmat[j]);
}
DRW_uniformbuffer_update(sldata->shadow_render_ubo, srd);
eevee_shadows_cube_culling_frustum(srd);
/* Render shadow cube */
linfo->shadow_instance_count = 6;
GPU_framebuffer_bind(sldata->shadow_cube_target_fb);
GPU_framebuffer_clear_depth(sldata->shadow_cube_target_fb, 1.0f);
DRW_draw_pass(psl->shadow_pass);
/* Render the 6 faces separately: seems to be faster for the general case.
 * The only time instancing is more beneficial is when the CPU culling overhead
 * outweighs the instancing overhead, which is rarely the case. */
for (int j = 0; j < 6; j++) {
/* TODO optimize */
float tmp[4][4];
unit_m4(tmp);
negate_v3_v3(tmp[3], srd->position);
mul_m4_m4m4(viewmat, cubefacemat[j], tmp);
mul_m4_m4m4(persmat, winmat, viewmat);
invert_m4_m4(render_mats.mat[DRW_MAT_WININV], winmat);
invert_m4_m4(render_mats.mat[DRW_MAT_VIEWINV], viewmat);
invert_m4_m4(render_mats.mat[DRW_MAT_PERSINV], persmat);
DRW_viewport_matrix_override_set_all(&render_mats);
GPU_framebuffer_texture_cubeface_attach(sldata->shadow_cube_target_fb,
sldata->shadow_cube_target, 0, j, 0);
GPU_framebuffer_bind(sldata->shadow_cube_target_fb);
GPU_framebuffer_clear_depth(sldata->shadow_cube_target_fb, 1.0f);
DRW_draw_pass(psl->shadow_pass);
}
/* 0.001f is arbitrary, but it should be relatively small so that filter size is not too big. */
float filter_texture_size = la->soft * 0.001f;
@@ -1143,22 +1104,39 @@ void EEVEE_draw_shadows(EEVEE_ViewLayerData *sldata, EEVEE_PassList *psl)
EEVEE_ShadowCascadeData *evscd = &led->data.scad;
EEVEE_ShadowRender *srd = &linfo->shadow_render_data;
DRWMatrixState render_mats;
float (*winmat)[4] = render_mats.mat[DRW_MAT_WIN];
float (*viewmat)[4] = render_mats.mat[DRW_MAT_VIEW];
float (*persmat)[4] = render_mats.mat[DRW_MAT_PERS];
eevee_shadow_cascade_setup(ob, linfo, led);
srd->clip_near = la->clipsta;
srd->clip_far = la->clipend;
for (int j = 0; j < la->cascade_count; ++j) {
copy_m4_m4(srd->shadowmat[j], evscd->viewprojmat[j]);
}
DRW_uniformbuffer_update(sldata->shadow_render_ubo, &linfo->shadow_render_data);
eevee_shadows_cascade_culling_frustum(evscd);
copy_m4_m4(viewmat, evscd->viewmat);
invert_m4_m4(render_mats.mat[DRW_MAT_VIEWINV], viewmat);
/* Render shadow cascades */
linfo->shadow_instance_count = la->cascade_count;
GPU_framebuffer_bind(sldata->shadow_cascade_target_fb);
GPU_framebuffer_clear_depth(sldata->shadow_cascade_target_fb, 1.0);
DRW_draw_pass(psl->shadow_pass);
/* Render each cascade separately: seems to be faster for the general case.
 * The only time instancing is more beneficial is when the CPU culling overhead
 * outweighs the instancing overhead, which is rarely the case. */
for (int j = 0; j < la->cascade_count; j++) {
copy_m4_m4(winmat, evscd->projmat[j]);
copy_m4_m4(persmat, evscd->viewprojmat[j]);
invert_m4_m4(render_mats.mat[DRW_MAT_WININV], winmat);
invert_m4_m4(render_mats.mat[DRW_MAT_PERSINV], persmat);
DRW_viewport_matrix_override_set_all(&render_mats);
GPU_framebuffer_texture_layer_attach(sldata->shadow_cascade_target_fb,
sldata->shadow_cascade_target, 0, j, 0);
GPU_framebuffer_bind(sldata->shadow_cascade_target_fb);
GPU_framebuffer_clear_depth(sldata->shadow_cascade_target_fb, 1.0f);
DRW_draw_pass(psl->shadow_pass);
}
/* TODO: OPTI: Filter all cascade in one/two draw call */
for (linfo->current_shadow_cascade = 0;

View File

@@ -47,7 +47,6 @@
/* *********** STATIC *********** */
static struct {
char *shadow_shader_lib;
char *frag_shader_lib;
char *volume_shader_lib;
@@ -532,7 +531,8 @@ void EEVEE_materials_init(EEVEE_ViewLayerData *sldata, EEVEE_StorageList *stl, E
char *frag_str = NULL;
/* Shaders */
e_data.shadow_shader_lib = BLI_string_joinN(
e_data.frag_shader_lib = BLI_string_joinN(
datatoc_common_view_lib_glsl,
datatoc_common_uniforms_lib_glsl,
datatoc_bsdf_common_lib_glsl,
datatoc_bsdf_sampling_lib_glsl,
@@ -556,10 +556,6 @@ void EEVEE_materials_init(EEVEE_ViewLayerData *sldata, EEVEE_StorageList *stl, E
datatoc_lit_surface_frag_glsl,
datatoc_volumetric_lib_glsl);
e_data.frag_shader_lib = BLI_string_joinN(
datatoc_common_view_lib_glsl,
e_data.shadow_shader_lib);
e_data.volume_shader_lib = BLI_string_joinN(
datatoc_common_view_lib_glsl,
datatoc_common_uniforms_lib_glsl,
@@ -762,13 +758,13 @@ struct GPUMaterial *EEVEE_material_mesh_depth_get(
char *defines = eevee_get_defines(options);
char *frag_str = BLI_string_joinN(
(is_shadow) ? e_data.shadow_shader_lib : e_data.frag_shader_lib,
e_data.frag_shader_lib,
datatoc_prepass_frag_glsl);
mat = DRW_shader_create_from_material(
scene, ma, engine, options,
(is_shadow) ? datatoc_shadow_vert_glsl : datatoc_lit_surface_vert_glsl,
(is_shadow) ? datatoc_shadow_geom_glsl : NULL,
NULL,
frag_str,
defines);
@@ -1412,11 +1408,11 @@ void EEVEE_materials_cache_populate(EEVEE_Data *vedata, EEVEE_ViewLayerData *sld
break;
case MA_BS_CLIP:
gpumat = EEVEE_material_mesh_depth_get(scene, ma, false, true);
EEVEE_lights_cache_shcaster_material_add(sldata, psl, gpumat, mat_geom[i], ob, ob->obmat, &ma->alpha_threshold);
EEVEE_lights_cache_shcaster_material_add(sldata, psl, gpumat, mat_geom[i], ob, &ma->alpha_threshold);
break;
case MA_BS_HASHED:
gpumat = EEVEE_material_mesh_depth_get(scene, ma, true, true);
EEVEE_lights_cache_shcaster_material_add(sldata, psl, gpumat, mat_geom[i], ob, ob->obmat, NULL);
EEVEE_lights_cache_shcaster_material_add(sldata, psl, gpumat, mat_geom[i], ob, NULL);
break;
case MA_BS_NONE:
default:
@@ -1530,7 +1526,7 @@ void EEVEE_materials_free(void)
for (int i = 0; i < VAR_MAT_MAX; ++i) {
DRW_SHADER_FREE_SAFE(e_data.default_lit[i]);
}
MEM_SAFE_FREE(e_data.shadow_shader_lib);
MEM_SAFE_FREE(e_data.frag_shader_lib);
MEM_SAFE_FREE(e_data.frag_shader_lib);
MEM_SAFE_FREE(e_data.volume_shader_lib);
DRW_SHADER_FREE_SAFE(e_data.default_prepass_sh);

View File

@@ -675,7 +675,8 @@ typedef struct EEVEE_ShadowCubeData {
/* Data for one cascaded shadow map. The matrix and radius members are read
 * and written by eevee_shadow_cascade_setup() and consumed when the cascades
 * are rendered. */
typedef struct EEVEE_ShadowCascadeData {
short light_id, shadow_id, cascade_id, layer_id;
float viewprojmat[MAX_CASCADE_NUM][4][4]; /* World->Lamp->NDC : used for rendering the shadow map. */
DRWMatrixState clipmat; /* Override matrices used for clipping. */
float projmat[MAX_CASCADE_NUM][4][4]; /* Per-cascade orthographic projection (Lamp->NDC). */
float viewmat[4][4], viewinv[4][4]; /* Normalized inverse of the lamp object matrix (World->Lamp), and its inverse. */
float radius[MAX_CASCADE_NUM]; /* Per-cascade radius used to size the cascade's projection rectangle. */
} EEVEE_ShadowCascadeData;
@@ -806,7 +807,7 @@ void EEVEE_lights_cache_shcaster_add(
void EEVEE_lights_cache_shcaster_material_add(
EEVEE_ViewLayerData *sldata, EEVEE_PassList *psl,
struct GPUMaterial *gpumat, struct Gwn_Batch *geom, struct Object *ob,
float (*obmat)[4], float *alpha_threshold);
float *alpha_threshold);
void EEVEE_lights_cache_shcaster_object_add(EEVEE_ViewLayerData *sldata, struct Object *ob);
void EEVEE_lights_cache_finish(EEVEE_ViewLayerData *sldata);
void EEVEE_lights_update(EEVEE_ViewLayerData *sldata);

View File

@@ -8,37 +8,6 @@
#define LUT_SIZE 64
#ifdef SHADOW_SHADER
layout(std140) uniform shadow_render_block {
mat4 ShadowMatrix[6];
mat4 FaceViewMatrix[6];
vec4 lampPosition;
float cubeTexelSize;
float storedTexelSize;
float nearClip;
float farClip;
int shadowSampleCount;
float shadowInvSampleCount;
};
flat in int shFace; /* Shadow layer we are rendering to. */
/* Replacing viewBlock */
#define ViewMatrix FaceViewMatrix[shFace]
#define ViewProjectionMatrix ShadowMatrix[shFace]
/* TODO optimize */
#define ProjectionMatrix \
mat4(vec4(1.0, 0.0, 0.0, 0.0), \
vec4(0.0, 1.0, 0.0, 0.0), \
vec4(0.0, 0.0, -(farClip + nearClip) / (farClip - nearClip), -1.0), \
vec4(0.0, 0.0, (-2.0 * farClip * nearClip) / (farClip - nearClip), 0.0))
#define ViewMatrixInverse inverse(ViewMatrix)
#define ViewProjectionMatrixInverse inverse(ViewProjectionMatrix)
#define ProjectionMatrixInverse inverse(ProjectionMatrix)
#define CameraTexCoFactors vec4(1.0f, 1.0f, 0.0f, 0.0f)
#endif
/* Buffers */
uniform sampler2D colorBuffer;
uniform sampler2D depthBuffer;

View File

@@ -1,29 +1,33 @@
uniform mat4 ModelMatrix;
uniform mat4 ModelViewProjectionMatrix;
#ifdef MESH_SHADER
uniform mat4 ModelMatrix;
uniform mat4 ModelViewMatrix;
uniform mat3 WorldNormalMatrix;
# ifndef ATTRIB
uniform mat3 NormalMatrix;
# endif
#endif
in vec3 pos;
#ifdef MESH_SHADER
in vec3 nor;
#endif
out vec4 vPos;
#ifdef MESH_SHADER
out vec3 vNor;
out vec3 worldPosition;
out vec3 viewPosition;
out vec3 worldNormal;
out vec3 viewNormal;
#endif
flat out int face;
void main() {
vPos = ModelMatrix * vec4(pos, 1.0);
face = gl_InstanceID;
gl_Position = ModelViewProjectionMatrix * vec4(pos, 1.0);
#ifdef MESH_SHADER
vNor = WorldNormalMatrix * nor;
viewPosition = (ModelViewMatrix * vec4(pos, 1.0)).xyz;
worldPosition = (ModelMatrix * vec4(pos, 1.0)).xyz;
viewNormal = normalize(NormalMatrix * nor);
worldNormal = normalize(WorldNormalMatrix * nor);
#ifdef ATTRIB
pass_attrib(pos);
#endif
#endif
}
}