EEVEE Next: Subsurface Scattering #107407

Merged
Miguel Pozo merged 24 commits from pragma37/blender:pull-eevee-next-sss into main 2023-06-15 15:49:12 +02:00
20 changed files with 556 additions and 64 deletions

View File

@ -155,6 +155,7 @@ set(SRC
engines/eevee_next/eevee_shader.cc
engines/eevee_next/eevee_shadow.cc
engines/eevee_next/eevee_sync.cc
engines/eevee_next/eevee_subsurface.cc
engines/eevee_next/eevee_velocity.cc
engines/eevee_next/eevee_view.cc
engines/eevee_next/eevee_world.cc
@ -296,6 +297,7 @@ set(SRC
engines/eevee_next/eevee_shader.hh
engines/eevee_next/eevee_shadow.hh
engines/eevee_next/eevee_sync.hh
engines/eevee_next/eevee_subsurface.hh
engines/eevee_next/eevee_velocity.hh
engines/eevee_next/eevee_view.hh
engines/eevee_next/eevee_world.hh
@ -504,6 +506,7 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_shadow_tilemap_init_comp.glsl
engines/eevee_next/shaders/eevee_shadow_tilemap_lib.glsl
engines/eevee_next/shaders/eevee_spherical_harmonics_lib.glsl
engines/eevee_next/shaders/eevee_subsurface_eval_frag.glsl
engines/eevee_next/shaders/eevee_surf_deferred_frag.glsl
engines/eevee_next/shaders/eevee_surf_depth_frag.glsl
engines/eevee_next/shaders/eevee_surf_forward_frag.glsl

View File

@ -92,11 +92,13 @@
/* Textures. */
/* Used anywhere. (Starts at index 2, since 0 and 1 are used by draw_gpencil) */
#define RBUFS_UTILITY_TEX_SLOT 2
#define HIZ_TEX_SLOT 3
/* Only during surface shading (forward and deferred eval). */
#define SHADOW_TILEMAPS_TEX_SLOT 3
#define SHADOW_ATLAS_TEX_SLOT 4
#define SHADOW_TILEMAPS_TEX_SLOT 4
#define SHADOW_ATLAS_TEX_SLOT 5
#define SSS_TRANSMITTANCE_TEX_SLOT 6
/* Only during shadow rendering. */
pragma37 marked this conversation as resolved Outdated

Do not use slot 16 (or anything above slot 15). It is not available in all implementations.

Do not use slot 16 (or anything above slot 15). It is not available in all implementations.
#define SHADOW_RENDER_MAP_SLOT 3
#define SHADOW_RENDER_MAP_SLOT 4
/* Images. */
#define RBUFS_COLOR_SLOT 0

View File

@ -206,6 +206,7 @@ void Instance::end_sync()
shadows.end_sync(); /** \note: Needs to be before lights. */
lights.end_sync();
sampling.end_sync();
subsurface.end_sync();
film.end_sync();
cryptomatte.end_sync();
pipelines.end_sync();

View File

@ -31,6 +31,7 @@
#include "eevee_sampling.hh"
#include "eevee_shader.hh"
#include "eevee_shadow.hh"
#include "eevee_subsurface.hh"
#include "eevee_sync.hh"
#include "eevee_view.hh"
#include "eevee_world.hh"
@ -49,6 +50,7 @@ class Instance {
ShaderModule &shaders;
SyncModule sync;
MaterialModule materials;
SubsurfaceModule subsurface;
PipelineModule pipelines;
ShadowModule shadows;
LightModule lights;
@ -95,6 +97,7 @@ class Instance {
: shaders(*ShaderModule::module_get()),
sync(*this),
materials(*this),
subsurface(*this),
pipelines(*this),
shadows(*this),
lights(*this),

View File

@ -142,6 +142,8 @@ void ForwardPipeline::sync()
opaque_ps_.bind_image(RBUFS_CRYPTOMATTE_SLOT, &inst_.render_buffers.cryptomatte_tx);
/* Textures. */
opaque_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
opaque_ps_.bind_texture(SSS_TRANSMITTANCE_TEX_SLOT, inst_.subsurface.transmittance_tx_get());
/* Uniform Buffer. */
opaque_ps_.bind_ubo(CAMERA_BUF_SLOT, inst_.camera.ubo_get());
opaque_ps_.bind_ubo(RBUFS_BUF_SLOT, &inst_.render_buffers.data);
@ -169,6 +171,7 @@ void ForwardPipeline::sync()
/* Textures. */
sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
sub.bind_texture(SSS_TRANSMITTANCE_TEX_SLOT, inst_.subsurface.transmittance_tx_get());
/* Uniform Buffer. */
sub.bind_ubo(CAMERA_BUF_SLOT, inst_.camera.ubo_get());
@ -306,7 +309,7 @@ void DeferredLayer::begin_sync()
{
gbuffer_ps_.init();
gbuffer_ps_.clear_stencil(0x00u);
gbuffer_ps_.state_stencil(0x01u, 0x01u, 0x01u);
gbuffer_ps_.state_stencil(0xFFu, 0xFFu, 0xFFu);
{
/* Common resources. */
@ -343,18 +346,14 @@ void DeferredLayer::begin_sync()
void DeferredLayer::end_sync()
{
/* Use stencil test to reject pixel not written by this layer. */
/* WORKAROUND: Stencil write is only here to avoid rasterizer discard. */
DRWState state = DRW_STATE_WRITE_STENCIL | DRW_STATE_STENCIL_EQUAL;
/* Allow output to combined pass for the last pass. */
DRWState state_write_color = state | DRW_STATE_WRITE_COLOR | DRW_STATE_BLEND_CUSTOM;
if (closure_bits_ & (CLOSURE_DIFFUSE | CLOSURE_REFLECTION)) {
const bool is_last_eval_pass = true;
const bool is_last_eval_pass = !(closure_bits_ & CLOSURE_SSS);
eval_light_ps_.init();
eval_light_ps_.state_set(is_last_eval_pass ? state_write_color : state);
eval_light_ps_.state_stencil(0x00u, 0x01u, 0xFFu);
/* Use stencil test to reject pixel not written by this layer. */
eval_light_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_STENCIL_NEQUAL |
DRW_STATE_BLEND_CUSTOM);
eval_light_ps_.state_stencil(0x00u, 0x00u, (CLOSURE_DIFFUSE | CLOSURE_REFLECTION));
eval_light_ps_.shader_set(inst_.shaders.static_shader_get(DEFERRED_LIGHT));
eval_light_ps_.bind_image("out_diffuse_light_img", &diffuse_light_tx_);
eval_light_ps_.bind_image("out_specular_light_img", &specular_light_tx_);
@ -364,6 +363,8 @@ void DeferredLayer::end_sync()
eval_light_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
eval_light_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
eval_light_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
eval_light_ps_.bind_texture(SSS_TRANSMITTANCE_TEX_SLOT,
inst_.subsurface.transmittance_tx_get());
eval_light_ps_.bind_ubo(RBUFS_BUF_SLOT, &inst_.render_buffers.data);
inst_.lights.bind_resources(&eval_light_ps_);
@ -391,12 +392,15 @@ PassMain::Sub *DeferredLayer::prepass_add(::Material *blender_mat,
PassMain::Sub *DeferredLayer::material_add(::Material *blender_mat, GPUMaterial *gpumat)
{
closure_bits_ |= shader_closure_bits_from_flag(gpumat);
eClosureBits closure_bits = shader_closure_bits_from_flag(gpumat);
closure_bits_ |= closure_bits;
PassMain::Sub *pass = (blender_mat->blend_flag & MA_BL_CULL_BACKFACE) ?
pragma37 marked this conversation as resolved Outdated

This subsurface pass should go inside SubsurfaceModule.

Note that the subsurface algorithm is currently quite simple, but we might extend it in the future. Keeping everything in one place will avoid much trouble later on.

You can init the pass in all cases and only submit it if the closure bit is present.

This subsurface pass should go inside `SubsurfaceModule`. Note that, currently, the subsurface algorithm is quite simple, we might extend it in the future. Making everything in one place will avoid much trouble later on. You can init the pass in all cases and only submit it if the closure bit is present.
gbuffer_single_sided_ps_ :
gbuffer_double_sided_ps_;
return &pass->sub(GPU_material_get_name(gpumat));
pass = &pass->sub(GPU_material_get_name(gpumat));
pass->state_stencil(closure_bits, 0xFFu, 0xFFu);
return pass;
}
void DeferredLayer::render(View &view,
@ -404,7 +408,6 @@ void DeferredLayer::render(View &view,
Framebuffer &combined_fb,
int2 extent)
{
GPU_framebuffer_bind(prepass_fb);
inst_.manager->submit(prepass_ps_, view);
@ -424,6 +427,10 @@ void DeferredLayer::render(View &view,
inst_.manager->submit(eval_light_ps_, view);
if (closure_bits_ & CLOSURE_SSS) {
inst_.subsurface.render(view, combined_fb, diffuse_light_tx_);
}
diffuse_light_tx_.release();
specular_light_tx_.release();

View File

@ -173,6 +173,8 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
return "eevee_shadow_tag_usage_opaque";
case SHADOW_TILEMAP_TAG_USAGE_TRANSPARENT:
return "eevee_shadow_tag_usage_transparent";
case SUBSURFACE_EVAL:
return "eevee_subsurface_eval";
/* To avoid compiler warning about missing case. */
case MAX_SHADER_TYPE:
return "";
@ -256,6 +258,10 @@ void ShaderModule::material_create_info_ammend(GPUMaterial *gpumat, GPUCodegenOu
info.additional_info("eevee_cryptomatte_out");
}
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_SUBSURFACE) && pipeline_type == MAT_PIPE_FORWARD) {
info.additional_info("eevee_transmittance_data");
}
if (GPU_material_flag_get(gpumat, GPU_MATFLAG_BARYCENTRIC)) {
switch (geometry_type) {
case MAT_GEOM_MESH:

View File

@ -81,6 +81,8 @@ enum eShaderType {
SHADOW_TILEMAP_TAG_USAGE_OPAQUE,
SHADOW_TILEMAP_TAG_USAGE_TRANSPARENT,
SUBSURFACE_EVAL,
MAX_SHADER_TYPE,
};

View File

@ -977,6 +977,7 @@ using ShadowPageCacheBuf = draw::StorageArrayBuffer<uint2, SHADOW_MAX_PAGE, true
using ShadowTileMapDataBuf = draw::StorageVectorBuffer<ShadowTileMapData, SHADOW_MAX_TILEMAP>;
using ShadowTileMapClipBuf = draw::StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP, true>;
using ShadowTileDataBuf = draw::StorageArrayBuffer<ShadowTileDataPacked, SHADOW_MAX_TILE, true>;
using SubsurfaceDataBuf = draw::UniformBuffer<SubsurfaceData>;
using VelocityGeometryBuf = draw::StorageArrayBuffer<float4, 16, true>;
using VelocityIndexBuf = draw::StorageArrayBuffer<VelocityIndex, 16>;
using VelocityObjectBuf = draw::StorageArrayBuffer<float4x4, 16>;

View File

@ -0,0 +1,222 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation.
*/
/** \file
* \ingroup eevee
*
*/
#include "BLI_vector.hh"
#include "eevee_instance.hh"
#include "eevee_subsurface.hh"
#include <iostream>
namespace blender::eevee {
/* -------------------------------------------------------------------- */
/** \name Subsurface
*
* \{ */
/**
 * Refresh the per-frame subsurface data and rebuild the subsurface evaluation pass.
 *
 * - Copies the jitter threshold from the scene settings and (re)computes the sample set.
 * - Builds the transmittance profile texture once, while it is still invalid.
 * - Re-initializes `subsurface_ps_` as a stencil-tested, additively blended pass that issues a
 *   single procedural draw of 3 vertices.
 */
void SubsurfaceModule::end_sync()
{
data_.jitter_threshold = inst_.scene->eevee.sss_jitter_threshold;
/* NOTE(review): `sample_len` is forced to 55 below, so this comparison stays true on every sync
 * unless the scene setting itself is 55. The branch only re-assigns 55, so the net effect is a
 * constant sample count. */
if (data_.sample_len != inst_.scene->eevee.sss_samples) {
/* Convert sample count from old implementation which was using a separable filter. */
/* TODO(fclem) better remapping. */
// data_.sample_len = square_f(1 + 2 * inst_.scene->eevee.sss_samples);
data_.sample_len = 55;
}
/* The transmittance profile does not depend on per-frame state: build it only once. */
if (!transmittance_tx_.is_valid()) {
precompute_transmittance_profile();
}
/* Sample locations use the sampling module random values, so they are recomputed each sync. */
precompute_samples_location();
data_.push_update();
subsurface_ps_.init();
subsurface_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_STENCIL_EQUAL |
DRW_STATE_BLEND_ADD_FULL);
/* Presumably (write mask, reference, compare mask): only touch pixels whose stencil has the
 * CLOSURE_SSS bit set by the G-buffer pass -- TODO confirm argument order. */
subsurface_ps_.state_stencil(0x00u, 0xFFu, CLOSURE_SSS);
subsurface_ps_.shader_set(inst_.shaders.static_shader_get(SUBSURFACE_EVAL));
/* Binds the "sss_buf" uniform buffer (presumably `inst_.subsurface` is this very module). */
inst_.subsurface.bind_resources(&subsurface_ps_);
inst_.hiz_buffer.bind_resources(&subsurface_ps_);
/* Bound by address: the actual texture is assigned in `render()` right before submission. */
subsurface_ps_.bind_texture("radiance_tx", &diffuse_light_tx_);
subsurface_ps_.bind_texture("gbuffer_closure_tx", &inst_.gbuffer.closure_tx);
subsurface_ps_.bind_texture("gbuffer_color_tx", &inst_.gbuffer.color_tx);
subsurface_ps_.bind_ubo(RBUFS_BUF_SLOT, &inst_.render_buffers.data);
subsurface_ps_.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx);
pragma37 marked this conversation as resolved

Also bind RBUFS_VALUE_SLOT otherwise it will trigger a warning / validation error.

Also bind `RBUFS_VALUE_SLOT` otherwise it will trigger a warning / validation error.
/** NOTE: Not used in the shader, but we bind it to avoid debug warnings. */
subsurface_ps_.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx);
/* Make previous texture writes visible to the fetches done by this pass. */
subsurface_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH);
/* One instance of a 3 vertex procedural triangle. */
subsurface_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
/**
 * Submit the subsurface evaluation pass into `fb`.
 *
 * \param view: View used for the submission.
 * \param fb: Frame-buffer that receives the blended result.
 * \param diffuse_light_tx: Diffuse radiance produced by the light evaluation pass.
 */
void SubsurfaceModule::render(View &view, Framebuffer &fb, Texture &diffuse_light_tx)
{
  /* The pass references `diffuse_light_tx_` by address (see `end_sync()`),
   * so the pointer has to be valid before the pass is submitted. */
  diffuse_light_tx_ = *&diffuse_light_tx;

  fb.bind();
  inst_.manager->submit(subsurface_ps_, view);
}
/**
 * Fill `data_.samples` with `data_.sample_len` sample positions.
 *
 * Each sample stores its 2D offset in `x`/`y` and the inverse of its PDF in `z`.
 * Angles advance by the golden angle per sample, radii come from `burley_sample()`.
 */
void SubsurfaceModule::precompute_samples_location()
{
  /* Precompute sample position with white albedo. */
  const float d = burley_setup(1.0f, 1.0f);

  const float rand_u = inst_.sampling.rng_get(SAMPLING_SSS_U);
  const float rand_v = inst_.sampling.rng_get(SAMPLING_SSS_V);

  const double golden_angle = M_PI * (3.0 - sqrt(5.0));
  /* Random rotation shared by the whole sample set. */
  const double rotation_offset = M_PI * 2.0f * rand_u;
  /* Scale using rand_v in order to keep first sample always at center. */
  const float radial_jitter = 1.0f + (rand_v / data_.sample_len);

  for (auto sample_index : IndexRange(data_.sample_len)) {
    const float theta = golden_angle * sample_index + rotation_offset;
    const float cdf_position = radial_jitter * (sample_index / (float)data_.sample_len);
    const float radius = burley_sample(d, cdf_position);

    auto &sample = data_.samples[sample_index];
    sample.x = cosf(theta) * radius;
    sample.y = sinf(theta) * radius;
    /* Stored as the reciprocal so the shader can multiply instead of divide. */
    sample.z = 1.0f / burley_pdf(d, radius);
  }
}
void SubsurfaceModule::precompute_transmittance_profile()
{
Vector<float> profile(SSS_TRANSMIT_LUT_SIZE);
/* Precompute sample position with white albedo. */
float radius = 1.0f;
float d = burley_setup(radius, 1.0f);
/* For each distance d we compute the radiance incoming from an hypothetical parallel plane. */
for (auto i : IndexRange(SSS_TRANSMIT_LUT_SIZE)) {
/* Distance from the lit surface plane.
* Compute to a larger maximum distance to have a smoother falloff for all channels. */
float lut_radius = SSS_TRANSMIT_LUT_RADIUS * radius;
float distance = lut_radius * (i + 1e-5f) / profile.size();
/* Compute radius of the footprint on the hypothetical plane. */
float r_fp = sqrtf(square_f(lut_radius) - square_f(distance));
profile[i] = 0.0f;
float area_accum = 0.0f;
for (auto j : IndexRange(SSS_TRANSMIT_LUT_STEP_RES)) {
/* Compute distance to the "shading" point through the medium. */
float r = (r_fp * (j + 0.5f)) / SSS_TRANSMIT_LUT_STEP_RES;
float r_prev = (r_fp * (j + 0.0f)) / SSS_TRANSMIT_LUT_STEP_RES;
float r_next = (r_fp * (j + 1.0f)) / SSS_TRANSMIT_LUT_STEP_RES;
r = hypotf(r, distance);
float R = burley_eval(d, r);
/* Since the profile and configuration are radially symmetrical we
* can just evaluate it once and weight it accordingly */
float disk_area = square_f(r_next) - square_f(r_prev);
profile[i] += R * disk_area;
area_accum += disk_area;
}
/* Normalize over the disk. */
profile[i] /= area_accum;
}
/** NOTE: There's something very wrong here.
* This should be a small remap,
* but current profile range goes from 0.0399098 to 0.0026898. */
/* Make a smooth gradient from 1 to 0. */
float range = profile.first() - profile.last();
float offset = profile.last();
for (float &value : profile) {
value = (value - offset) / range;
/** HACK: Remap the curve to better fit Cycles values. */
value = std::pow(value, 1.6f);
}
profile.first() = 1;
profile.last() = 0;
transmittance_tx_.ensure_1d(
GPU_R16F, profile.size(), GPU_TEXTURE_USAGE_SHADER_READ, profile.data());
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Christensen-Burley SSS model
*
* Based on: "Approximate Reflectance Profiles for Efficient Subsurface Scattering"
* by Per Christensen
* https://graphics.pixar.com/library/ApproxBSSRDF/approxbssrdfslides.pdf
* \{ */
/**
 * Compute the shape parameter `d` of the Burley profile.
 *
 * \param radius: Scattering radius.
 * \param albedo: Surface albedo.
 * \return Mean free path length divided by the scaling factor `s`.
 */
float SubsurfaceModule::burley_setup(float radius, float albedo)
{
  /* Diffuse surface transmission, equation (6). */
  const float scaling = 1.9f - albedo + 3.5f * square_f(albedo - 0.8f);
  /* Mean free path length adapted to fit ancient Cubic and Gaussian models. */
  const float mean_free_path = 0.25 * M_1_PI * radius;
  return mean_free_path / scaling;
}
/**
 * Sample a radius from the Burley profile by numerically inverting its CDF.
 *
 * \param d: Shape parameter returned by `burley_setup()`. The root is found in scaled space
 *           and multiplied by `d` at the end.
 * \param x_rand: Random value, scaled by `SSS_BURLEY_TRUNCATE_CDF` before the inversion.
 * \return The sampled radius.
 */
float SubsurfaceModule::burley_sample(float d, float x_rand)
{
  x_rand *= SSS_BURLEY_TRUNCATE_CDF;

  const float tolerance = 1e-6;
  const int max_iteration_count = 10;
  /* Do initial guess based on manual curve fitting, this allows us to reduce
   * number of iterations to maximum 4 across the [0..1] range. We keep maximum
   * number of iteration higher just to be sure we didn't miss root in some
   * corner case.
   */
  float r;
  if (x_rand <= 0.9) {
    r = exp(x_rand * x_rand * 2.4) - 1.0;
  }
  else {
    /* TODO(sergey): Some nicer curve fit is possible here. */
    r = 15.0;
  }
  /* Solve against scaled radius (Newton-Raphson on `cdf(r) - x_rand`). */
  for (int i = 0; i < max_iteration_count; i++) {
    float exp_r_3 = exp(-r / 3.0);
    float exp_r = exp_r_3 * exp_r_3 * exp_r_3;
    float f = 1.0 - 0.25 * exp_r - 0.75 * exp_r_3 - x_rand;
    float f_ = 0.25 * exp_r + 0.25 * exp_r_3;

    /* Use the explicit float absolute value. An unqualified `abs()` may resolve to the integer
     * `abs(int)` depending on which headers are visible, which would truncate any |f| < 1 to 0
     * and stop the iteration after the initial guess. */
    if (fabsf(f) < tolerance || f_ == 0.0) {
      break;
    }

    r = r - f / f_;
    /* A radius cannot be negative: clamp the Newton step. */
    if (r < 0.0) {
      r = 0.0;
    }
  }

  return r * d;
}
/**
 * Evaluate the Burley reflectance profile at distance `r`.
 *
 * \param d: Shape parameter returned by `burley_setup()`.
 * \param r: Distance at which the profile is evaluated.
 * \return Profile value, or 0 at and beyond the truncation distance `SSS_BURLEY_TRUNCATE * d`.
 */
float SubsurfaceModule::burley_eval(float d, float r)
{
  /* The profile is truncated: no contribution past the cut-off distance. */
  if (r >= SSS_BURLEY_TRUNCATE * d) {
    return 0.0f;
  }
  /* Slide 33. */
  const float slow_falloff = expf(-r / (3.0f * d));
  const float fast_falloff = slow_falloff * slow_falloff * slow_falloff;
  return (fast_falloff + slow_falloff) / (8.0f * (float)M_PI * d);
}
/**
 * Probability density matching `burley_sample()`: the profile value divided by
 * `SSS_BURLEY_TRUNCATE_CDF`.
 */
float SubsurfaceModule::burley_pdf(float d, float r)
{
  const float profile = burley_eval(d, r);
  return profile / SSS_BURLEY_TRUNCATE_CDF;
}
/** \} */
} // namespace blender::eevee

View File

@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation.
*/
/** \file
* \ingroup eevee
*
* Postprocess diffuse radiance output from the diffuse evaluation pass to mimic subsurface
* transmission.
*
* This implementation follows the technique described in the siggraph presentation:
* "Efficient screen space subsurface scattering Siggraph 2018"
* by Evgenii Golubev
*
* But, instead of having all the precomputed weights for all three color primaries,
* we precompute a weight profile texture to be able to support per pixel AND per channel radius.
*/
#pragma once
#include "eevee_shader.hh"
#include "eevee_shader_shared.hh"
namespace blender::eevee {
/* -------------------------------------------------------------------- */
/** \name Subsurface
*
* \{ */
class Instance;
/**
 * Owns the data and the pass used for the screen-space subsurface evaluation:
 * the sample set uniform buffer, the transmittance profile texture and the evaluation pass.
 */
struct SubsurfaceModule {
 private:
  Instance &inst_;
  /** Contains samples locations. */
  SubsurfaceDataBuf data_;
  /** Contains translucence profile for a single color channel. */
  Texture transmittance_tx_;
  /**
   * Scene diffuse irradiance. Pointer bound at sync time, set at render time.
   * Initialized to null so the address bound into the pass never refers to an indeterminate
   * pointer value before the first `render()`.
   */
  GPUTexture *diffuse_light_tx_ = nullptr;
  /** Subsurface eval pass. Runs after the deferred pass. */
  PassSimple subsurface_ps_ = {"Subsurface"};

 public:
  SubsurfaceModule(Instance &inst) : inst_(inst)
  {
    /* Force first update. */
    data_.sample_len = -1;
  }

  ~SubsurfaceModule() {}

  /** Update the uniform data and rebuild the evaluation pass. */
  void end_sync();

  /** Submit the evaluation pass into `fb`, reading radiance from `diffuse_light_tx`. */
  void render(View &view, Framebuffer &fb, Texture &diffuse_light_tx);

  /** Bind the sample set uniform buffer ("sss_buf") to `pass`. */
  template<typename T> void bind_resources(draw::detail::PassBase<T> *pass)
  {
    pass->bind_ubo("sss_buf", data_);
  }

  /** Reference to the transmittance profile texture, for binding by address. */
  GPUTexture **transmittance_tx_get()
  {
    return &transmittance_tx_;
  }

 private:
  void precompute_samples_location();
  void precompute_transmittance_profile();

  /** Christensen-Burley implementation. */
  static float burley_setup(float radius, float albedo);
  static float burley_sample(float d, float x_rand);
  static float burley_eval(float d, float r);
  static float burley_pdf(float d, float r);
};
/** \} */
} // namespace blender::eevee

View File

@ -42,11 +42,11 @@ void main()
* produces a complete diffuse light buffer that will be correctly convolved by the SSSS.
* The refraction pixels will just set the diffuse radiance to 0. */
}
else if (false /* TODO */) {
else if (textureSize(gbuffer_closure_tx, 0).z >= 3) {
vec4 gbuffer_2_packed = texelFetch(gbuffer_closure_tx, ivec3(texel, 2), 0);
diffuse_data.sss_radius = gbuffer_sss_radii_unpack(gbuffer_2_packed.xyz);
diffuse_data.sss_id = gbuffer_object_id_unorm16_unpack(gbuffer_2_packed.w);
thickness = gbuffer_thickness_pack(gbuffer_1_packed.z);
thickness = gbuffer_thickness_unpack(gbuffer_1_packed.z);
}
vec3 diffuse_light = vec3(0.0);
@ -64,51 +64,41 @@ void main()
reflection_light,
shadow);
if (is_last_eval_pass) {
/* Apply color and output lighting to render-passes. */
vec4 color_0_packed = texelFetch(gbuffer_color_tx, ivec3(texel, 0), 0);
vec4 color_1_packed = texelFetch(gbuffer_color_tx, ivec3(texel, 1), 0);
reflection_data.color = gbuffer_color_unpack(color_0_packed);
diffuse_data.color = gbuffer_color_unpack(color_1_packed);
if (is_refraction) {
diffuse_data.color = vec3(0.0);
}
/* Light passes. */
if (rp_buf.diffuse_light_id >= 0) {
imageStore(rp_color_img, ivec3(texel, rp_buf.diffuse_light_id), vec4(diffuse_light, 1.0));
}
if (rp_buf.specular_light_id >= 0) {
imageStore(
rp_color_img, ivec3(texel, rp_buf.specular_light_id), vec4(reflection_light, 1.0));
}
if (rp_buf.shadow_id >= 0) {
imageStore(rp_value_img, ivec3(texel, rp_buf.shadow_id), vec4(shadow));
}
/* TODO: AO. */
diffuse_light *= diffuse_data.color;
reflection_light *= reflection_data.color;
/* Add radiance to combined pass. */
out_radiance = vec4(diffuse_light + reflection_light, 0.0);
out_transmittance = vec4(1.0);
}
else {
/* Store lighting for next deferred pass. */
if (!is_last_eval_pass) {
/* Output diffuse light along with object ID for sub-surface screen space processing. */
vec4 diffuse_radiance;
diffuse_radiance.xyz = diffuse_light;
diffuse_radiance.w = gbuffer_object_id_f16_pack(diffuse_data.sss_id);
imageStore(out_diffuse_light_img, texel, diffuse_radiance);
imageStore(out_specular_light_img, texel, vec4(reflection_light, 0.0));
/* Final radiance will be amended by the last pass.
* This should do nothing as color write should be disabled in this case. */
out_radiance = vec4(0.0);
out_transmittance = vec4(0.0);
}
/* Apply color and output lighting to render-passes. */
vec4 color_0_packed = texelFetch(gbuffer_color_tx, ivec3(texel, 0), 0);
vec4 color_1_packed = texelFetch(gbuffer_color_tx, ivec3(texel, 1), 0);
reflection_data.color = gbuffer_color_unpack(color_0_packed);
diffuse_data.color = gbuffer_color_unpack(color_1_packed);
if (is_refraction) {
diffuse_data.color = vec3(0.0);
}
/* Light passes. */
if (rp_buf.diffuse_light_id >= 0) {
imageStore(rp_color_img, ivec3(texel, rp_buf.diffuse_light_id), vec4(diffuse_light, 1.0));
}
if (rp_buf.specular_light_id >= 0) {
imageStore(rp_color_img, ivec3(texel, rp_buf.specular_light_id), vec4(reflection_light, 1.0));
}
if (rp_buf.shadow_id >= 0) {
imageStore(rp_value_img, ivec3(texel, rp_buf.shadow_id), vec4(shadow));
}
/* TODO: AO. */
diffuse_light *= diffuse_data.color;
reflection_light *= reflection_data.color;
/* Add radiance to combined pass. */
out_radiance = vec4(diffuse_light + reflection_light, 0.0);
out_transmittance = vec4(1.0);
}

View File

@ -10,7 +10,10 @@
vec2 gbuffer_normal_pack(vec3 N)
{
N /= length_manhattan(N);
N.xy = (N.z >= 0.0) ? N.xy : ((1.0 - abs(N.yx)) * sign(N.xy));
vec2 _sign = sign(N.xy);
_sign.x = _sign.x == 0.0 ? 1.0 : _sign.x;
_sign.y = _sign.y == 0.0 ? 1.0 : _sign.y;
N.xy = (N.z >= 0.0) ? N.xy : ((1.0 - abs(N.yx)) * _sign);
N.xy = N.xy * 0.5 + 0.5;
return N.xy;
}
@ -32,7 +35,7 @@ float gbuffer_ior_pack(float ior)
float gbuffer_ior_unpack(float ior_packed)
{
return (ior_packed > 0.5) ? (-1.0 / (ior_packed * 2.0 + 2.0)) : (2.0 * ior_packed);
return (ior_packed > 0.5) ? (0.5 / (1.0 - ior_packed)) : (2.0 * ior_packed);
}
float gbuffer_thickness_pack(float thickness)

View File

@ -47,7 +47,7 @@ void light_eval_ex(ClosureDiffuse diffuse,
#ifdef SSS_TRANSMITTANCE
/* Transmittance evaluation first to use initial visibility without shadow. */
if (diffuse.sss_id != 0u && light.diffuse_power > 0.0) {
float delta = max(thickness, samp.occluder_delta + samp.bias);
float delta = max(thickness, -(samp.occluder_delta + samp.bias));
vec3 intensity = visibility * light.transmit_power *
light_translucent(sss_transmittance_tx,

View File

@ -0,0 +1,146 @@
/**
* Postprocess diffuse radiance output from the diffuse evaluation pass to mimic subsurface
* transmission.
*
* This implementation follows the technique described in the siggraph presentation:
* "Efficient screen space subsurface scattering Siggraph 2018"
* by Evgenii Golubev
*
* But, instead of having all the precomputed weights for all three color primaries,
* we precompute a weight profile texture to be able to support per pixel AND per channel radius.
**/
#pragma BLENDER_REQUIRE(gpu_shader_codegen_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_gbuffer_lib.glsl)
#pragma BLENDER_REQUIRE(common_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
/**
 * Per-channel shape parameter of the Burley profile.
 * `radius` is the per-channel scattering radius, `albedo` the surface color.
 */
vec3 burley_setup(vec3 radius, vec3 albedo)
{
  /* Scale albedo because we can have HDR value caused by BSDF sampling. */
  float albedo_peak = max(1e-6, max_v3(albedo));
  vec3 A = albedo / albedo_peak;
  /* Diffuse surface transmission, equation (6). */
  vec3 scaling = 1.9 - A + 3.5 * sqr(A - 0.8);
  /* Mean free path length adapted to fit ancient Cubic and Gaussian models. */
  vec3 mean_free_path = 0.25 * M_1_PI * radius;
  return mean_free_path / scaling;
}
/**
 * Per-channel Burley profile value at distance `r` for shape parameter `d`.
 */
vec3 burley_eval(vec3 d, float r)
{
  /* Slide 33. */
  vec3 slow_falloff = exp(-r / (3.0 * d));
  vec3 fast_falloff = slow_falloff * slow_falloff * slow_falloff;
  /** NOTE:
   * - Surface albedo is applied at the end.
   * - This is normalized diffuse model, so the equation is multiplied
   *   by 2*pi, which also matches cdf().
   */
  return (fast_falloff + slow_falloff) / (4.0 * d);
}
/**
 * Blur the diffuse radiance of subsurface pixels with the precomputed sample set.
 *
 * The output is blended additively: it holds the filtered radiance minus the unfiltered one,
 * multiplied by the surface color. Pixels without a subsurface ID, or whose filter footprint is
 * at most one pixel, output zero.
 */
void main(void)
{
vec2 center_uv = uvcoordsvar.xy;
ivec2 texel = ivec2(gl_FragCoord.xy);
/* View-space position of the shaded pixel, reconstructed from the depth stored in `hiz_tx`. */
float gbuffer_depth = texelFetch(hiz_tx, texel, 0).r;
vec3 vP = get_view_space_from_depth(center_uv, gbuffer_depth);
vec4 color_1_packed = texelFetch(gbuffer_color_tx, ivec3(texel, 1), 0);
vec4 gbuffer_2_packed = texelFetch(gbuffer_closure_tx, ivec3(texel, 2), 0);
ClosureDiffuse diffuse;
diffuse.sss_radius = gbuffer_sss_radii_unpack(gbuffer_2_packed.xyz);
diffuse.sss_id = gbuffer_object_id_unorm16_unpack(gbuffer_2_packed.w);
diffuse.color = gbuffer_color_unpack(color_1_packed);
if (diffuse.sss_id == 0u) {
/* Normal diffuse is already in combined pass. */
/* Refraction also go into this case. */
out_combined = vec4(0.0);
return;
}
/* Scale of the sample set on screen, derived from the largest channel radius. */
float max_radius = max_v3(diffuse.sss_radius);
float homcoord = ProjectionMatrix[2][3] * vP.z + ProjectionMatrix[3][3];
vec2 sample_scale = vec2(ProjectionMatrix[0][0], ProjectionMatrix[1][1]) *
(0.5 * max_radius / homcoord);
float pixel_footprint = sample_scale.x * textureSize(hiz_tx, 0).x;
if (pixel_footprint <= 1.0) {
/* Early out. */
out_combined = vec4(0.0);
return;
}
/* Keep every channel radius at least 1e-4 times the largest one. */
diffuse.sss_radius = max(vec3(1e-4), diffuse.sss_radius / max_radius) * max_radius;
vec3 d = burley_setup(diffuse.sss_radius, diffuse.color);
/* Do not rotate too much to avoid too much cache misses. */
float golden_angle = M_PI * (3.0 - sqrt(5.0));
float theta = interlieved_gradient_noise(gl_FragCoord.xy, 0, 0.0) * golden_angle;
float cos_theta = cos(theta);
/* NOTE(review): `sqrt` only yields the non-negative sine. This matches `sin(theta)` as long as
 * theta stays within [0..pi], which presumably holds if the noise is in [0..1] -- confirm. */
float sin_theta = sqrt(1.0 - sqr(cos_theta));
mat2 rot = mat2(cos_theta, sin_theta, -sin_theta, cos_theta);
mat2 scale = mat2(sample_scale.x, 0.0, 0.0, sample_scale.y);
mat2 sample_space = scale * rot;
vec3 accum_weight = vec3(0.0);
vec3 accum = vec3(0.0);
/* TODO/OPTI(fclem) Make separate sample set for lower radius. */
for (int i = 0; i < sss_buf.sample_len; i++) {
vec2 sample_uv = center_uv + sample_space * sss_buf.samples[i].xy;
float pdf_inv = sss_buf.samples[i].z;
float sample_depth = textureLod(hiz_tx, sample_uv * hiz_buf.uv_scale, 0.0).r;
vec3 sample_vP = get_view_space_from_depth(sample_uv, sample_depth);
/* RGB is the diffuse radiance, alpha carries the object ID of the sampled pixel. */
vec4 sample_data = texture(radiance_tx, sample_uv);
vec3 sample_radiance = sample_data.rgb;
uint sample_sss_id = uint(sample_data.a);
/* Only gather light from pixels sharing the same subsurface ID. */
if (sample_sss_id != diffuse.sss_id) {
continue;
}
/* Discard out of bounds samples. */
if (any(lessThan(sample_uv, vec2(0.0))) || any(greaterThan(sample_uv, vec2(1.0)))) {
continue;
}
/* Slide 34. */
float r = distance(sample_vP, vP);
vec3 weight = burley_eval(d, r) * pdf_inv;
accum += sample_radiance * weight;
accum_weight += weight;
}
/* Normalize the sum (slide 34). */
/* NOTE(review): if every sample was rejected, `accum_weight` is still zero here. The division
 * result is then caught by the NaN check below only if it evaluates to NaN. */
accum /= accum_weight;
/* The diffuse light render-pass receives the filtered radiance, before color is applied. */
if (rp_buf.diffuse_light_id >= 0) {
imageStore(rp_color_img, ivec3(texel, rp_buf.diffuse_light_id), vec4(accum, 1.0));
}
/* This pass uses additive blending.
 * Subtract the surface diffuse radiance so it's not added twice. */
accum -= texelFetch(radiance_tx, texel, 0).rgb;
/* Apply surface color on final radiance. */
accum *= diffuse.color;
/* Debug, detect NaNs. */
if (any(isnan(accum))) {
accum = vec3(1.0, 0.0, 1.0);
}
out_combined = vec4(accum, 0.0);
}

View File

@ -124,7 +124,7 @@ void main()
/* SubSurface Scattering. */
vec4 closure;
closure.xyz = gbuffer_sss_radii_pack(g_diffuse_data.sss_radius);
closure.w = gbuffer_object_id_unorm16_pack(g_diffuse_data.sss_id);
closure.w = gbuffer_object_id_unorm16_pack(g_diffuse_data.sss_id > 0 ? uint(resource_id) : 0);
imageStore(out_gbuff_closure_img, ivec3(out_texel, 2), closure);
}

View File

@ -63,6 +63,8 @@ void main()
g_holdout = saturate(g_holdout);
float thickness = nodetree_thickness();
vec3 diffuse_light = vec3(0.0);
vec3 reflection_light = vec3(0.0);
vec3 refraction_light = vec3(0.0);
@ -76,7 +78,7 @@ void main()
g_data.Ng,
cameraVec(g_data.P),
vP_z,
0.01 /* TODO(fclem) thickness. */,
thickness,
diffuse_light,
reflection_light,
shadow);

View File

@ -39,6 +39,7 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_light)
"eevee_light_data",
"eevee_shadow_data",
"eevee_deferred_base",
"eevee_transmittance_data",
"eevee_hiz_data",
"eevee_render_pass_out",
"draw_view",

View File

@ -6,7 +6,7 @@
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(eevee_hiz_data)
.sampler(15, ImageType::FLOAT_2D, "hiz_tx")
.sampler(HIZ_TEX_SLOT, ImageType::FLOAT_2D, "hiz_tx")
.uniform_buf(5, "HiZData", "hiz_buf");
GPU_SHADER_CREATE_INFO(eevee_hiz_update)

View File

@ -0,0 +1,21 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
/* Resources needed to evaluate subsurface transmittance during light evaluation. */
GPU_SHADER_CREATE_INFO(eevee_transmittance_data)
/* Enables the `#ifdef SSS_TRANSMITTANCE` code paths in the shaders. */
.define("SSS_TRANSMITTANCE")
/* 1D transmittance profile texture. */
.sampler(SSS_TRANSMITTANCE_TEX_SLOT, ImageType::FLOAT_1D, "sss_transmittance_tx");
/* Subsurface evaluation shader, using `eevee_subsurface_eval_frag.glsl` as fragment stage. */
GPU_SHADER_CREATE_INFO(eevee_subsurface_eval)
.do_static_compilation(true)
.additional_info("eevee_shared", "eevee_render_pass_out")
/* Sample set and settings. */
.uniform_buf(1, "SubsurfaceData", "sss_buf")
/* G-buffer layers read to recover the subsurface radii, object ID and surface color. */
.sampler(0, ImageType::FLOAT_2D_ARRAY, "gbuffer_closure_tx")
.sampler(1, ImageType::FLOAT_2D_ARRAY, "gbuffer_color_tx")
/* Diffuse radiance to filter. */
.sampler(2, ImageType::FLOAT_2D, "radiance_tx")
.early_fragment_test(true)
.fragment_out(0, Type::VEC4, "out_combined")
.fragment_source("eevee_subsurface_eval_frag.glsl")
/* TODO(fclem) Output to diffuse pass without feedback loop. */
.additional_info("draw_fullscreen", "draw_view", "eevee_hiz_data");

View File

@ -652,6 +652,7 @@ set(SRC_SHADER_CREATE_INFOS
../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_shadow_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_subsurface_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh
../draw/engines/gpencil/shaders/infos/gpencil_info.hh
../draw/engines/gpencil/shaders/infos/gpencil_vfx_info.hh