This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/source/blender/gpu/intern/gpu_material.c
Clément Foucault cdbda1c3d8 GPUPass: Refactor gpupass caching system to allow fast gpumaterial creation.
This is part of the work needed to refactor the material parameters update.

Now the gpupass cache is polled before adding the gpumaterial to the
deferred compilation queue.

We store gpupasses in a single linked list grouped based on their hashes.
This is not the most efficient way but it can be improved upon later.
2018-06-07 12:02:42 +02:00

713 lines
19 KiB
C

/*
* ***** BEGIN GPL LICENSE BLOCK *****
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* The Original Code is Copyright (C) 2006 Blender Foundation.
* All rights reserved.
*
* The Original Code is: all of this file.
*
* Contributor(s): Brecht Van Lommel.
*
* ***** END GPL LICENSE BLOCK *****
*/
/** \file blender/gpu/intern/gpu_material.c
* \ingroup gpu
*
* Manages materials, lights and textures.
*/
#include <math.h>
#include <string.h>
#include "MEM_guardedalloc.h"
#include "DNA_lamp_types.h"
#include "DNA_material_types.h"
#include "DNA_object_types.h"
#include "DNA_scene_types.h"
#include "DNA_world_types.h"
#include "BLI_math.h"
#include "BLI_blenlib.h"
#include "BLI_utildefines.h"
#include "BLI_rand.h"
#include "BKE_anim.h"
#include "BKE_colorband.h"
#include "BKE_colortools.h"
#include "BKE_global.h"
#include "BKE_image.h"
#include "BKE_layer.h"
#include "BKE_main.h"
#include "BKE_node.h"
#include "BKE_scene.h"
#include "IMB_imbuf_types.h"
#include "GPU_extensions.h"
#include "GPU_framebuffer.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "GPU_uniformbuffer.h"
#include "DRW_engine.h"
#include "gpu_codegen.h"
#ifdef WITH_OPENSUBDIV
# include "BKE_DerivedMesh.h"
#endif
/* Structs */
/**
 * GPU-side state for one shading node tree variation.
 * Instances are looked up by their (engine_type, options) pair, see
 * #GPU_material_from_nodetree_find.
 */
struct GPUMaterial {
	Scene *scene; /* DEPRECATED was only useful for lamps */
	Material *ma;
	/* material for mesh surface, worlds or something else.
	 * some code generation is done differently depending on the use case */
	int type; /* DEPRECATED */
	GPUMaterialStatus status; /* Compilation state, see GPU_material_from_nodetree / GPU_material_compile. */
	const void *engine_type; /* attached engine type */
	int options; /* to identify shader variations (shadow, probe, world background...) */
	/* for creating the material */
	ListBase nodes;
	GPUNodeLink *outlink; /* Output link; only the first one set through GPU_material_output_link is kept. */
	/* for binding the material */
	GPUPass *pass; /* May be NULL when pass generation or compilation failed. */
	ListBase inputs; /* GPUInput */
	GPUVertexAttribs attribs;
	int builtins;
	int alpha, obcolalpha;
	int dynproperty;
	/* for passing uniforms */
	int viewmatloc, invviewmatloc;
	int obmatloc, invobmatloc;
	int localtoviewmatloc, invlocaltoviewmatloc;
	int obcolloc, obautobumpscaleloc;
	int cameratexcofacloc;
	int partscalarpropsloc;
	int partcoloc;
	int partvel;
	int partangvel;
	int objectinfoloc;
	bool is_opensubdiv;
	/* XXX: Should be in Material. But it depends on the output node
	 * used and since the output selection is different for GPUMaterial...
	 */
	int domain; /* Bit-mask of GPU_DOMAIN_* flags. */
	/* Used by 2.8 pipeline */
	GPUUniformBuffer *ubo; /* UBOs for shader uniforms. */
	/* Eevee SSS */
	GPUUniformBuffer *sss_profile; /* UBO containing SSS profile. */
	GPUTexture *sss_tex_profile; /* 1D texture containing the SSS translucence profile. */
	/* The three pointers below are stored as passed to GPU_material_sss_profile_create
	 * and are never freed by this file. */
	float *sss_radii; /* Per-channel scattering radii (3 floats are read). */
	int sss_samples; /* Sample count used for the last kernel computation. */
	short int *sss_falloff; /* Falloff type (compared against SHD_SUBSURFACE_*). */
	float *sss_sharpness; /* Optional sharpness, may be NULL. */
	bool sss_dirty; /* True when the SSS kernel and texture must be recomputed. */
};
/* Bit flags stored in GPUMaterial.domain. */
enum {
	GPU_DOMAIN_SURFACE = (1 << 0), /* Set when the node tree has a surface output. */
	GPU_DOMAIN_VOLUME = (1 << 1), /* Set when the node tree has a volume output. */
	GPU_DOMAIN_SSS = (1 << 2) /* NOTE(review): not set anywhere in the code visible here. */
};
/* Functions */
/**
 * Free every GPUMaterial referenced by the links of \a gpumaterial together
 * with the GPU resources it owns, then free the links themselves.
 */
void GPU_material_free(ListBase *gpumaterial)
{
	LinkData *link = gpumaterial->first;

	while (link != NULL) {
		GPUMaterial *mat = link->data;

		/* Cancel / wait any pending lazy compilation. */
		DRW_deferred_shader_remove(mat);

		GPU_pass_free_nodes(&mat->nodes);
		GPU_inputs_free(&mat->inputs);

		if (mat->pass != NULL) {
			GPU_pass_release(mat->pass);
		}
		if (mat->ubo) {
			GPU_uniformbuffer_free(mat->ubo);
		}
		if (mat->sss_tex_profile) {
			GPU_texture_free(mat->sss_tex_profile);
		}
		if (mat->sss_profile) {
			GPU_uniformbuffer_free(mat->sss_profile);
		}

		MEM_freeN(mat);

		/* Only the material was freed above, the link is still valid here. */
		link = link->next;
	}

	BLI_freelistN(gpumaterial);
}
/** Return the `builtins` field of \a material as a GPUBuiltin value. */
GPUBuiltin GPU_get_material_builtins(GPUMaterial *material)
{
	return material->builtins;
}
/** Return the scene pointer stored on \a material when it was created. */
Scene *GPU_material_scene(GPUMaterial *material)
{
	return material->scene;
}
/**
 * Return the `type` field of \a material.
 * \note The field is marked DEPRECATED in the struct; nothing in the code
 * visible here writes it after the zero-initializing allocation.
 */
GPUMatType GPU_Material_get_type(GPUMaterial *material)
{
	return material->type;
}
/**
 * Return the GPUPass attached to \a material.
 * \return The pass, or NULL when pass generation or compilation failed.
 */
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
	return material->pass;
}
/** Return a pointer to the input list embedded in \a material (not a copy). */
ListBase *GPU_material_get_inputs(GPUMaterial *material)
{
	return &material->inputs;
}
/**
 * Return the uniform buffer of \a material.
 * \return The buffer, or NULL if #GPU_material_create_uniform_buffer was never called.
 */
GPUUniformBuffer *GPU_material_get_uniform_buffer(GPUMaterial *material)
{
	return material->ubo;
}
/**
 * Create dynamic UBO from parameters and store it on \a material.
 *
 * \param material: Material receiving the new buffer.
 * \param inputs: ListBase of BLI_genericNodeN(GPUInput).
 *
 * \note The `ubo` field is overwritten unconditionally; a buffer created by
 * an earlier call is not freed here.
 */
void GPU_material_create_uniform_buffer(GPUMaterial *material, ListBase *inputs)
{
	material->ubo = GPU_uniformbuffer_dynamic_create(inputs, NULL);
}
/**
 * Flag the uniform data of every material in \a gpumaterials as outdated:
 * the parameter UBO is tagged dirty and, for materials that own an SSS
 * profile, the SSS kernel is scheduled for recomputation.
 */
void GPU_material_uniform_buffer_tag_dirty(ListBase *gpumaterials)
{
	LinkData *link;

	for (link = gpumaterials->first; link != NULL; link = link->next) {
		GPUMaterial *mat = link->data;

		if (mat->ubo) {
			GPU_uniformbuffer_tag_dirty(mat->ubo);
		}
		if (mat->sss_profile) {
			mat->sss_dirty = true;
		}
	}
}
/* Eevee Subsurface scattering. */
/* Based on Separable SSS by Jorge Jimenez and Diego Gutierrez. */
#define SSS_SAMPLES 65
#define SSS_EXPONENT 2.0f /* Importance sampling exponent */
typedef struct GPUSssKernelData {
float kernel[SSS_SAMPLES][4];
float param[3], max_radius;
int samples;
} GPUSssKernelData;
static void sss_calculate_offsets(GPUSssKernelData *kd, int count, float exponent)
{
float step = 2.0f / (float)(count - 1);
for (int i = 0; i < count; i++) {
float o = ((float)i) * step - 1.0f;
float sign = (o < 0.0f) ? -1.0f : 1.0f;
float ofs = sign * fabsf(powf(o, exponent));
kd->kernel[i][3] = ofs;
}
}
#define GAUSS_TRUNCATE 12.46f
static float gaussian_profile(float r, float radius)
{
const float v = radius * radius * (0.25f * 0.25f);
const float Rm = sqrtf(v * GAUSS_TRUNCATE);
if (r >= Rm) {
return 0.0f;
}
return expf(-r * r / (2.0f * v)) / (2.0f * M_PI * v);
}
#define BURLEY_TRUNCATE 16.0f
#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE)
static float burley_profile(float r, float d)
{
float exp_r_3_d = expf(-r / (3.0f * d));
float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d;
return (exp_r_d + exp_r_3_d) / (4.0f * d);
}
static float cubic_profile(float r, float radius, float sharpness)
{
float Rm = radius * (1.0f + sharpness);
if (r >= Rm) {
return 0.0f;
}
/* custom variation with extra sharpness, to match the previous code */
const float y = 1.0f / (1.0f + sharpness);
float Rmy, ry, ryinv;
Rmy = powf(Rm, y);
ry = powf(r, y);
ryinv = (r > 0.0f) ? powf(r, y - 1.0f) : 0.0f;
const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy;
const float f = Rmy - ry;
const float num = f * (f * f) * (y * ryinv);
return (10.0f * num) / (Rmy5 * M_PI);
}
/**
 * Evaluate the scattering profile selected by \a falloff_type at distance |r|.
 *
 * Burley and random-walk falloffs share the Burley profile (divided by its
 * truncated CDF), cubic uses the cubic profile, and every other value falls
 * back to the gaussian profile.
 *
 * \param sharpness: Only used by the cubic profile.
 * \param param: Per-channel profile parameter (radius or `d`, depending on the falloff).
 */
static float eval_profile(float r, short falloff_type, float sharpness, float param)
{
	const float dist = fabsf(r);
	const bool use_burley = (falloff_type == SHD_SUBSURFACE_BURLEY) ||
	                        (falloff_type == SHD_SUBSURFACE_RANDOM_WALK);

	if (use_burley) {
		return burley_profile(dist, param) / BURLEY_TRUNCATE_CDF;
	}
	if (falloff_type == SHD_SUBSURFACE_CUBIC) {
		return cubic_profile(dist, param, sharpness);
	}
	return gaussian_profile(dist, param);
}
/* Resolution for each sample of the precomputed kernel profile */
#define INTEGRAL_RESOLUTION 32
/**
 * Integrate the profile over [x0..x1] with the midpoint rule, using
 * INTEGRAL_RESOLUTION equally sized slices.
 */
static float eval_integral(float x0, float x1, short falloff_type, float sharpness, float param)
{
	const float span = x1 - x0;
	const float slice_width = span / INTEGRAL_RESOLUTION;
	float accum = 0.0f;
	int slice = 0;

	while (slice < INTEGRAL_RESOLUTION) {
		/* Sample at the middle of the slice. */
		const float mid = x0 + span * ((float)slice + 0.5f) / (float)INTEGRAL_RESOLUTION;
		accum += eval_profile(mid, falloff_type, sharpness, param) * slice_width;
		slice++;
	}

	return accum;
}
#undef INTEGRAL_RESOLUTION
/**
 * Fill \a kd with a normalized 1D SSS kernel.
 *
 * \param kd: Output. Receives `param`, `max_radius`, `samples` and the first
 * `samples` entries of `kernel` (xyz = per-channel weight, w = offset in [-1..1]).
 * The center sample is stored first.
 * \param radii: Three per-channel scattering radii; each one is raised to a
 * tiny positive minimum before use.
 * \param sample_ct: Requested sample count. Clamped to [2, SSS_SAMPLES]: the
 * code indexes `kernel[1]` and `kernel[sample_ct - 2]`, and the kernel array
 * only holds SSS_SAMPLES entries.
 * \param falloff_type: Falloff identifier, compared against SHD_SUBSURFACE_*.
 * \param sharpness: Extra sharpness, only used by the cubic falloff.
 */
static void compute_sss_kernel(
		GPUSssKernelData *kd, float *radii, int sample_ct, int falloff_type, float sharpness)
{
	/* Guard the fixed-size kernel array against out-of-range sample counts. */
	if (sample_ct > SSS_SAMPLES) {
		sample_ct = SSS_SAMPLES;
	}
	else if (sample_ct < 2) {
		sample_ct = 2;
	}

	float rad[3];
	/* Minimum radius */
	rad[0] = MAX2(radii[0], 1e-15f);
	rad[1] = MAX2(radii[1], 1e-15f);
	rad[2] = MAX2(radii[2], 1e-15f);

	/* Christensen-Burley fitting */
	float l[3], d[3];

	if (falloff_type == SHD_SUBSURFACE_BURLEY ||
	    falloff_type == SHD_SUBSURFACE_RANDOM_WALK)
	{
		mul_v3_v3fl(l, rad, 0.25f * M_1_PI);
		const float A = 1.0f;
		const float s = 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f);
		/* XXX 0.6f Out of nowhere to match cycles! Empirical! Can be tweak better. */
		mul_v3_v3fl(d, l, 0.6f / s);
		mul_v3_v3fl(rad, d, BURLEY_TRUNCATE);
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);

		copy_v3_v3(kd->param, d);
	}
	else if (falloff_type == SHD_SUBSURFACE_CUBIC) {
		copy_v3_v3(kd->param, rad);
		/* The cubic profile reaches zero at radius * (1 + sharpness). */
		mul_v3_fl(rad, 1.0f + sharpness);
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);
	}
	else {
		kd->max_radius = MAX3(rad[0], rad[1], rad[2]);

		copy_v3_v3(kd->param, rad);
	}

	/* Compute samples locations on the 1d kernel [-1..1] */
	sss_calculate_offsets(kd, sample_ct, SSS_EXPONENT);

	/* Weights sum for normalization */
	float sum[3] = {0.0f, 0.0f, 0.0f};

	/* Compute integral of each sample footprint */
	for (int i = 0; i < sample_ct; i++) {
		float x0, x1;

		/* The footprint of a sample spans from the midpoint with its previous
		 * neighbor to the midpoint with its next neighbor; the two outermost
		 * samples extend outwards by half the distance to their only neighbor. */
		if (i == 0) {
			x0 = kd->kernel[0][3] - fabsf(kd->kernel[0][3] - kd->kernel[1][3]) / 2.0f;
		}
		else {
			x0 = (kd->kernel[i - 1][3] + kd->kernel[i][3]) / 2.0f;
		}

		if (i == sample_ct - 1) {
			x1 = kd->kernel[sample_ct - 1][3] + fabsf(kd->kernel[sample_ct - 2][3] - kd->kernel[sample_ct - 1][3]) / 2.0f;
		}
		else {
			x1 = (kd->kernel[i][3] + kd->kernel[i + 1][3]) / 2.0f;
		}

		/* Offsets are normalized, scale to actual distances. */
		x0 *= kd->max_radius;
		x1 *= kd->max_radius;

		kd->kernel[i][0] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[0]);
		kd->kernel[i][1] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[1]);
		kd->kernel[i][2] = eval_integral(x0, x1, falloff_type, sharpness, kd->param[2]);

		sum[0] += kd->kernel[i][0];
		sum[1] += kd->kernel[i][1];
		sum[2] += kd->kernel[i][2];
	}

	for (int i = 0; i < 3; ++i) {
		if (sum[i] > 0.0f) {
			/* Normalize */
			for (int j = 0; j < sample_ct; j++) {
				kd->kernel[j][i] /= sum[i];
			}
		}
		else {
			/* Avoid 0 kernel sum. */
			kd->kernel[sample_ct / 2][i] = 1.0f;
		}
	}

	/* Put center sample at the start of the array (to sample first) */
	float tmpv[4];
	copy_v4_v4(tmpv, kd->kernel[sample_ct / 2]);
	for (int i = sample_ct / 2; i > 0; i--) {
		copy_v4_v4(kd->kernel[i], kd->kernel[i - 1]);
	}
	copy_v4_v4(kd->kernel[0], tmpv);

	kd->samples = sample_ct;
}
#define INTEGRAL_RESOLUTION 512
/**
 * Compute the translucence lookup table matching the kernel parameters in \a kd.
 *
 * \param kd: Kernel data, only `max_radius` and `param` are read.
 * \param resolution: Number of RGBA texels to produce, must be at least 1.
 * \param falloff_type: Falloff identifier forwarded to the profile evaluation.
 * \param sharpness: Sharpness forwarded to the profile evaluation.
 * \param output: Receives a newly allocated array of `resolution * 4` floats.
 * The caller owns it and must release it with MEM_freeN. Texel 0 is white
 * (per channel, when that channel has any contribution), the last texel is
 * black and the alpha channel is left at zero.
 */
static void compute_sss_translucence_kernel(
		const GPUSssKernelData *kd, int resolution, short falloff_type, float sharpness, float **output)
{
	float (*texels)[4];
	texels = MEM_callocN(sizeof(float) * 4 * resolution, "compute_sss_translucence_kernel");
	*output = (float *)texels;

	/* Last texel should be black, hence the - 1. */
	for (int i = 0; i < resolution - 1; ++i) {
		/* Distance from surface. */
		float d = kd->max_radius * ((float)i + 0.00001f) / ((float)resolution);

		/* For each distance d we compute the radiance incoming from a hypothetical parallel plane. */
		/* Compute radius of the footprint on the hypothetical plane. */
		float r_fp = sqrtf(kd->max_radius * kd->max_radius - d * d);
		float r_step = r_fp / INTEGRAL_RESOLUTION;
		float area_accum = 0.0f;

		/* Iterate with an integer ring index: accumulating `r += r_step` in a
		 * float makes the number of rings depend on rounding, which can add an
		 * extra ring lying outside of the footprint. */
		for (int ring = 0; ring < INTEGRAL_RESOLUTION; ring++) {
			const float r = (float)ring * r_step;
			const float r_next = (float)(ring + 1) * r_step;

			/* Compute distance to the "shading" point through the medium. */
			/* r_step * 0.5f to put sample between the area borders */
			float dist = hypotf(r + r_step * 0.5f, d);

			float profile[3];
			profile[0] = eval_profile(dist, falloff_type, sharpness, kd->param[0]);
			profile[1] = eval_profile(dist, falloff_type, sharpness, kd->param[1]);
			profile[2] = eval_profile(dist, falloff_type, sharpness, kd->param[2]);

			/* Since the profile and configuration are radially symmetrical we
			 * can just evaluate it once and weight it accordingly */
			float disk_area = (M_PI * r_next * r_next) - (M_PI * r * r);

			mul_v3_fl(profile, disk_area);
			add_v3_v3(texels[i], profile);
			area_accum += disk_area;
		}

		/* Normalize over the disk, a degenerate footprint stays black. */
		if (area_accum > 0.0f) {
			mul_v3_fl(texels[i], 1.0f / area_accum);
		}
	}

	/* Normalize every texel relative to the first one. */
	float norm[3];
	for (int c = 0; c < 3; c++) {
		norm[c] = (texels[0][c] > 0.0f) ? texels[0][c] : 1.0f;
	}
	for (int j = resolution - 2; j > 0; j--) {
		texels[j][0] /= norm[0];
		texels[j][1] /= norm[1];
		texels[j][2] /= norm[2];
	}

	/* First texel should be white */
	texels[0][0] = (texels[0][0] > 0.0f) ? 1.0f : 0.0f;
	texels[0][1] = (texels[0][1] > 0.0f) ? 1.0f : 0.0f;
	texels[0][2] = (texels[0][2] > 0.0f) ? 1.0f : 0.0f;

	/* Dim the last few texels for smoother transition.
	 * Indices are checked so that tiny tables are not written out of bounds. */
	const float fade[3] = {0.25f, 0.5f, 0.75f};
	for (int k = 0; k < 3; k++) {
		const int texel = resolution - 2 - k;
		if (texel >= 0) {
			mul_v3_fl(texels[texel], fade[k]);
		}
	}
}
#undef INTEGRAL_RESOLUTION
/**
 * Register the SSS parameters of \a material and make sure its profile UBO exists.
 *
 * \param radii: Per-channel scattering radii.
 * \param falloff_type: Falloff identifier.
 * \param sharpness: Optional sharpness, may be NULL.
 *
 * \note Only the pointers are stored, the values are read later by
 * #GPU_material_sss_profile_get. The pointed-to data therefore has to stay
 * valid for as long as the profile can be requested.
 */
void GPU_material_sss_profile_create(GPUMaterial *material, float *radii, short *falloff_type, float *sharpness)
{
	/* Update / Create UBO */
	if (!material->sss_profile) {
		material->sss_profile = GPU_uniformbuffer_create(sizeof(GPUSssKernelData), NULL, NULL);
	}

	material->sss_radii = radii;
	material->sss_falloff = falloff_type;
	material->sss_sharpness = sharpness;

	/* Force the kernel to be recomputed on the next request. */
	material->sss_dirty = true;
}
/**
 * Return the SSS profile UBO of \a material, recomputing the kernel and the
 * translucence texture first when the material is tagged dirty or when
 * \a sample_ct differs from the count used for the previous computation.
 *
 * \param sample_ct: Number of kernel samples requested.
 * \param tex_profile: Optional, receives the translucence profile texture.
 * Left untouched when NULL is returned.
 * \return The UBO, or NULL when no SSS radii were registered on the material.
 */
struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, int sample_ct, GPUTexture **tex_profile)
{
	/* Number of texels of the translucence lookup texture. */
	const int translucence_res = 64;

	if (material->sss_radii == NULL) {
		return NULL;
	}

	if (material->sss_dirty || (material->sss_samples != sample_ct)) {
		GPUSssKernelData kd;

		float sharpness = (material->sss_sharpness != NULL) ? *material->sss_sharpness : 0.0f;
		/* Guard the falloff pointer the same way as the sharpness pointer.
		 * NOTE(review): 0 is expected to match none of the explicitly handled
		 * SHD_SUBSURFACE_* values and so to select the default (gaussian)
		 * branch of the profile evaluation - confirm against the enum. */
		const short falloff = (material->sss_falloff != NULL) ? *material->sss_falloff : 0;

		/* XXX Black magic but it seems to fit. Maybe because we integrate -1..1 */
		sharpness *= 0.5f;

		compute_sss_kernel(&kd, material->sss_radii, sample_ct, falloff, sharpness);

		/* Update / Create UBO */
		GPU_uniformbuffer_update(material->sss_profile, &kd);

		/* Update / Create Tex */
		float *translucence_profile;
		compute_sss_translucence_kernel(&kd, translucence_res, falloff, sharpness, &translucence_profile);

		if (material->sss_tex_profile != NULL) {
			GPU_texture_free(material->sss_tex_profile);
		}

		material->sss_tex_profile = GPU_texture_create_1D(translucence_res, GPU_RGBA16F, translucence_profile, NULL);

		/* The texel data is no longer needed once the texture has been created. */
		MEM_freeN(translucence_profile);

		material->sss_samples = sample_ct;
		material->sss_dirty = false;
	}

	if (tex_profile != NULL) {
		*tex_profile = material->sss_tex_profile;
	}
	return material->sss_profile;
}
#undef SSS_EXPONENT
#undef SSS_SAMPLES
/** Copy the vertex attributes of \a material into \a attribs (struct copy). */
void GPU_material_vertex_attributes(GPUMaterial *material, GPUVertexAttribs *attribs)
{
	*attribs = material->attribs;
}
/**
 * Set \a link as the output link of \a material.
 * Only the first link is kept, later calls are ignored.
 */
void GPU_material_output_link(GPUMaterial *material, GPUNodeLink *link)
{
	if (material->outlink != NULL) {
		return;
	}
	material->outlink = link;
}
/** Append \a node to the end of the node list of \a material. */
void gpu_material_add_node(GPUMaterial *material, GPUNode *node)
{
	BLI_addtail(&material->nodes, node);
}
/**
 * Return the current compilation status of \a mat.
 * The code in this file sets it to GPU_MAT_QUEUED, GPU_MAT_SUCCESS or GPU_MAT_FAILED.
 */
GPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
	return mat->status;
}
/* Code generation */
/** Return true when color management is enabled for the scene attached to \a mat. */
bool GPU_material_do_color_management(GPUMaterial *mat)
{
	return BKE_scene_check_color_management_enabled(mat->scene);
}
/** Return true when the GPU_DOMAIN_SURFACE flag is set on \a mat. */
bool GPU_material_use_domain_surface(GPUMaterial *mat)
{
	return (mat->domain & GPU_DOMAIN_SURFACE) != 0;
}
/** Return true when the GPU_DOMAIN_VOLUME flag is set on \a mat. */
bool GPU_material_use_domain_volume(GPUMaterial *mat)
{
	return (mat->domain & GPU_DOMAIN_VOLUME) != 0;
}
/**
 * Search \a gpumaterials for a material created for \a engine_type with
 * the given \a options.
 *
 * \return The first matching material, or NULL when there is none.
 */
GPUMaterial *GPU_material_from_nodetree_find(
		ListBase *gpumaterials, const void *engine_type, int options)
{
	LinkData *link = gpumaterials->first;

	while (link != NULL) {
		GPUMaterial *candidate = (GPUMaterial *)link->data;
		const bool same_engine = (candidate->engine_type == engine_type);

		if (same_engine && candidate->options == options) {
			return candidate;
		}
		link = link->next;
	}

	return NULL;
}
/**
 * Create a GPUMaterial from \a ntree, append it to \a gpumaterials and return it.
 *
 * The resulting status is GPU_MAT_SUCCESS when an already compiled shader was
 * found, GPU_MAT_QUEUED when the pass still needs #GPU_material_compile, and
 * GPU_MAT_FAILED when the tree has no output link or no pass could be obtained.
 *
 * \note Caller must use #GPU_material_from_nodetree_find to re-use existing materials,
 * This is enforced since constructing other arguments to this function may be expensive
 * so only do this when they are needed.
 */
GPUMaterial *GPU_material_from_nodetree(
		Scene *scene, struct bNodeTree *ntree, ListBase *gpumaterials, const void *engine_type, int options,
		const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines)
{
	/* Caller must re-use materials. */
	BLI_assert(GPU_material_from_nodetree_find(gpumaterials, engine_type, options) == NULL);

	/* allocate material */
	GPUMaterial *mat = MEM_callocN(sizeof(GPUMaterial), "GPUMaterial");
	mat->scene = scene;
	mat->engine_type = engine_type;
	mat->options = options;

	ntreeGPUMaterialNodes(ntree, mat, NODE_NEW_SHADING | NODE_NEWER_SHADING);

	bool has_volume_output, has_surface_output;
	ntreeGPUMaterialDomain(ntree, &has_surface_output, &has_volume_output);

	mat->domain |= has_surface_output ? GPU_DOMAIN_SURFACE : 0;
	mat->domain |= has_volume_output ? GPU_DOMAIN_VOLUME : 0;

	if (mat->outlink == NULL) {
		/* Nothing is connected to the output. */
		mat->status = GPU_MAT_FAILED;
	}
	else {
		/* Prune the unused nodes and extract attribs before compiling so the
		 * generated VBOs are ready to accept the future shader. */
		GPU_nodes_prune(&mat->nodes, mat->outlink);
		GPU_nodes_get_vertex_attributes(&mat->nodes, &mat->attribs);

		/* Create source code and search pass cache for an already compiled version. */
		mat->pass = GPU_generate_pass_new(
		        mat, mat->outlink, &mat->attribs, &mat->nodes,
		        vert_code, geom_code, frag_lib, defines);

		GPUShader *sh = (mat->pass != NULL) ? GPU_pass_shader_get(mat->pass) : NULL;

		if (mat->pass == NULL) {
			/* We had a cache hit and the shader has already failed to compile. */
			mat->status = GPU_MAT_FAILED;
		}
		else if (sh != NULL) {
			/* We had a cache hit and the shader is already compiled. */
			mat->status = GPU_MAT_SUCCESS;
			GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
		}
		else {
			/* Compilation still has to happen, see GPU_material_compile. */
			mat->status = GPU_MAT_QUEUED;
		}
	}

	/* note that even if building the shader fails in some way, we still keep
	 * it to avoid trying to compile again and again, and simply do not use
	 * the actual shader on drawing */
	LinkData *link = MEM_callocN(sizeof(LinkData), "GPUMaterialLink");
	link->data = mat;
	BLI_addtail(gpumaterials, link);

	return mat;
}
/**
 * Compile the pass of a queued material and update its status.
 *
 * On success the dynamic inputs are extracted from the node list. On failure
 * the nodes are freed, the pass is released and `mat->pass` is reset to NULL.
 */
void GPU_material_compile(GPUMaterial *mat)
{
	/* Only run once! */
	BLI_assert(mat->status == GPU_MAT_QUEUED);
	BLI_assert(mat->pass);

	/* NOTE: The shader may have already been compiled here since we are
	 * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
	GPU_pass_compile(mat->pass);

	GPUShader *sh = GPU_pass_shader_get(mat->pass);

	if (sh == NULL) {
		mat->status = GPU_MAT_FAILED;
		GPU_pass_free_nodes(&mat->nodes);
		GPU_pass_release(mat->pass);
		mat->pass = NULL;
		return;
	}

	mat->status = GPU_MAT_SUCCESS;
	GPU_nodes_extract_dynamic_inputs(sh, &mat->inputs, &mat->nodes);
}
/**
 * Free the GPU materials of every Material and World in `G.main`, and
 * those of the default material.
 */
void GPU_materials_free(void)
{
	extern Material defmaterial;

	for (Material *ma = G.main->mat.first; ma != NULL; ma = ma->id.next) {
		GPU_material_free(&ma->gpumaterial);
	}

	for (World *wo = G.main->world.first; wo != NULL; wo = wo->id.next) {
		GPU_material_free(&wo->gpumaterial);
	}

	GPU_material_free(&defmaterial.gpumaterial);
}