GPU: Revert part of D16017 that was accidentally committed
This code slipped through the final review step, most likely because of a faulty merge.

Fixes T101372: Regression: World shader setup crashes Blender in rendered view.

Regression introduced by rB697b447c2069bbbbaa9929aab0ea1f66ef8bf4d0.
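In practical terms, the accidentally committed code added an extra optimize_graph flag to GPU_generate_pass along with a deferred material-optimization queue built on top of it; this revert removes both again. As a minimal sketch (copied from one of the header hunks below; all types involved are Blender's own), the declaration after the revert reads:

GPUPass *GPU_generate_pass(GPUMaterial *material,
                           struct GPUNodeGraph *graph,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk);

Callers such as GPU_material_from_nodetree and GPU_material_from_callbacks are adjusted accordingly in the hunks below, dropping the trailing boolean argument.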
@@ -201,7 +201,6 @@ void DRW_gpu_render_context_enable(void *re_gpu_context);
void DRW_gpu_render_context_disable(void *re_gpu_context);

void DRW_deferred_shader_remove(struct GPUMaterial *mat);
void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat);

/**
 * Get DrawData from the given ID-block. In order for this to work, we assume that
@@ -471,8 +471,6 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
                                       this);
  GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
  GPU_material_compile(gpumat);
  /* Queue deferred material optimization. */
  DRW_shader_queue_optimize_material(gpumat);
  return gpumat;
}
@@ -251,7 +251,6 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
                                             bool deferred,
                                             GPUCodegenCallbackFn callback,
                                             void *thunk);
void DRW_shader_queue_optimize_material(struct GPUMaterial *mat);
void DRW_shader_free(struct GPUShader *shader);
#define DRW_SHADER_FREE_SAFE(shader) \
  do { \
@@ -55,9 +55,6 @@ typedef struct DRWShaderCompiler {
  ListBase queue; /* GPUMaterial */
  SpinLock list_lock;

  /** Optimization queue. */
  ListBase optimize_queue; /* GPUMaterial */

  void *gl_context;
  GPUContext *gpu_context;
  bool own_context;

@@ -113,29 +110,8 @@ static void drw_deferred_shader_compilation_exec(
      MEM_freeN(link);
    }
    else {
      /* Check for Material Optimization job once there are no more
       * shaders to compile. */
      BLI_spin_lock(&comp->list_lock);
      /* Pop tail because it will be less likely to lock the main thread
       * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
      link = (LinkData *)BLI_poptail(&comp->optimize_queue);
      GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : NULL;
      if (optimize_mat) {
        /* Avoid another thread freeing the material during optimization. */
        GPU_material_acquire(optimize_mat);
      }
      BLI_spin_unlock(&comp->list_lock);

      if (optimize_mat) {
        /* Compile optimized material shader. */
        GPU_material_optimize(optimize_mat);
        GPU_material_release(optimize_mat);
        MEM_freeN(link);
      }
      else {
        /* No more materials to optimize, or shaders to compile. */
        break;
      }
      /* No more materials to optimize, or shaders to compile. */
      break;
    }

    if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {

@@ -157,7 +133,6 @@ static void drw_deferred_shader_compilation_free(void *custom_data)

  BLI_spin_lock(&comp->list_lock);
  BLI_freelistN(&comp->queue);
  BLI_freelistN(&comp->optimize_queue);
  BLI_spin_unlock(&comp->list_lock);

  if (comp->own_context) {

@@ -173,13 +148,34 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
  MEM_freeN(comp);
}

/**
 * Append either shader compilation or optimization job to deferred queue and
 * ensure shader compilation worker is active.
 * We keep two separate queue's to ensure core compilations always complete before optimization.
 */
static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
{
  if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
    return;
  }
  /* Do not defer the compilation if we are rendering for image.
   * deferred rendering is only possible when `evil_C` is available */
  if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
    deferred = false;
  }

  if (!deferred) {
    DRW_deferred_shader_remove(mat);
    /* Shaders could already be compiling. Have to wait for compilation to finish. */
    while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
      PIL_sleep_ms(20);
    }
    if (GPU_material_status(mat) == GPU_MAT_CREATED) {
      GPU_material_compile(mat);
    }
    return;
  }

  /* Don't add material to the queue twice. */
  if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
    return;
  }

  const bool use_main_context = GPU_use_main_context_workaround();
  const bool job_own_context = !use_main_context;

@@ -200,7 +196,6 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
  if (old_comp) {
    BLI_spin_lock(&old_comp->list_lock);
    BLI_movelisttolist(&comp->queue, &old_comp->queue);
    BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
    BLI_spin_unlock(&old_comp->list_lock);
    /* Do not recreate context, just pass ownership. */
    if (old_comp->gl_context) {

@@ -211,18 +206,9 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
    }
  }

  /* Add to either compilation or optimization queue. */
  if (is_optimization_job) {
    BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
    LinkData *node = BLI_genericNodeN(mat);
    BLI_addtail(&comp->optimize_queue, node);
  }
  else {
    GPU_material_status_set(mat, GPU_MAT_QUEUED);
    LinkData *node = BLI_genericNodeN(mat);
    BLI_addtail(&comp->queue, node);
  }
  GPU_material_status_set(mat, GPU_MAT_QUEUED);
  LinkData *node = BLI_genericNodeN(mat);
  BLI_addtail(&comp->queue, node);

  /* Create only one context. */
  if (comp->gl_context == NULL) {

@@ -251,39 +237,6 @@ static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job
  WM_jobs_start(wm, wm_job);
}

static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
{
  if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
    return;
  }

  /* Do not defer the compilation if we are rendering for image.
   * deferred rendering is only possible when `evil_C` is available */
  if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
    deferred = false;
  }

  if (!deferred) {
    DRW_deferred_shader_remove(mat);
    /* Shaders could already be compiling. Have to wait for compilation to finish. */
    while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
      PIL_sleep_ms(20);
    }
    if (GPU_material_status(mat) == GPU_MAT_CREATED) {
      GPU_material_compile(mat);
    }
    return;
  }

  /* Don't add material to the queue twice. */
  if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
    return;
  }

  /* Add deferred shader compilation to queue. */
  drw_deferred_queue_append(mat, false);
}

void DRW_deferred_shader_remove(GPUMaterial *mat)
{
  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {

@@ -299,42 +252,9 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
          BLI_remlink(&comp->queue, link);
          GPU_material_status_set(link->data, GPU_MAT_CREATED);
        }
        BLI_spin_unlock(&comp->list_lock);

        MEM_SAFE_FREE(link);

        /* Search for optimization job in queue. */
        LinkData *opti_link = (LinkData *)BLI_findptr(
            &comp->optimize_queue, mat, offsetof(LinkData, data));
        if (opti_link) {
          BLI_remlink(&comp->optimize_queue, opti_link);
          GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
        }
        BLI_spin_unlock(&comp->list_lock);

        MEM_SAFE_FREE(opti_link);
      }
    }
  }
}

void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
{
  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
      DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
      if (comp != NULL) {
        BLI_spin_lock(&comp->list_lock);
        /* Search for optimization job in queue. */
        LinkData *opti_link = (LinkData *)BLI_findptr(
            &comp->optimize_queue, mat, offsetof(LinkData, data));
        if (opti_link) {
          BLI_remlink(&comp->optimize_queue, opti_link);
          GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
        }
        BLI_spin_unlock(&comp->list_lock);

        MEM_SAFE_FREE(opti_link);
      }
    }
  }

@@ -468,7 +388,6 @@ GPUMaterial *DRW_shader_from_world(World *wo,
  }

  drw_deferred_shader_add(mat, deferred);
  DRW_shader_queue_optimize_material(mat);
  return mat;
}

@@ -498,52 +417,9 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
  }

  drw_deferred_shader_add(mat, deferred);
  DRW_shader_queue_optimize_material(mat);
  return mat;
}

void DRW_shader_queue_optimize_material(GPUMaterial *mat)
{
  /* Do not perform deferred optimization if performing render.
   * De-queue any queued optimization jobs. */
  if (DRW_state_is_image_render()) {
    if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
      /* Remove from pending optimization job queue. */
      DRW_deferred_shader_optimize_remove(mat);
      /* If optimization job had already started, wait for it to complete. */
      while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
        PIL_sleep_ms(20);
      }
    }
    return;
  }

  /* We do not need to perform optimization on the material if it is already compiled or in the
   * optimization queue. If optimization is not required, the status will be flagged as
   * `GPU_MAT_OPTIMIZATION_SKIP`.
   * We can also skip cases which have already been queued up. */
  if (ELEM(GPU_material_optimization_status(mat),
           GPU_MAT_OPTIMIZATION_SKIP,
           GPU_MAT_OPTIMIZATION_SUCCESS,
           GPU_MAT_OPTIMIZATION_QUEUED)) {
    return;
  }

  /* Only queue optimization once the original shader has been successfully compiled. */
  if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
    return;
  }

  /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
   * recompilation for shaders which are being actively modified. */
  if (!GPU_material_optimization_ready(mat)) {
    return;
  }

  /* Add deferred shader compilation to queue. */
  drw_deferred_queue_append(mat, true);
}

void DRW_shader_free(GPUShader *shader)
{
  GPU_shader_free(shader);
@@ -256,14 +256,6 @@ struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);

/**
 * Material Optimization.
 * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
 * This operation should always be deferred until existing compilations have completed.
 * Default un-optimized materials will still exist for interactive material editing performance.
 */
void GPU_material_optimize(GPUMaterial *mat);

/**
 * Return can be NULL if it's a world material.
 */
@@ -95,9 +95,6 @@ struct GPUPass {
  uint32_t hash;
  /** Did we already tried to compile the attached GPUShader. */
  bool compiled;
  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
   * during pass code generation. */
  bool should_optimize;
};

/* -------------------------------------------------------------------- */

@@ -204,8 +201,7 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
}

/* Trick type to change overload and keep a somewhat nice syntax. */
struct GPUConstant : public GPUInput {
};
struct GPUConstant : public GPUInput {};

/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)

@@ -245,11 +241,6 @@ class GPUCodegen {
  ListBase ubo_inputs_ = {nullptr, nullptr};
  GPUInput *cryptomatte_input_ = nullptr;

  /** Cache parameters for complexity heuristic. */
  uint nodes_total_ = 0;
  uint textures_total_ = 0;
  uint uniforms_total_ = 0;

 public:
  GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
  {

@@ -290,14 +281,6 @@ class GPUCodegen {
    return hash_;
  }

  /* Heuristic determined during pass codegen for whether a
   * more optimal variant of this material should be compiled. */
  bool should_optimize_heuristic() const
  {
    bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
    return do_optimize;
  }

 private:
  void set_unique_ids();

@@ -419,9 +402,6 @@ void GPUCodegen::generate_resources()
    }
  }

  /* Increment heuristic. */
  textures_total_ = slot;

  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
    ss << "struct NodeTree {\n";

@@ -459,16 +439,11 @@ void GPUCodegen::generate_library()
  GPUCodegenCreateInfo &info = *create_info;

  void *value;
  /* Iterate over libraries. We need to keep this struct intact in case
   * it is required for the optimization an pass. */
  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
  while (!BLI_ghashIterator_done(ihash)) {
    value = BLI_ghashIterator_getKey(ihash);
  GSetIterState pop_state = {};
  while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
    auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
    info.dependencies_generated.extend_non_duplicates(deps);
    BLI_ghashIterator_step(ihash);
  }
  BLI_ghashIterator_free(ihash);
}

void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)

@@ -536,9 +511,6 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
    }
  }
  eval_ss << ");\n\n";

  /* Increment heuristic. */
  nodes_total_++;
}

char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)

@@ -602,7 +574,6 @@ void GPUCodegen::generate_uniform_buffer()
    if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
      /* We handle the UBO uniforms separately. */
      BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
      uniforms_total_++;
    }
  }
}

@@ -630,7 +601,6 @@ void GPUCodegen::generate_graphs()
{
  set_unique_ids();

  /* Serialize graph. */
  output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
  output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
  output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);

@@ -666,17 +636,10 @@ void GPUCodegen::generate_graphs()
GPUPass *GPU_generate_pass(GPUMaterial *material,
                           GPUNodeGraph *graph,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk,
                           bool optimize_graph)
                           void *thunk)
{
  gpu_node_graph_prune_unused(graph);

  /* If Optimize flag is passed in, we are generating an optimized
   * variant of the GPUMaterial's GPUPass. */
  if (optimize_graph) {
    gpu_node_graph_optimize(graph);
  }

  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
   * shader. */
  gpu_node_graph_finalize_uniform_attrs(graph);

@@ -684,33 +647,23 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
  GPUCodegen codegen(material, graph);
  codegen.generate_graphs();
  codegen.generate_cryptomatte();
  codegen.generate_uniform_buffer();

  GPUPass *pass_hash = nullptr;
  /* Cache lookup: Reuse shaders already compiled. */
  GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());

  if (!optimize_graph) {
    /* The optimized version of the shader should not re-generate a UBO.
     * The UBO will not be used for this variant. */
    codegen.generate_uniform_buffer();

    /** Cache lookup: Reuse shaders already compiled.
     * NOTE: We only perform cache look-up for non-optimized shader
     * graphs, as baked constant data among other optimizations will generate too many
     * shader source permutations, with minimal re-usability. */
    pass_hash = gpu_pass_cache_lookup(codegen.hash_get());

    /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
     * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
    if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
      if (!gpu_pass_is_valid(pass_hash)) {
        /* Shader has already been created but failed to compile. */
        return nullptr;
      }
      /* No collision, just return the pass. */
      BLI_spin_lock(&pass_cache_spin);
      pass_hash->refcount += 1;
      BLI_spin_unlock(&pass_cache_spin);
      return pass_hash;
  /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
   * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
  if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
    if (!gpu_pass_is_valid(pass_hash)) {
      /* Shader has already been created but failed to compile. */
      return nullptr;
    }
    /* No collision, just return the pass. */
    BLI_spin_lock(&pass_cache_spin);
    pass_hash->refcount += 1;
    BLI_spin_unlock(&pass_cache_spin);
    return pass_hash;
  }

  /* Either the shader is not compiled or there is a hash collision...

@@ -748,31 +701,14 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
    pass->create_info = codegen.create_info;
    pass->hash = codegen.hash_get();
    pass->compiled = false;
    /* Only flag pass optimization hint if this is the first generated pass for a material.
     * Optimized passes cannot be optimized further, even if the heuristic is still not
     * favorable. */
    pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();

    codegen.create_info = nullptr;

    /* Only insert non-optimized graphs into cache.
     * Optimized graphs will continuously be recompiled with new unique source during material
     * editing, and thus causing the cache to fill up quickly with materials offering minimal
     * re-use. */
    if (!optimize_graph) {
      gpu_pass_cache_insert_after(pass_hash, pass);
    }
    gpu_pass_cache_insert_after(pass_hash, pass);
  }
  return pass;
}

bool GPU_pass_should_optimize(GPUPass *pass)
{
  /* Returns optimization heuristic prepared during
   * initial codegen. */
  return pass->should_optimize;
}

/** \} */

/* -------------------------------------------------------------------- */
@@ -25,12 +25,10 @@ typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass(GPUMaterial *material,
                           struct GPUNodeGraph *graph,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk,
                           bool optimize_graph);
                           void *thunk);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_release(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);

/* Module */
@@ -34,8 +34,6 @@

#include "DRW_engine.h"

#include "PIL_time.h"

#include "gpu_codegen.h"
#include "gpu_node_graph.h"

@@ -45,17 +43,6 @@
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8

/** Whether the optimized variant of the #GPUPass should be created asynchronously.
 * Usage of this depends on whether there are possible threading challenges of doing so.
 * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
 * compilation, though this option exists in case any potential scenarios for material graph
 * optimization cause a slow down on the main thread.
 *
 * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
 * this flag controls whether shader node graph source serialization happens on the compilation
 * worker thread. */
#define ASYNC_OPTIMIZED_PASS_CREATION 0

typedef struct GPUColorBandBuilder {
  float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
  int current_layer;

@@ -70,27 +57,6 @@ struct GPUMaterial {
  /* Contains #GPUShader and source code for deferred compilation.
   * Can be shared between similar material (i.e: sharing same node-tree topology). */
  GPUPass *pass;
  /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
   * This shader variant bakes dynamic uniform data as constant. This variant will not use
   * the ubo, and instead bake constants directly into the shader source. */
  GPUPass *optimized_pass;
  /* Optimization status.
   * We also use this status to determine whether this material should be considered for
   * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
   * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
   * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
   * performance to do so, based on the heuristic.
   */
  eGPUMaterialOptimizationStatus optimization_status;
  double creation_time;
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
  struct DeferredOptimizePass {
    GPUCodegenCallbackFn callback;
    void *thunk;
  } DeferredOptimizePass;
  struct DeferredOptimizePass optimize_pass_info;
#endif

  /** UBOs for this material parameters. */
  GPUUniformBuf *ubo;
  /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */

@@ -243,9 +209,6 @@ void GPU_material_free_single(GPUMaterial *material)

  gpu_node_graph_free(&material->graph);

  if (material->optimized_pass != NULL) {
    GPU_pass_release(material->optimized_pass);
  }
  if (material->pass != NULL) {
    GPU_pass_release(material->pass);
  }

@@ -284,15 +247,12 @@ Scene *GPU_material_scene(GPUMaterial *material)

GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
  return (material->optimized_pass) ? material->optimized_pass : material->pass;
  return material->pass;
}

GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
  /* First attempt to select optimized shader. If not available, fetch original. */
  GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
                                                   NULL;
  return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
  return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
}

const char *GPU_material_get_name(GPUMaterial *material)

@@ -705,29 +665,6 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
  mat->status = status;
}

eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
  return mat->optimization_status;
}

void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
{
  mat->optimization_status = status;
  if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
    /* Reset creation timer to delay optimization pass. */
    mat->creation_time = PIL_check_seconds_timer();
  }
}

bool GPU_material_optimization_ready(GPUMaterial *mat)
{
  /* Timer threshold before optimizations will be queued.
   * When materials are frequently being modified, optimization
   * can incur CPU overhead from excessive compilation. */
  const double optimization_time_threshold_s = 5.0;
  return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
}

/* Code generation */

bool GPU_material_has_surface_output(GPUMaterial *mat)

@@ -793,7 +730,6 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
  mat->uuid = shader_uuid;
  mat->flag = GPU_MATFLAG_UPDATED;
  mat->status = GPU_MAT_CREATED;
  mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
  mat->is_volume_shader = is_volume_shader;
  mat->graph.used_libraries = BLI_gset_new(
      BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");

@@ -812,7 +748,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,

  {
    /* Create source code and search pass cache for an already compiled version. */
    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
    mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);

    if (mat->pass == NULL) {
      /* We had a cache hit and the shader has already failed to compile. */

@@ -820,44 +756,11 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
      gpu_node_graph_free(&mat->graph);
    }
    else {
      /* Determine whether we should generate an optimized variant of the graph.
       * Heuristic is based on complexity of default material pass and shader node graph. */
      if (GPU_pass_should_optimize(mat->pass)) {
        GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
      }

      GPUShader *sh = GPU_pass_shader_get(mat->pass);
      if (sh != NULL) {
        /* We had a cache hit and the shader is already compiled. */
        mat->status = GPU_MAT_SUCCESS;

        if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
          gpu_node_graph_free_nodes(&mat->graph);
        }
      }

      /* Generate optimized pass. */
      if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
        mat->optimized_pass = NULL;
        mat->optimize_pass_info.callback = callback;
        mat->optimize_pass_info.thunk = thunk;
#else
        mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
        if (mat->optimized_pass == NULL) {
          /* Failed to create optimized pass. */
          gpu_node_graph_free_nodes(&mat->graph);
          GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
        }
        else {
          GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
          if (optimized_sh != NULL) {
            /* Optimized shader already available. */
            gpu_node_graph_free_nodes(&mat->graph);
            GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
          }
        }
#endif
        gpu_node_graph_free_nodes(&mat->graph);
      }
    }
  }

@@ -908,11 +811,7 @@ void GPU_material_compile(GPUMaterial *mat)
    GPUShader *sh = GPU_pass_shader_get(mat->pass);
    if (sh != NULL) {
      mat->status = GPU_MAT_SUCCESS;

      if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
        /* Only free node graph nodes if not required by secondary optimization pass. */
        gpu_node_graph_free_nodes(&mat->graph);
      }
      gpu_node_graph_free_nodes(&mat->graph);
    }
    else {
      mat->status = GPU_MAT_FAILED;

@@ -926,71 +825,6 @@ void GPU_material_compile(GPUMaterial *mat)
  }
}

void GPU_material_optimize(GPUMaterial *mat)
{
  /* If shader is flagged for skipping optimization or has already been successfully
   * optimized, skip. */
  if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
    return;
  }

  /* If original shader has not been fully compiled, we are not
   * ready to perform optimization. */
  if (mat->status != GPU_MAT_SUCCESS) {
    /* Reset optimization status. */
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
    return;
  }

#if ASYNC_OPTIMIZED_PASS_CREATION == 1
  /* If the optimized pass is not valid, first generate optimized pass.
   * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
   * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
   * optimal, as these do not benefit from caching, due to baked constants. However, this could
   * possibly be cause for concern for certain cases. */
  if (!mat->optimized_pass) {
    mat->optimized_pass = GPU_generate_pass(
        mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
    BLI_assert(mat->optimized_pass);
  }
#else
  if (!mat->optimized_pass) {
    /* Optimized pass has not been created, skip future optimization attempts. */
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
    return;
  }
#endif

  bool success;
  /* NOTE: The shader may have already been compiled here since we are
   * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
  success = GPU_pass_compile(mat->optimized_pass, mat->name);
#else
  success = GPU_pass_compile(mat->optimized_pass, __func__);
#endif

  if (success) {
    GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
    if (sh != NULL) {
      GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
    }
    else {
      /* Optimized pass failed to compile. Disable any future optimization attempts. */
      GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
    }
  }
  else {
    /* Optimization pass generation failed. Disable future attempts to optimize. */
    GPU_pass_release(mat->optimized_pass);
    mat->optimized_pass = NULL;
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
  }

  /* Release node graph as no longer needed. */
  gpu_node_graph_free_nodes(&mat->graph);
}

void GPU_materials_free(Main *bmain)
{
  LISTBASE_FOREACH (Material *, ma, &bmain->materials) {

@@ -1014,8 +848,6 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
  material->graph.used_libraries = BLI_gset_new(
      BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
  material->refcount = 1;
  material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
  material->optimized_pass = NULL;

  /* Construct the material graph by adding and linking the necessary GPU material nodes. */
  construct_function_cb(thunk, material);

@@ -1024,9 +856,7 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
  gpu_material_ramp_texture_build(material);

  /* Lookup an existing pass in the cache or generate a new one. */
  material->pass = GPU_generate_pass(
      material, &material->graph, generate_code_function_cb, thunk, false);
  material->optimized_pass = NULL;
  material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);

  /* The pass already exists in the pass cache but its shader already failed to compile. */
  if (material->pass == NULL) {

@@ -1035,42 +865,11 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
    return material;
  }

  /* Generate optimized pass. */
  if (GPU_pass_should_optimize(material->pass)) {

#if ASYNC_OPTIMIZED_PASS_CREATION == 1
    mmaterial->optimized_pass = NULL;
    material->optimize_pass_info.callback = generate_code_function_cb;
    material->optimize_pass_info.thunk = thunk;
    GPU_material_optimization_status_set(GPU_MAT_OPTIMIZATION_READY);
#else
    material->optimized_pass = GPU_generate_pass(
        material, &material->graph, generate_code_function_cb, thunk, true);

    if (material->optimized_pass == NULL) {
      /* Failed to create optimized pass. */
      gpu_node_graph_free_nodes(&material->graph);
      GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
    }
    else {
      GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
      if (optimized_sh != NULL) {
        /* Optimized shader already available. */
        gpu_node_graph_free_nodes(&material->graph);
        GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
      }
    }
#endif
  }

  /* The pass already exists in the pass cache and its shader is already compiled. */
  GPUShader *shader = GPU_pass_shader_get(material->pass);
  if (shader != NULL) {
    material->status = GPU_MAT_SUCCESS;
    if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
      /* Only free node graph if not required by secondary optimization pass. */
      gpu_node_graph_free_nodes(&material->graph);
    }
    gpu_node_graph_free_nodes(&material->graph);
    return material;
  }
@@ -914,22 +914,3 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
    }
  }
}

void gpu_node_graph_optimize(GPUNodeGraph *graph)
{
  /* Replace all uniform node links with constant. */
  LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      if (input->link) {
        if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
          input->link->link_type = GPU_NODE_LINK_CONSTANT;
        }
      }
      if (input->source == GPU_SOURCE_UNIFORM) {
        input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
      }
    }
  }

  /* TODO: Consider performing other node graph optimizations here. */
}
@@ -180,20 +180,6 @@ typedef struct GPUNodeGraph {
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);

/**
 * Optimize node graph for optimized material shader path.
 * Once the base material has been generated, we can modify the shader
 * node graph to create one which will produce an optimally performing shader.
 * This currently involves baking uniform data into constant data to enable
 * aggressive constant folding by the compiler in order to reduce complexity and
 * shader core memory pressure.
 *
 * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
 * more frequently, however, the default material pass will always exist to fall
 * back on.
 */
void gpu_node_graph_optimize(GPUNodeGraph *graph);

/**
 * Free intermediate node graph.
 */