Curves: Add delete operator to edit mode #104765
|
@ -6,11 +6,11 @@
|
|||
# - That dependency uses all available cores.
|
||||
#
|
||||
# Without this, simply calling `make -j$(nproc)` from the `${CMAKE_BUILD_DIR}/deps/`
|
||||
# directory will build all projects at once.
|
||||
# directory will build many projects at once.
|
||||
#
|
||||
# This is undesirable for the following reasons:
|
||||
#
|
||||
# - The output from many projects is mixed together,
|
||||
# - The output from projects is mixed together,
|
||||
# making it difficult to track down the cause of a build failure.
|
||||
#
|
||||
# - Larger dependencies such as LLVM can bottleneck the build process,
|
||||
|
@ -20,7 +20,7 @@
|
|||
# It's possible canceling happens as a patch is being applied or files are being copied.
|
||||
# (steps that aren't part of the compilation process where it's typically safe to cancel).
|
||||
|
||||
if [[ -z "${MY_MAKE_CALL_LEVEL}" ]]; then
|
||||
if [[ -z "$MY_MAKE_CALL_LEVEL" ]]; then
|
||||
export MY_MAKE_CALL_LEVEL=0
|
||||
export MY_MAKEFLAGS=$MAKEFLAGS
|
||||
|
||||
|
@ -31,34 +31,35 @@ if [[ -z "${MY_MAKE_CALL_LEVEL}" ]]; then
|
|||
-j*)
|
||||
export MY_JOBS_ARG=$i
|
||||
if [ "$MY_JOBS_ARG" = "-j" ]; then
|
||||
add_next=1
|
||||
add_next=1
|
||||
fi
|
||||
;;
|
||||
--jobs=*)
|
||||
shift # past argument=value
|
||||
export MY_JOBS_ARG=$i
|
||||
MY_JOBS_ARG=$i
|
||||
;;
|
||||
*)
|
||||
if (( $add_next == 1 )); then
|
||||
export MY_JOBS_ARG="$MY_JOBS_ARG $i"
|
||||
add_next=0
|
||||
if (( add_next == 1 )); then
|
||||
MY_JOBS_ARG="$MY_JOBS_ARG $i"
|
||||
add_next=0
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
done
|
||||
unset i add_next
|
||||
|
||||
if [[ -z "${MY_JOBS_ARG}" ]]; then
|
||||
export MY_JOBS_ARG="-j$(nproc)"
|
||||
if [[ -z "$MY_JOBS_ARG" ]]; then
|
||||
MY_JOBS_ARG="-j$(nproc)"
|
||||
fi
|
||||
export MY_JOBS_ARG
|
||||
# Support user defined `MAKEFLAGS`.
|
||||
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
|
||||
else
|
||||
export MY_MAKE_CALL_LEVEL=$(( $MY_MAKE_CALL_LEVEL + 1 ))
|
||||
if (( $MY_MAKE_CALL_LEVEL == 1 )); then
|
||||
export MY_MAKE_CALL_LEVEL=$(( MY_MAKE_CALL_LEVEL + 1 ))
|
||||
if (( MY_MAKE_CALL_LEVEL == 1 )); then
|
||||
# Important to set jobs to 1, otherwise user defined jobs argument is used.
|
||||
export MAKEFLAGS="$MY_MAKEFLAGS -j1"
|
||||
elif (( $MY_MAKE_CALL_LEVEL == 2 )); then
|
||||
elif (( MY_MAKE_CALL_LEVEL == 2 )); then
|
||||
# This is the level used by each sub-project.
|
||||
export MAKEFLAGS="$MY_MAKEFLAGS $MY_JOBS_ARG"
|
||||
fi
|
||||
|
|
|
@ -3869,6 +3869,7 @@ class VIEW3D_MT_edit_mesh(Menu):
|
|||
layout.menu("VIEW3D_MT_edit_mesh_normals")
|
||||
layout.menu("VIEW3D_MT_edit_mesh_shading")
|
||||
layout.menu("VIEW3D_MT_edit_mesh_weights")
|
||||
layout.operator("mesh.attribute_set")
|
||||
layout.operator_menu_enum("mesh.sort_elements", "type", text="Sort Elements...")
|
||||
|
||||
layout.separator()
|
||||
|
@ -6721,15 +6722,15 @@ class VIEW3D_PT_overlay_sculpt(Panel):
|
|||
overlay = view.overlay
|
||||
|
||||
row = layout.row(align=True)
|
||||
row.prop(overlay, "sculpt_show_mask", text="")
|
||||
row.prop(overlay, "show_sculpt_mask", text="")
|
||||
sub = row.row()
|
||||
sub.active = overlay.sculpt_show_mask
|
||||
sub.active = overlay.show_sculpt_mask
|
||||
sub.prop(overlay, "sculpt_mode_mask_opacity", text="Mask")
|
||||
|
||||
row = layout.row(align=True)
|
||||
row.prop(overlay, "sculpt_show_face_sets", text="")
|
||||
row.prop(overlay, "show_sculpt_face_sets", text="")
|
||||
sub = row.row()
|
||||
sub.active = overlay.sculpt_show_face_sets
|
||||
sub.active = overlay.show_sculpt_face_sets
|
||||
row.prop(overlay, "sculpt_mode_face_sets_opacity", text="Face Sets")
|
||||
|
||||
|
||||
|
@ -6758,9 +6759,9 @@ class VIEW3D_PT_overlay_sculpt_curves(Panel):
|
|||
|
||||
row = layout.row(align=True)
|
||||
row.active = overlay.show_overlays
|
||||
row.prop(overlay, "sculpt_curves_cage", text="")
|
||||
row.prop(overlay, "show_sculpt_curves_cage", text="")
|
||||
subrow = row.row(align=True)
|
||||
subrow.active = overlay.sculpt_curves_cage
|
||||
subrow.active = overlay.show_sculpt_curves_cage
|
||||
subrow.prop(overlay, "sculpt_curves_cage_opacity", text="Cage Opacity")
|
||||
|
||||
|
||||
|
|
|
@ -1601,6 +1601,12 @@ static bool version_merge_still_offsets(Sequence *seq, void * /*user_data*/)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Per-strip versioning callback: clears the `SEQ_FLAG_DELETE` bit in the strip's flags.
 * Always returns true (presumably so the iteration continues over all strips - confirm against
 * `SEQ_for_each_callback`). */
static bool version_fix_delete_flag(Sequence *seq, void * /*user_data*/)
{
  seq->flag = seq->flag & ~SEQ_FLAG_DELETE;
  return true;
}
|
||||
|
||||
/* Those `version_liboverride_rnacollections_*` functions mimic the old, pre-3.0 code to find
|
||||
* anchor and source items in the given list of modifiers, constraints etc., using only the
|
||||
* `subitem_local` data of the override property operation.
|
||||
|
@ -3938,6 +3944,14 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
|
|||
}
|
||||
}
|
||||
|
||||
/* Fix possible uncleared `SEQ_FLAG_DELETE` flag */
|
||||
LISTBASE_FOREACH (Scene *, scene, &bmain->scenes) {
|
||||
Editing *ed = SEQ_editing_get(scene);
|
||||
if (ed != nullptr) {
|
||||
SEQ_for_each_callback(&ed->seqbase, version_fix_delete_flag, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Keep this block, even when empty. */
|
||||
}
|
||||
}
|
||||
|
|
|
@ -161,7 +161,7 @@ void Evaluator::compile_and_evaluate_shader_compile_unit(CompileState &compile_s
|
|||
void Evaluator::map_shader_operation_inputs_to_their_results(ShaderOperation *operation,
|
||||
CompileState &compile_state)
|
||||
{
|
||||
for (const auto &item : operation->get_inputs_to_linked_outputs_map().items()) {
|
||||
for (const auto item : operation->get_inputs_to_linked_outputs_map().items()) {
|
||||
Result &result = compile_state.get_result_from_output_socket(item.value);
|
||||
operation->map_input_to_result(item.key, &result);
|
||||
}
|
||||
|
|
|
@ -81,7 +81,7 @@ Map<std::string, DOutputSocket> &ShaderOperation::get_inputs_to_linked_outputs_m
|
|||
|
||||
void ShaderOperation::compute_results_reference_counts(const Schedule &schedule)
|
||||
{
|
||||
for (const auto &item : output_sockets_to_output_identifiers_map_.items()) {
|
||||
for (const auto item : output_sockets_to_output_identifiers_map_.items()) {
|
||||
const int reference_count = number_of_inputs_linked_to_output_conditioned(
|
||||
item.key, [&](DInputSocket input) { return schedule.contains(input.node()); });
|
||||
|
||||
|
|
|
@ -197,6 +197,7 @@ void DRW_gpu_render_context_enable(void *re_gpu_context);
|
|||
void DRW_gpu_render_context_disable(void *re_gpu_context);
|
||||
|
||||
void DRW_deferred_shader_remove(struct GPUMaterial *mat);
|
||||
void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat);
|
||||
|
||||
/**
|
||||
* Get DrawData from the given ID-block. In order for this to work, we assume that
|
||||
|
|
|
@ -53,6 +53,7 @@ static void eevee_engine_init(void *ved)
|
|||
stl->g_data->valid_double_buffer = (txl->color_double_buffer != NULL);
|
||||
stl->g_data->valid_taa_history = (txl->taa_history != NULL);
|
||||
stl->g_data->queued_shaders_count = 0;
|
||||
stl->g_data->queued_optimise_shaders_count = 0;
|
||||
stl->g_data->render_timesteps = 1;
|
||||
stl->g_data->disable_ligthprobes = v3d &&
|
||||
(v3d->object_type_exclude_viewport & (1 << OB_LIGHTPROBE));
|
||||
|
@ -178,6 +179,11 @@ static void eevee_cache_finish(void *vedata)
|
|||
if (g_data->queued_shaders_count > 0) {
|
||||
SNPRINTF(ved->info, TIP_("Compiling Shaders (%d remaining)"), g_data->queued_shaders_count);
|
||||
}
|
||||
else if (g_data->queued_optimise_shaders_count > 0) {
|
||||
SNPRINTF(ved->info,
|
||||
TIP_("Optimizing Shaders (%d remaining)"),
|
||||
g_data->queued_optimise_shaders_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* As renders in an HDR off-screen buffer, we need draw everything once
|
||||
|
|
|
@ -1000,6 +1000,8 @@ typedef struct EEVEE_PrivateData {
|
|||
/* Compiling shaders count. This is to track if a shader has finished compiling. */
|
||||
int queued_shaders_count;
|
||||
int queued_shaders_count_prev;
|
||||
/* Optimizing shaders count. */
|
||||
int queued_optimise_shaders_count;
|
||||
|
||||
/* LookDev Settings */
|
||||
int studiolight_index;
|
||||
|
|
|
@ -1390,12 +1390,21 @@ struct GPUMaterial *EEVEE_material_get(
|
|||
return nullptr;
|
||||
}
|
||||
switch (status) {
|
||||
case GPU_MAT_SUCCESS:
|
||||
break;
|
||||
case GPU_MAT_QUEUED:
|
||||
case GPU_MAT_SUCCESS: {
|
||||
/* Determine optimization status for remaining compilations counter. */
|
||||
int optimization_status = GPU_material_optimization_status(mat);
|
||||
if (optimization_status == GPU_MAT_OPTIMIZATION_QUEUED) {
|
||||
vedata->stl->g_data->queued_optimise_shaders_count++;
|
||||
}
|
||||
} break;
|
||||
case GPU_MAT_QUEUED: {
|
||||
vedata->stl->g_data->queued_shaders_count++;
|
||||
mat = EEVEE_material_default_get(scene, ma, options);
|
||||
break;
|
||||
GPUMaterial *default_mat = EEVEE_material_default_get(scene, ma, options);
|
||||
/* Mark pending material with its default material for future cache warming. */
|
||||
GPU_material_set_default(mat, default_mat);
|
||||
/* Return default material. */
|
||||
mat = default_mat;
|
||||
} break;
|
||||
case GPU_MAT_FAILED:
|
||||
default:
|
||||
ma = EEVEE_material_default_error_get();
|
||||
|
|
|
@ -507,6 +507,8 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
|
|||
this);
|
||||
GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
|
||||
GPU_material_compile(gpumat);
|
||||
/* Queue deferred material optimization. */
|
||||
DRW_shader_queue_optimize_material(gpumat);
|
||||
return gpumat;
|
||||
}
|
||||
|
||||
|
|
|
@ -630,7 +630,7 @@ GPU_SHADER_CREATE_INFO(overlay_uniform_color_pointcloud)
|
|||
|
||||
GPU_SHADER_CREATE_INFO(overlay_uniform_color_clipped)
|
||||
.do_static_compilation(true)
|
||||
.additional_info("overlay_depth_only", "drw_clipped");
|
||||
.additional_info("overlay_uniform_color", "drw_clipped");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(overlay_uniform_color_pointcloud_clipped)
|
||||
.do_static_compilation(true)
|
||||
|
|
|
@ -307,6 +307,7 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
|
|||
bool deferred,
|
||||
GPUCodegenCallbackFn callback,
|
||||
void *thunk);
|
||||
void DRW_shader_queue_optimize_material(struct GPUMaterial *mat);
|
||||
void DRW_shader_free(struct GPUShader *shader);
|
||||
#define DRW_SHADER_FREE_SAFE(shader) \
|
||||
do { \
|
||||
|
|
|
@ -55,6 +55,9 @@ typedef struct DRWShaderCompiler {
|
|||
ListBase queue; /* GPUMaterial */
|
||||
SpinLock list_lock;
|
||||
|
||||
/** Optimization queue. */
|
||||
ListBase optimize_queue; /* GPUMaterial */
|
||||
|
||||
void *gl_context;
|
||||
GPUContext *gpu_context;
|
||||
bool own_context;
|
||||
|
@ -110,8 +113,29 @@ static void drw_deferred_shader_compilation_exec(
|
|||
MEM_freeN(link);
|
||||
}
|
||||
else {
|
||||
/* No more materials to optimize, or shaders to compile. */
|
||||
break;
|
||||
/* Check for Material Optimization job once there are no more
|
||||
* shaders to compile. */
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
/* Pop tail because it will be less likely to lock the main thread
|
||||
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
|
||||
link = (LinkData *)BLI_poptail(&comp->optimize_queue);
|
||||
GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : NULL;
|
||||
if (optimize_mat) {
|
||||
/* Avoid another thread freeing the material during optimization. */
|
||||
GPU_material_acquire(optimize_mat);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
if (optimize_mat) {
|
||||
/* Compile optimized material shader. */
|
||||
GPU_material_optimize(optimize_mat);
|
||||
GPU_material_release(optimize_mat);
|
||||
MEM_freeN(link);
|
||||
}
|
||||
else {
|
||||
/* No more materials to optimize, or shaders to compile. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
|
@ -133,6 +157,7 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
|
|||
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
BLI_freelistN(&comp->queue);
|
||||
BLI_freelistN(&comp->optimize_queue);
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
if (comp->own_context) {
|
||||
|
@ -148,34 +173,13 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
|
|||
MEM_freeN(comp);
|
||||
}
|
||||
|
||||
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
||||
/**
|
||||
* Append either shader compilation or optimization job to deferred queue and
|
||||
* ensure shader compilation worker is active.
|
||||
* We keep two separate queues to ensure core compilations always complete before optimization.
|
||||
*/
|
||||
static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
|
||||
{
|
||||
if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
|
||||
return;
|
||||
}
|
||||
/* Do not defer the compilation if we are rendering for image.
|
||||
* deferred rendering is only possible when `evil_C` is available */
|
||||
if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
|
||||
deferred = false;
|
||||
}
|
||||
|
||||
if (!deferred) {
|
||||
DRW_deferred_shader_remove(mat);
|
||||
/* Shaders could already be compiling. Have to wait for compilation to finish. */
|
||||
while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
|
||||
PIL_sleep_ms(20);
|
||||
}
|
||||
if (GPU_material_status(mat) == GPU_MAT_CREATED) {
|
||||
GPU_material_compile(mat);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Don't add material to the queue twice. */
|
||||
if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
|
||||
return;
|
||||
}
|
||||
|
||||
const bool use_main_context = GPU_use_main_context_workaround();
|
||||
const bool job_own_context = !use_main_context;
|
||||
|
||||
|
@ -196,6 +200,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
|||
if (old_comp) {
|
||||
BLI_spin_lock(&old_comp->list_lock);
|
||||
BLI_movelisttolist(&comp->queue, &old_comp->queue);
|
||||
BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
|
||||
BLI_spin_unlock(&old_comp->list_lock);
|
||||
/* Do not recreate context, just pass ownership. */
|
||||
if (old_comp->gl_context) {
|
||||
|
@ -206,9 +211,18 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
|||
}
|
||||
}
|
||||
|
||||
GPU_material_status_set(mat, GPU_MAT_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->queue, node);
|
||||
/* Add to either compilation or optimization queue. */
|
||||
if (is_optimization_job) {
|
||||
BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->optimize_queue, node);
|
||||
}
|
||||
else {
|
||||
GPU_material_status_set(mat, GPU_MAT_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->queue, node);
|
||||
}
|
||||
|
||||
/* Create only one context. */
|
||||
if (comp->gl_context == NULL) {
|
||||
|
@ -237,6 +251,39 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
|||
WM_jobs_start(wm, wm_job);
|
||||
}
|
||||
|
||||
/**
 * Compile \a mat, either by queuing it for deferred compilation or by compiling it right away.
 *
 * Materials whose status is already `GPU_MAT_SUCCESS` or `GPU_MAT_FAILED` are left untouched.
 * \param deferred: Request deferred compilation. The request is ignored when `evil_C` is not
 * available, when rendering an image, or when `USE_DEFERRED_COMPILATION` is disabled.
 */
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
{
  if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
    return;
  }

  /* Do not defer the compilation if we are rendering for image.
   * Deferred rendering is only possible when `evil_C` is available. */
  const bool use_deferred = (DST.draw_ctx.evil_C != NULL) && !DRW_state_is_image_render() &&
                            USE_DEFERRED_COMPILATION && deferred;

  if (use_deferred) {
    /* Don't add material to the queue twice. */
    if (GPU_material_status(mat) != GPU_MAT_QUEUED) {
      /* Add deferred shader compilation to queue. */
      drw_deferred_queue_append(mat, false);
    }
    return;
  }

  /* Immediate path: take the material out of the deferred queue first. */
  DRW_deferred_shader_remove(mat);

  /* Shaders could already be compiling. Have to wait for compilation to finish. */
  while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
    PIL_sleep_ms(20);
  }

  if (GPU_material_status(mat) == GPU_MAT_CREATED) {
    GPU_material_compile(mat);
  }
}
|
||||
|
||||
static void drw_register_shader_vlattrs(GPUMaterial *mat)
|
||||
{
|
||||
const ListBase *attrs = GPU_material_layer_attributes(mat);
|
||||
|
@ -288,9 +335,42 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
|
|||
BLI_remlink(&comp->queue, link);
|
||||
GPU_material_status_set(link->data, GPU_MAT_CREATED);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
MEM_SAFE_FREE(link);
|
||||
|
||||
/* Search for optimization job in queue. */
|
||||
LinkData *opti_link = (LinkData *)BLI_findptr(
|
||||
&comp->optimize_queue, mat, offsetof(LinkData, data));
|
||||
if (opti_link) {
|
||||
BLI_remlink(&comp->optimize_queue, opti_link);
|
||||
GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
MEM_SAFE_FREE(opti_link);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remove \a mat from the pending optimization queue of the shader compilation job, if it is
 * queued there, and set its optimization status back to `GPU_MAT_OPTIMIZATION_READY`.
 *
 * The queue is only modified while holding the compiler's `list_lock`; the removed link is
 * freed after the lock has been released.
 */
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
{
  LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
      DRWShaderCompiler *compiler = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
          wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
      if (compiler == NULL) {
        continue;
      }

      BLI_spin_lock(&compiler->list_lock);
      /* Search for optimization job in queue. */
      LinkData *queued_link = (LinkData *)BLI_findptr(
          &compiler->optimize_queue, mat, offsetof(LinkData, data));
      if (queued_link != NULL) {
        BLI_remlink(&compiler->optimize_queue, queued_link);
        GPU_material_optimization_status_set(queued_link->data, GPU_MAT_OPTIMIZATION_READY);
      }
      BLI_spin_unlock(&compiler->list_lock);

      /* Free outside of the lock. */
      if (queued_link != NULL) {
        MEM_freeN(queued_link);
      }
    }
  }
}
|
||||
|
@ -432,6 +512,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
|
|||
}
|
||||
|
||||
drw_deferred_shader_add(mat, deferred);
|
||||
DRW_shader_queue_optimize_material(mat);
|
||||
return mat;
|
||||
}
|
||||
|
||||
|
@ -463,9 +544,52 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
|
|||
}
|
||||
|
||||
drw_deferred_shader_add(mat, deferred);
|
||||
DRW_shader_queue_optimize_material(mat);
|
||||
return mat;
|
||||
}
|
||||
|
||||
/**
 * Queue a deferred optimization job for \a mat when it is eligible for one.
 *
 * While rendering an image no optimization is queued; instead any job already queued for this
 * material is removed from the queue, and the call waits until its status is no longer
 * `GPU_MAT_OPTIMIZATION_QUEUED`.
 */
void DRW_shader_queue_optimize_material(GPUMaterial *mat)
{
  if (!DRW_state_is_image_render()) {
    /* We do not need to perform optimization on the material if it is already compiled or in
     * the optimization queue. If optimization is not required, the status will be flagged as
     * `GPU_MAT_OPTIMIZATION_SKIP`.
     * We can also skip cases which have already been queued up. */
    if (ELEM(GPU_material_optimization_status(mat),
             GPU_MAT_OPTIMIZATION_SKIP,
             GPU_MAT_OPTIMIZATION_SUCCESS,
             GPU_MAT_OPTIMIZATION_QUEUED)) {
      return;
    }

    /* Only queue optimization once the original shader has been successfully compiled, and
     * once sufficient time has passed beyond creation. The delay avoids excessive
     * recompilation for shaders which are being actively modified. */
    if (GPU_material_status(mat) == GPU_MAT_SUCCESS && GPU_material_optimization_ready(mat)) {
      /* Add deferred shader compilation to queue. */
      drw_deferred_queue_append(mat, true);
    }
    return;
  }

  /* Do not perform deferred optimization if performing render.
   * De-queue any queued optimization jobs. */
  if (GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED) {
    return;
  }

  /* Remove from pending optimization job queue. */
  DRW_deferred_shader_optimize_remove(mat);

  /* If optimization job had already started, wait for it to complete. */
  while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
    PIL_sleep_ms(20);
  }
}
|
||||
|
||||
void DRW_shader_free(GPUShader *shader)
|
||||
{
|
||||
GPU_shader_free(shader);
|
||||
|
|
|
@ -12,6 +12,7 @@ set(INC
|
|||
../../draw
|
||||
../../geometry
|
||||
../../gpu
|
||||
../../functions
|
||||
../../imbuf
|
||||
../../makesdna
|
||||
../../makesrna
|
||||
|
@ -27,6 +28,7 @@ set(SRC
|
|||
editface.cc
|
||||
editmesh_add.c
|
||||
editmesh_add_gizmo.c
|
||||
editmesh_attribute.cc
|
||||
editmesh_automerge.c
|
||||
editmesh_bevel.c
|
||||
editmesh_bisect.c
|
||||
|
|
|
@ -0,0 +1,393 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup edmesh
|
||||
*/
|
||||
|
||||
#include "BLI_color.hh"
|
||||
#include "BLI_generic_pointer.hh"
|
||||
|
||||
#include "BKE_attribute.h"
|
||||
#include "BKE_context.h"
|
||||
#include "BKE_editmesh.h"
|
||||
#include "BKE_layer.h"
|
||||
#include "BKE_mesh.h"
|
||||
#include "BKE_report.h"
|
||||
#include "BKE_type_conversions.hh"
|
||||
|
||||
#include "WM_api.h"
|
||||
#include "WM_types.h"
|
||||
|
||||
#include "RNA_access.h"
|
||||
#include "RNA_define.h"
|
||||
#include "RNA_enum_types.h"
|
||||
|
||||
#include "ED_mesh.h"
|
||||
#include "ED_object.h"
|
||||
#include "ED_screen.h"
|
||||
#include "ED_transform.h"
|
||||
#include "ED_view3d.h"
|
||||
|
||||
#include "BLT_translation.h"
|
||||
|
||||
#include "DNA_object_types.h"
|
||||
|
||||
#include "UI_interface.h"
|
||||
#include "UI_resources.h"
|
||||
|
||||
#include "bmesh_tools.h"
|
||||
|
||||
#include "DEG_depsgraph.h"
|
||||
#include "DEG_depsgraph_query.h"
|
||||
|
||||
#include "mesh_intern.h"
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Set Attribute Operator
|
||||
* \{ */
|
||||
|
||||
namespace blender::ed::mesh {
|
||||
|
||||
static char domain_to_htype(const eAttrDomain domain)
|
||||
{
|
||||
switch (domain) {
|
||||
case ATTR_DOMAIN_POINT:
|
||||
return BM_VERT;
|
||||
case ATTR_DOMAIN_EDGE:
|
||||
return BM_EDGE;
|
||||
case ATTR_DOMAIN_FACE:
|
||||
return BM_FACE;
|
||||
case ATTR_DOMAIN_CORNER:
|
||||
return BM_LOOP;
|
||||
default:
|
||||
BLI_assert_unreachable();
|
||||
return BM_VERT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Operator poll: requires mesh edit mode and an active attribute that is not a string
 * attribute. Sets a poll message describing the reason when the attribute check fails.
 */
static bool mesh_active_attribute_poll(bContext *C)
{
  if (!ED_operator_editmesh(C)) {
    return false;
  }

  const Mesh *mesh = ED_mesh_context(C);
  const CustomDataLayer *active = BKE_id_attributes_active_get(const_cast<ID *>(&mesh->id));

  const char *failure_message = nullptr;
  if (active == nullptr) {
    failure_message = "No active attribute";
  }
  else if (active->type == CD_PROP_STRING) {
    failure_message = "Active string attribute not supported";
  }

  if (failure_message != nullptr) {
    CTX_wm_operator_poll_msg_set(C, failure_message);
    return false;
  }
  return true;
}
|
||||
|
||||
namespace set_attribute {
|
||||
|
||||
/**
 * Name of the operator property that stores the value for an attribute of the given type.
 * Both color types share "value_color"; both integer types share "value_int".
 * Unsupported types trigger an assert and yield an empty name.
 */
static StringRefNull rna_property_name_for_type(const eCustomDataType type)
{
  const char *name = "";
  switch (type) {
    case CD_PROP_FLOAT:
      name = "value_float";
      break;
    case CD_PROP_FLOAT2:
      name = "value_float_vector_2d";
      break;
    case CD_PROP_FLOAT3:
      name = "value_float_vector_3d";
      break;
    case CD_PROP_COLOR:
    case CD_PROP_BYTE_COLOR:
      name = "value_color";
      break;
    case CD_PROP_BOOL:
      name = "value_bool";
      break;
    case CD_PROP_INT8:
    case CD_PROP_INT32:
      name = "value_int";
      break;
    default:
      BLI_assert_unreachable();
      break;
  }
  return name;
}
|
||||
|
||||
/**
 * Assign \a value to the custom-data of every selected element visited by \a iter_type.
 *
 * \param offset: Byte offset of the destination layer inside each element's custom-data block.
 */
static void bmesh_vert_edge_face_layer_selected_values_set(BMesh &bm,
                                                           const BMIterType iter_type,
                                                           const GPointer value,
                                                           const int offset)
{
  const CPPType &value_type = *value.type();
  const void *src = value.get();

  BMIter elem_iter;
  BMElem *element;
  BM_ITER_MESH (element, &elem_iter, &bm, iter_type) {
    if (!BM_elem_flag_test(element, BM_ELEM_SELECT)) {
      continue;
    }
    value_type.copy_assign(src, POINTER_OFFSET(element->head.data, offset));
  }
}
|
||||
|
||||
/**
|
||||
* For face select mode, set face corner values of any selected face. For edge and vertex
|
||||
* select mode, set face corner values of loops connected to selected vertices.
|
||||
*/
|
||||
static void bmesh_loop_layer_selected_values_set(BMEditMesh &em,
|
||||
const GPointer value,
|
||||
const int offset)
|
||||
{
|
||||
/* In the separate select modes we may set the same loop values more than once.
|
||||
* This is okay because we're always setting the same value. */
|
||||
BMesh &bm = *em.bm;
|
||||
const CPPType &type = *value.type();
|
||||
if (em.selectmode & SCE_SELECT_FACE) {
|
||||
BMIter face_iter;
|
||||
BMFace *face;
|
||||
BM_ITER_MESH (face, &face_iter, &bm, BM_FACES_OF_MESH) {
|
||||
if (BM_elem_flag_test(face, BM_ELEM_SELECT)) {
|
||||
BMIter loop_iter;
|
||||
BMLoop *loop;
|
||||
BM_ITER_ELEM (loop, &loop_iter, face, BM_LOOPS_OF_FACE) {
|
||||
type.copy_assign(value.get(), POINTER_OFFSET(loop->head.data, offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (em.selectmode & (SCE_SELECT_VERTEX | SCE_SELECT_EDGE)) {
|
||||
BMIter vert_iter;
|
||||
BMVert *vert;
|
||||
BM_ITER_MESH (vert, &vert_iter, &bm, BM_VERTS_OF_MESH) {
|
||||
if (BM_elem_flag_test(vert, BM_ELEM_SELECT)) {
|
||||
BMIter loop_iter;
|
||||
BMLoop *loop;
|
||||
BM_ITER_ELEM (loop, &loop_iter, vert, BM_LOOPS_OF_VERT) {
|
||||
type.copy_assign(value.get(), POINTER_OFFSET(loop->head.data, offset));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Exec callback of the "Set Attribute" operator.
 *
 * Reads the value from the operator property matching the type of the context mesh's active
 * attribute, then writes it to the active attribute of the selected elements of every object
 * in edit mode. Objects without an active attribute, or whose active attribute type cannot be
 * converted from the operator value, are skipped.
 *
 * \return #OPERATOR_FINISHED when at least one object was processed, #OPERATOR_CANCELLED
 * otherwise.
 */
static int mesh_set_attribute_exec(bContext *C, wmOperator *op)
{
  const Scene *scene = CTX_data_scene(C);
  ViewLayer *view_layer = CTX_data_view_layer(C);

  uint objects_len = 0;
  Object **objects = BKE_view_layer_array_from_objects_in_edit_mode_unique_data(
      scene, view_layer, CTX_wm_view3d(C), &objects_len);

  /* The context mesh's active attribute decides which operator property holds the value. */
  Mesh *active_mesh = ED_mesh_context(C);
  CustomDataLayer *active_attribute = BKE_id_attributes_active_get(&active_mesh->id);
  const eCustomDataType active_type = eCustomDataType(active_attribute->type);
  const CPPType &type = *bke::custom_data_type_to_cpp_type(active_type);

  BUFFER_FOR_CPP_TYPE_VALUE(type, buffer);
  BLI_SCOPED_DEFER([&]() { type.destruct(buffer); });

  const StringRefNull prop_name = rna_property_name_for_type(active_type);
  switch (active_type) {
    case CD_PROP_FLOAT:
      *static_cast<float *>(buffer) = RNA_float_get(op->ptr, prop_name.c_str());
      break;
    case CD_PROP_FLOAT2:
    case CD_PROP_FLOAT3:
    case CD_PROP_COLOR:
      /* The array length is defined by the RNA property that was looked up by name. */
      RNA_float_get_array(op->ptr, prop_name.c_str(), static_cast<float *>(buffer));
      break;
    case CD_PROP_BYTE_COLOR: {
      /* The property stores a float color, the attribute stores the encoded byte color. */
      ColorGeometry4f color;
      RNA_float_get_array(op->ptr, prop_name.c_str(), color);
      *static_cast<ColorGeometry4b *>(buffer) = color.encode();
      break;
    }
    case CD_PROP_BOOL:
      *static_cast<bool *>(buffer) = RNA_boolean_get(op->ptr, prop_name.c_str());
      break;
    case CD_PROP_INT8:
      *static_cast<int8_t *>(buffer) = RNA_int_get(op->ptr, prop_name.c_str());
      break;
    case CD_PROP_INT32:
      *static_cast<int32_t *>(buffer) = RNA_int_get(op->ptr, prop_name.c_str());
      break;
    default:
      BLI_assert_unreachable();
  }
  const GPointer value(type, buffer);
  const bke::DataTypeConversions &conversions = bke::get_implicit_type_conversions();

  bool changed = false;
  for (const int i : IndexRange(objects_len)) {
    Object *object = objects[i];
    Mesh *mesh = static_cast<Mesh *>(object->data);
    BMEditMesh *em = BKE_editmesh_from_object(object);
    BMesh *bm = em->bm;

    CustomDataLayer *layer = BKE_id_attributes_active_get(&mesh->id);
    if (!layer) {
      continue;
    }
    /* Use implicit conversions to try to handle the case where the active attribute has a
     * different type on multiple objects. The destination type has to come from this object's
     * own layer: the value is written at `layer->offset`, so it must match that layer's type. */
    const eCustomDataType dst_data_type = eCustomDataType(layer->type);
    const CPPType *dst_type_ptr = bke::custom_data_type_to_cpp_type(dst_data_type);
    if (dst_type_ptr == nullptr) {
      /* NOTE(review): assumes a null result means no generic type exists for this layer type
       * (e.g. a type the poll function would reject) - confirm. */
      continue;
    }
    const CPPType &dst_type = *dst_type_ptr;
    if (&type != &dst_type && !conversions.is_convertible(type, dst_type)) {
      continue;
    }
    BUFFER_FOR_CPP_TYPE_VALUE(dst_type, dst_buffer);
    BLI_SCOPED_DEFER([&]() { dst_type.destruct(dst_buffer); });
    conversions.convert_to_uninitialized(type, dst_type, value.get(), dst_buffer);
    const GPointer dst_value(dst_type, dst_buffer);

    switch (BKE_id_attribute_domain(&mesh->id, layer)) {
      case ATTR_DOMAIN_POINT:
        bmesh_vert_edge_face_layer_selected_values_set(
            *bm, BM_VERTS_OF_MESH, dst_value, layer->offset);
        break;
      case ATTR_DOMAIN_EDGE:
        bmesh_vert_edge_face_layer_selected_values_set(
            *bm, BM_EDGES_OF_MESH, dst_value, layer->offset);
        break;
      case ATTR_DOMAIN_FACE:
        bmesh_vert_edge_face_layer_selected_values_set(
            *bm, BM_FACES_OF_MESH, dst_value, layer->offset);
        break;
      case ATTR_DOMAIN_CORNER:
        bmesh_loop_layer_selected_values_set(*em, dst_value, layer->offset);
        break;
      default:
        BLI_assert_unreachable();
        break;
    }

    changed = true;
    EDBMUpdate_Params update{};
    update.calc_looptri = false;
    update.calc_normals = false;
    update.is_destructive = false;
    EDBM_update(mesh, &update);
  }

  MEM_freeN(objects);

  return changed ? OPERATOR_FINISHED : OPERATOR_CANCELLED;
}
|
||||
|
||||
/**
 * Invoke callback of the "Set Attribute" operator.
 *
 * When the active BMesh element's type matches the domain of the active attribute and the
 * matching operator property has not been set yet, the property is initialized with the value
 * stored on the active element. The operator properties popup is shown in every case.
 */
static int mesh_set_attribute_invoke(bContext *C, wmOperator *op, const wmEvent *event)
{
  Mesh *mesh = ED_mesh_context(C);
  BMesh *bm = mesh->edit_mesh->bm;

  const CustomDataLayer *layer = BKE_id_attributes_active_get(&mesh->id);
  const eCustomDataType data_type = eCustomDataType(layer->type);
  const eAttrDomain domain = BKE_id_attribute_domain(&mesh->id, layer);
  const BMElem *active_elem = BM_mesh_active_elem_get(bm);

  /* Only support filling the active data when the active selection mode matches the active
   * attribute domain. NOTE: This doesn't work well for corner domain attributes. */
  if (active_elem == nullptr || active_elem->head.htype != domain_to_htype(domain)) {
    return WM_operator_props_popup(C, op, event);
  }

  const StringRefNull prop_name = rna_property_name_for_type(data_type);
  const CPPType &type = *bke::custom_data_type_to_cpp_type(data_type);
  const GPointer active_value(type, POINTER_OFFSET(active_elem->head.data, layer->offset));

  PropertyRNA *prop = RNA_struct_find_property(op->ptr, prop_name.c_str());
  if (RNA_property_is_set(op->ptr, prop)) {
    /* Keep a value that was already assigned to the property. */
    return WM_operator_props_popup(C, op, event);
  }

  switch (data_type) {
    case CD_PROP_FLOAT:
      RNA_property_float_set(op->ptr, prop, *active_value.get<float>());
      break;
    case CD_PROP_FLOAT2:
      RNA_property_float_set_array(op->ptr, prop, *active_value.get<float2>());
      break;
    case CD_PROP_FLOAT3:
      RNA_property_float_set_array(op->ptr, prop, *active_value.get<float3>());
      break;
    case CD_PROP_BYTE_COLOR:
      /* Byte colors are decoded to floats because the property stores a float color. */
      RNA_property_float_set_array(op->ptr, prop, active_value.get<ColorGeometry4b>()->decode());
      break;
    case CD_PROP_COLOR:
      RNA_property_float_set_array(op->ptr, prop, *active_value.get<ColorGeometry4f>());
      break;
    case CD_PROP_BOOL:
      RNA_property_boolean_set(op->ptr, prop, *active_value.get<bool>());
      break;
    case CD_PROP_INT8:
      RNA_property_int_set(op->ptr, prop, *active_value.get<int8_t>());
      break;
    case CD_PROP_INT32:
      RNA_property_int_set(op->ptr, prop, *active_value.get<int32_t>());
      break;
    default:
      BLI_assert_unreachable();
  }

  return WM_operator_props_popup(C, op, event);
}
|
||||
|
||||
/**
 * UI callback of the "Set Attribute" operator.
 *
 * Shows a single property: the operator value property whose name matches the data type of the
 * mesh's active attribute, labeled with that attribute's name.
 *
 * \param C: Context used to look up the edited mesh.
 * \param op: Operator whose layout is filled and whose properties are displayed.
 */
static void mesh_set_attribute_ui(bContext *C, wmOperator *op)
{
  /* Column layout with property separation enabled and decoration disabled. */
  uiLayout *column = uiLayoutColumn(op->layout, true);
  uiLayoutSetPropSep(column, true);
  uiLayoutSetPropDecorate(column, false);

  /* NOTE(review): the active attribute is dereferenced unchecked; presumably the operator's poll
   * guarantees it exists -- confirm. */
  Mesh *mesh = ED_mesh_context(C);
  CustomDataLayer *layer = BKE_id_attributes_active_get(&mesh->id);

  /* Pick the operator property that corresponds to the attribute's data type. */
  const StringRefNull value_prop_name = rna_property_name_for_type(eCustomDataType(layer->type));
  const char *label = layer->name;

  uiItemR(column, op->ptr, value_prop_name.c_str(), 0, label, ICON_NONE);
}
|
||||
|
||||
} // namespace set_attribute
|
||||
|
||||
} // namespace blender::ed::mesh
|
||||
|
||||
/**
 * Operator type definition for "Set Attribute".
 *
 * Assigns the operator's identifiers, callbacks and flags, then defines one "Value" property for
 * each kind of value the operator can write (float, 2D/3D float vector, integer, color, boolean).
 */
void MESH_OT_attribute_set(wmOperatorType *ot)
{
  using namespace blender::ed::mesh;
  using namespace blender::ed::mesh::set_attribute;

  /* Identifiers. */
  ot->name = "Set Attribute";
  ot->description = "Set values of the active attribute for selected elements";
  ot->idname = "MESH_OT_attribute_set";

  /* Callbacks. */
  ot->poll = mesh_active_attribute_poll;
  ot->invoke = mesh_set_attribute_invoke;
  ot->exec = mesh_set_attribute_exec;
  ot->ui = mesh_set_attribute_ui;

  ot->flag = OPTYPE_REGISTER | OPTYPE_UNDO;

  /* Static storage for the default color (all components 1).
   * NOTE(review): presumably static because the property definition keeps the pointer to the
   * default array -- confirm against the RNA define API. */
  static blender::float4 default_color(1);

  /* Floating point properties share the full float range for both hard and soft limits. */
  const float float_min = -FLT_MAX;
  const float float_max = FLT_MAX;

  RNA_def_float(ot->srna, "value_float", 0.0f, float_min, float_max, "Value", "", float_min, float_max);
  RNA_def_float_array(
      ot->srna, "value_float_vector_2d", 2, nullptr, float_min, float_max, "Value", "", float_min, float_max);
  RNA_def_float_array(
      ot->srna, "value_float_vector_3d", 3, nullptr, float_min, float_max, "Value", "", float_min, float_max);
  RNA_def_int(ot->srna, "value_int", 0, INT_MIN, INT_MAX, "Value", "", INT_MIN, INT_MAX);
  /* Color keeps the full float range as hard limits, with soft limits of 0..1. */
  RNA_def_float_color(
      ot->srna, "value_color", 4, default_color, float_min, float_max, "Value", "", 0.0f, 1.0f);
  RNA_def_boolean(ot->srna, "value_bool", false, "Value", "");
}
|
||||
|
||||
/** \} */
|
|
@ -120,6 +120,10 @@ void MESH_OT_primitive_ico_sphere_add(struct wmOperatorType *ot);
|
|||
|
||||
void MESH_OT_primitive_cube_add_gizmo(struct wmOperatorType *ot);
|
||||
|
||||
/* *** editmesh_attribute.cc *** */
|
||||
|
||||
void MESH_OT_attribute_set(struct wmOperatorType *ot);
|
||||
|
||||
/* *** editmesh_bevel.c *** */
|
||||
|
||||
void MESH_OT_bevel(struct wmOperatorType *ot);
|
||||
|
|
|
@ -54,6 +54,8 @@ void ED_operatortypes_mesh(void)
|
|||
|
||||
WM_operatortype_append(MESH_OT_primitive_cube_add_gizmo);
|
||||
|
||||
WM_operatortype_append(MESH_OT_attribute_set);
|
||||
|
||||
WM_operatortype_append(MESH_OT_duplicate);
|
||||
WM_operatortype_append(MESH_OT_remove_doubles);
|
||||
WM_operatortype_append(MESH_OT_spin);
|
||||
|
|
|
@ -2715,7 +2715,7 @@ void SCULPT_brush_strength_color(SculptSession *ss,
|
|||
|
||||
void SCULPT_calc_vertex_displacement(SculptSession *ss,
|
||||
const Brush *brush,
|
||||
float rgba[4],
|
||||
float rgba[3],
|
||||
float out_offset[3])
|
||||
{
|
||||
mul_v3_fl(rgba, ss->cache->bstrength);
|
||||
|
|
|
@ -254,9 +254,20 @@ void GPU_materials_free(struct Main *bmain);
|
|||
|
||||
struct Scene *GPU_material_scene(GPUMaterial *material);
|
||||
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
|
||||
/* Return the most optimal shader configuration for the given material. */
|
||||
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
|
||||
/* Return the base un-optimized shader. */
|
||||
struct GPUShader *GPU_material_get_shader_base(GPUMaterial *material);
|
||||
const char *GPU_material_get_name(GPUMaterial *material);
|
||||
|
||||
/**
|
||||
* Material Optimization.
|
||||
* \note Compiles optimal version of shader graph, populating mat->optimized_pass.
|
||||
* This operation should always be deferred until existing compilations have completed.
|
||||
* Default un-optimized materials will still exist for interactive material editing performance.
|
||||
*/
|
||||
void GPU_material_optimize(GPUMaterial *mat);
|
||||
|
||||
/**
|
||||
* Return can be NULL if it's a world material.
|
||||
*/
|
||||
|
@ -274,6 +285,24 @@ eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat
|
|||
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
|
||||
bool GPU_material_optimization_ready(GPUMaterial *mat);
|
||||
|
||||
/**
|
||||
* Store reference to a similar default material for async PSO cache warming.
|
||||
*
|
||||
* This function expects `material` to have not yet been compiled and for `default_material` to be
|
||||
* ready. When compiling `material` as part of an async shader compilation job, use existing PSO
|
||||
* descriptors from `default_material`'s shader to also compile PSOs for this new material
|
||||
* asynchronously, rather than at runtime.
|
||||
*
|
||||
* The `default_material` options should match this new material's options in order
|
||||
* for PSO descriptors to match those needed by the new `material`.
|
||||
*
|
||||
* NOTE: `default_material` must exist when `GPU_material_compile(..)` is called for
|
||||
* `material`.
|
||||
*
|
||||
* See `GPU_shader_warm_cache(..)` for more information.
|
||||
*/
|
||||
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material);
|
||||
|
||||
struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
|
||||
/**
|
||||
* Create dynamic UBO from parameters
|
||||
|
|
|
@ -217,6 +217,48 @@ GPUShader *GPU_shader_create_ex(const char *vertcode,
|
|||
bool GPU_shader_transform_feedback_enable(GPUShader *shader, struct GPUVertBuf *vertbuf);
|
||||
void GPU_shader_transform_feedback_disable(GPUShader *shader);
|
||||
|
||||
/**
|
||||
* Shader cache warming.
|
||||
* For each shader, rendering APIs perform a two-step compilation:
|
||||
*
|
||||
* * The first stage is Front-End compilation which only needs to be performed once, and generates
|
||||
* a portable intermediate representation. This happens during `gpu::Shader::finalize()`.
|
||||
*
|
||||
* * The second is Back-End compilation which compiles a device-specific executable shader
|
||||
* program. This compilation requires some contextual pipeline state which is baked into the
|
||||
* executable shader source, producing a Pipeline State Object (PSO). In OpenGL, backend
|
||||
* compilation happens in the background, within the driver, but can still incur runtime stutters.
|
||||
* In Metal/Vulkan, PSOs are compiled explicitly. These are currently resolved within the backend
|
||||
* based on the current pipeline state and can incur runtime stalls when they occur.
|
||||
*
|
||||
* Shader Cache warming uses the specified parent shader set using `GPU_shader_set_parent(..)` as a
|
||||
* template reference for pre-compiling Render Pipeline State Objects (PSOs) outside of the main
|
||||
* render pipeline.
|
||||
*
|
||||
* PSOs require descriptors containing information on the render state for a given shader, which
|
||||
* includes input vertex data layout and output pixel formats, along with some state such as
|
||||
* blend mode and color output masks. As this state information is usually consistent between
|
||||
* similar draws, we can assign a parent shader and use this shader's cached pipeline states to
|
||||
* prime compilations.
|
||||
*
|
||||
* Shaders do not necessarily have to be similar in functionality to be used as a parent, so long
|
||||
* as the #GPUVertFormat and #GPUFrameBuffer which they are used with remain the same.
|
||||
* Other bindings such as textures, uniforms and UBOs are all assigned independently as dynamic
|
||||
* state.
|
||||
*
|
||||
* This function should be called asynchronously, mitigating the impact of run-time stuttering from
|
||||
* dynamic compilation of PSOs during normal rendering.
|
||||
*
|
||||
* \param shader: The shader whose cache to warm.
|
||||
* \param limit: The maximum number of PSOs to compile within a call. Specifying
|
||||
* a limit <= 0 will compile a PSO for all cached PSOs in the parent shader. */
|
||||
void GPU_shader_warm_cache(GPUShader *shader, int limit);
|
||||
|
||||
/* We expect the parent shader to be compiled and already have some cached PSOs when being assigned
|
||||
* as a reference. Ensure the parent shader still exists when `GPU_shader_warm_cache(..)` is
|
||||
* called. */
|
||||
void GPU_shader_set_parent(GPUShader *shader, GPUShader *parent);
|
||||
|
||||
/** DEPRECATED: Kept only because of BGL API. */
|
||||
int GPU_shader_get_program(GPUShader *shader);
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "BKE_material.h"
|
||||
|
||||
#include "GPU_capabilities.h"
|
||||
#include "GPU_context.h"
|
||||
#include "GPU_material.h"
|
||||
#include "GPU_shader.h"
|
||||
#include "GPU_uniform_buffer.h"
|
||||
|
@ -95,6 +96,9 @@ struct GPUPass {
|
|||
uint32_t hash;
|
||||
/** Whether we have already tried to compile the attached GPUShader. */
|
||||
bool compiled;
|
||||
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
|
||||
* during pass code generation. */
|
||||
bool should_optimize;
|
||||
};
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
@ -252,6 +256,11 @@ class GPUCodegen {
|
|||
ListBase ubo_inputs_ = {nullptr, nullptr};
|
||||
GPUInput *cryptomatte_input_ = nullptr;
|
||||
|
||||
/** Cache parameters for complexity heuristic. */
|
||||
uint nodes_total_ = 0;
|
||||
uint textures_total_ = 0;
|
||||
uint uniforms_total_ = 0;
|
||||
|
||||
public:
|
||||
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
|
||||
{
|
||||
|
@ -292,6 +301,17 @@ class GPUCodegen {
|
|||
return hash_;
|
||||
}
|
||||
|
||||
/* Heuristic determined during pass codegen for whether a
|
||||
* more optimal variant of this material should be compiled. */
|
||||
bool should_optimize_heuristic() const
|
||||
{
|
||||
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
|
||||
* the baseline is met.*/
|
||||
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
|
||||
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
|
||||
return do_optimize;
|
||||
}
|
||||
|
||||
private:
|
||||
void set_unique_ids();
|
||||
|
||||
|
@ -413,6 +433,9 @@ void GPUCodegen::generate_resources()
|
|||
}
|
||||
}
|
||||
|
||||
/* Increment heuristic. */
|
||||
textures_total_ = slot;
|
||||
|
||||
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
|
||||
/* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
|
||||
ss << "struct NodeTree {\n";
|
||||
|
@ -454,11 +477,16 @@ void GPUCodegen::generate_library()
|
|||
GPUCodegenCreateInfo &info = *create_info;
|
||||
|
||||
void *value;
|
||||
GSetIterState pop_state = {};
|
||||
while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
|
||||
/* Iterate over libraries. We need to keep this struct intact in case
|
||||
* it is required for the optimization pass. */
|
||||
GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
|
||||
while (!BLI_ghashIterator_done(ihash)) {
|
||||
value = BLI_ghashIterator_getKey(ihash);
|
||||
auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
|
||||
info.dependencies_generated.extend_non_duplicates(deps);
|
||||
BLI_ghashIterator_step(ihash);
|
||||
}
|
||||
BLI_ghashIterator_free(ihash);
|
||||
}
|
||||
|
||||
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
||||
|
@ -526,6 +554,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
|||
}
|
||||
}
|
||||
eval_ss << ");\n\n";
|
||||
|
||||
/* Increment heuristic. */
|
||||
nodes_total_++;
|
||||
}
|
||||
|
||||
char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
|
||||
|
@ -589,6 +620,7 @@ void GPUCodegen::generate_uniform_buffer()
|
|||
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
|
||||
/* We handle the UBO uniforms separately. */
|
||||
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
|
||||
uniforms_total_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -661,10 +693,17 @@ void GPUCodegen::generate_graphs()
|
|||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
GPUNodeGraph *graph,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk)
|
||||
void *thunk,
|
||||
bool optimize_graph)
|
||||
{
|
||||
gpu_node_graph_prune_unused(graph);
|
||||
|
||||
/* If Optimize flag is passed in, we are generating an optimized
|
||||
* variant of the GPUMaterial's GPUPass. */
|
||||
if (optimize_graph) {
|
||||
gpu_node_graph_optimize(graph);
|
||||
}
|
||||
|
||||
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
|
||||
* shader. */
|
||||
gpu_node_graph_finalize_uniform_attrs(graph);
|
||||
|
@ -672,23 +711,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
|||
GPUCodegen codegen(material, graph);
|
||||
codegen.generate_graphs();
|
||||
codegen.generate_cryptomatte();
|
||||
codegen.generate_uniform_buffer();
|
||||
|
||||
/* Cache lookup: Reuse shaders already compiled. */
|
||||
GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
|
||||
GPUPass *pass_hash = nullptr;
|
||||
|
||||
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
|
||||
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
|
||||
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
|
||||
if (!gpu_pass_is_valid(pass_hash)) {
|
||||
/* Shader has already been created but failed to compile. */
|
||||
return nullptr;
|
||||
if (!optimize_graph) {
|
||||
/* The optimized version of the shader should not re-generate a UBO.
|
||||
* The UBO will not be used for this variant. */
|
||||
codegen.generate_uniform_buffer();
|
||||
|
||||
/** Cache lookup: Reuse shaders already compiled.
|
||||
* NOTE: We only perform cache look-up for non-optimized shader
|
||||
* graphs, as baked constant data among other optimizations will generate too many
|
||||
* shader source permutations, with minimal re-usability. */
|
||||
pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
|
||||
|
||||
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
|
||||
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
|
||||
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
|
||||
if (!gpu_pass_is_valid(pass_hash)) {
|
||||
/* Shader has already been created but failed to compile. */
|
||||
return nullptr;
|
||||
}
|
||||
/* No collision, just return the pass. */
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
pass_hash->refcount += 1;
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass_hash;
|
||||
}
|
||||
/* No collision, just return the pass. */
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
pass_hash->refcount += 1;
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass_hash;
|
||||
}
|
||||
|
||||
/* Either the shader is not compiled or there is a hash collision...
|
||||
|
@ -726,14 +775,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
|||
pass->create_info = codegen.create_info;
|
||||
pass->hash = codegen.hash_get();
|
||||
pass->compiled = false;
|
||||
/* Only flag pass optimization hint if this is the first generated pass for a material.
|
||||
* Optimized passes cannot be optimized further, even if the heuristic is still not
|
||||
* favorable. */
|
||||
pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
|
||||
|
||||
codegen.create_info = nullptr;
|
||||
|
||||
gpu_pass_cache_insert_after(pass_hash, pass);
|
||||
/* Only insert non-optimized graphs into cache.
|
||||
* Optimized graphs will continuously be recompiled with new unique source during material
|
||||
* editing, and thus causing the cache to fill up quickly with materials offering minimal
|
||||
* re-use. */
|
||||
if (!optimize_graph) {
|
||||
gpu_pass_cache_insert_after(pass_hash, pass);
|
||||
}
|
||||
}
|
||||
return pass;
|
||||
}
|
||||
|
||||
/**
 * Return the optimization hint stored on \a pass during its initial code generation.
 *
 * \note Always false unless the active backend is Metal: the repeated compilations required
 * for material specialization cause impactful CPU stalls on OpenGL platforms.
 */
bool GPU_pass_should_optimize(GPUPass *pass)
{
  if (GPU_backend_get_type() != GPU_BACKEND_METAL) {
    return false;
  }
  return pass->should_optimize;
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
|
|
@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
|
|||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
struct GPUNodeGraph *graph,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk);
|
||||
void *thunk,
|
||||
bool optimize_graph);
|
||||
GPUShader *GPU_pass_shader_get(GPUPass *pass);
|
||||
bool GPU_pass_compile(GPUPass *pass, const char *shname);
|
||||
void GPU_pass_release(GPUPass *pass);
|
||||
bool GPU_pass_should_optimize(GPUPass *pass);
|
||||
|
||||
/* Module */
|
||||
|
||||
|
|
|
@ -34,6 +34,8 @@
|
|||
|
||||
#include "DRW_engine.h"
|
||||
|
||||
#include "PIL_time.h"
|
||||
|
||||
#include "gpu_codegen.h"
|
||||
#include "gpu_node_graph.h"
|
||||
|
||||
|
@ -43,6 +45,17 @@
|
|||
#define MAX_COLOR_BAND 128
|
||||
#define MAX_GPU_SKIES 8
|
||||
|
||||
/** Whether the optimized variant of the GPUPass should be created asynchronously.
|
||||
* Usage of this depends on whether there are possible threading challenges of doing so.
|
||||
* Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
|
||||
* compilation, though this option exists in case any potential scenarios for material graph
|
||||
* optimization cause a slow down on the main thread.
|
||||
*
|
||||
* NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
|
||||
* this flag controls whether shader node graph source serialization happens on the compilation
|
||||
* worker thread as well. */
|
||||
#define ASYNC_OPTIMIZED_PASS_CREATION 0
|
||||
|
||||
typedef struct GPUColorBandBuilder {
|
||||
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
|
||||
int current_layer;
|
||||
|
@ -57,6 +70,27 @@ struct GPUMaterial {
|
|||
/* Contains #GPUShader and source code for deferred compilation.
|
||||
* Can be shared between similar material (i.e: sharing same node-tree topology). */
|
||||
GPUPass *pass;
|
||||
/* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
|
||||
* This shader variant bakes dynamic uniform data as constant. This variant will not use
|
||||
* the ubo, and instead bake constants directly into the shader source. */
|
||||
GPUPass *optimized_pass;
|
||||
/* Optimization status.
|
||||
* We also use this status to determine whether this material should be considered for
|
||||
* optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
|
||||
* `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
|
||||
* `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
|
||||
* performance to do so, based on the heuristic.
|
||||
*/
|
||||
eGPUMaterialOptimizationStatus optimization_status;
|
||||
double creation_time;
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
struct DeferredOptimizePass {
|
||||
GPUCodegenCallbackFn callback;
|
||||
void *thunk;
|
||||
} DeferredOptimizePass;
|
||||
struct DeferredOptimizePass optimize_pass_info;
|
||||
#endif
|
||||
|
||||
/** UBOs for this material parameters. */
|
||||
GPUUniformBuf *ubo;
|
||||
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
|
||||
|
@ -86,6 +120,12 @@ struct GPUMaterial {
|
|||
/* Low level node graph(s). Also contains resources needed by the material. */
|
||||
GPUNodeGraph graph;
|
||||
|
||||
/** Default material reference used for PSO cache warming. Default materials may perform
|
||||
* different operations, but the permutation will frequently share the same input PSO
|
||||
* descriptors. This enables asynchronous PSO compilation as part of the deferred compilation
|
||||
* pass, reducing runtime stuttering and improving responsiveness while compiling materials. */
|
||||
GPUMaterial *default_mat;
|
||||
|
||||
/** DEPRECATED: To remove. */
|
||||
bool has_surface_output;
|
||||
bool has_volume_output;
|
||||
|
@ -214,6 +254,9 @@ void GPU_material_free_single(GPUMaterial *material)
|
|||
|
||||
gpu_node_graph_free(&material->graph);
|
||||
|
||||
if (material->optimized_pass != NULL) {
|
||||
GPU_pass_release(material->optimized_pass);
|
||||
}
|
||||
if (material->pass != NULL) {
|
||||
GPU_pass_release(material->pass);
|
||||
}
|
||||
|
@ -252,12 +295,29 @@ Scene *GPU_material_scene(GPUMaterial *material)
|
|||
|
||||
/**
 * Return the pass that should be used for \a material.
 *
 * The optimized pass is returned when it exists and the material's optimization status is
 * `GPU_MAT_OPTIMIZATION_SUCCESS`, otherwise the default pass is returned (which may be NULL).
 *
 * A stale unconditional `return material->pass;` used to precede this selection, which made the
 * optimized variant unreachable; it has been removed.
 */
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
  const bool use_optimized = (GPU_material_optimization_status(material) ==
                              GPU_MAT_OPTIMIZATION_SUCCESS) &&
                             (material->optimized_pass != NULL);
  return use_optimized ? material->optimized_pass : material->pass;
}
|
||||
|
||||
/**
 * Return the most optimal shader available for \a material.
 *
 * The shader of the optimized pass is preferred when that pass exists, the material's
 * optimization status is `GPU_MAT_OPTIMIZATION_SUCCESS` and the pass actually holds a shader.
 * Otherwise this falls back to the shader of the default pass, or NULL when there is no pass.
 *
 * A stale unconditional return of the default pass' shader used to precede this selection,
 * which made the optimized variant unreachable; it has been removed.
 */
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
  if ((GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS) &&
      (material->optimized_pass != NULL)) {
    GPUShader *optimized_shader = GPU_pass_shader_get(material->optimized_pass);
    if (optimized_shader != NULL) {
      return optimized_shader;
    }
  }

  /* Fall back to the base, un-optimized shader. */
  return (material->pass != NULL) ? GPU_pass_shader_get(material->pass) : NULL;
}
|
||||
|
||||
/**
 * Return the shader of the material's default (non-optimized) pass,
 * or NULL when the material has no pass.
 */
GPUShader *GPU_material_get_shader_base(GPUMaterial *material)
{
  if (material->pass == NULL) {
    return NULL;
  }
  return GPU_pass_shader_get(material->pass);
}
|
||||
|
||||
const char *GPU_material_get_name(GPUMaterial *material)
|
||||
|
@ -665,6 +725,41 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
|
|||
mat->status = status;
|
||||
}
|
||||
|
||||
/** Return the optimization status currently stored on \a mat. */
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
  const eGPUMaterialOptimizationStatus current_status = mat->optimization_status;
  return current_status;
}
|
||||
|
||||
/**
 * Store \a status as the optimization status of \a mat.
 *
 * Setting `GPU_MAT_OPTIMIZATION_READY` also restarts the material's creation timer,
 * which delays the optimization pass.
 */
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
{
  mat->optimization_status = status;

  if (status != GPU_MAT_OPTIMIZATION_READY) {
    return;
  }

  /* Reset creation timer to delay optimization pass. */
  mat->creation_time = PIL_check_seconds_timer();
}
|
||||
|
||||
/**
 * Return true once enough time has passed since the material's creation timer was last set
 * for the optimization to be queued.
 */
bool GPU_material_optimization_ready(GPUMaterial *mat)
{
  /* Minimum age, in seconds, before optimization is queued.
   *
   * Frequently modified materials would otherwise cause CPU overhead from excessive
   * compilation. Optimization is entirely asynchronous, so it is still worth doing quickly to
   * avoid build-up and improve runtime performance; the threshold only prevents compilations
   * from being queued frame after frame. */
  const double optimization_time_threshold_s = 1.2;

  const double elapsed_s = PIL_check_seconds_timer() - mat->creation_time;
  return elapsed_s >= optimization_time_threshold_s;
}
|
||||
|
||||
/**
 * Store \a default_material as the default material reference of \a material.
 *
 * A material may not reference itself: this asserts in debug builds and the request is
 * ignored otherwise.
 */
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material)
{
  BLI_assert(material != default_material);
  if (material == default_material) {
    return;
  }
  material->default_mat = default_material;
}
|
||||
|
||||
/* Code generation */
|
||||
|
||||
bool GPU_material_has_surface_output(GPUMaterial *mat)
|
||||
|
@ -730,6 +825,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
mat->uuid = shader_uuid;
|
||||
mat->flag = GPU_MATFLAG_UPDATED;
|
||||
mat->status = GPU_MAT_CREATED;
|
||||
mat->default_mat = NULL;
|
||||
mat->is_volume_shader = is_volume_shader;
|
||||
mat->graph.used_libraries = BLI_gset_new(
|
||||
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
|
||||
|
@ -748,7 +844,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
|
||||
{
|
||||
/* Create source code and search pass cache for an already compiled version. */
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
|
||||
|
||||
if (mat->pass == NULL) {
|
||||
/* We had a cache hit and the shader has already failed to compile. */
|
||||
|
@ -756,11 +852,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
|||
gpu_node_graph_free(&mat->graph);
|
||||
}
|
||||
else {
|
||||
/* Determine whether we should generate an optimized variant of the graph.
|
||||
* Heuristic is based on complexity of default material pass and shader node graph. */
|
||||
if (GPU_pass_should_optimize(mat->pass)) {
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
|
||||
}
|
||||
|
||||
GPUShader *sh = GPU_pass_shader_get(mat->pass);
|
||||
if (sh != NULL) {
|
||||
/* We had a cache hit and the shader is already compiled. */
|
||||
mat->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate optimized pass. */
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
mat->optimized_pass = NULL;
|
||||
mat->optimize_pass_info.callback = callback;
|
||||
mat->optimize_pass_info.thunk = thunk;
|
||||
#else
|
||||
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
|
||||
if (mat->optimized_pass == NULL) {
|
||||
/* Failed to create optimized pass. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
}
|
||||
else {
|
||||
GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
|
||||
if (optimized_sh != NULL) {
|
||||
/* Optimized shader already available. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -810,8 +939,37 @@ void GPU_material_compile(GPUMaterial *mat)
|
|||
if (success) {
|
||||
GPUShader *sh = GPU_pass_shader_get(mat->pass);
|
||||
if (sh != NULL) {
|
||||
|
||||
/** Perform async Render Pipeline State Object (PSO) compilation.
|
||||
*
|
||||
* Warm PSO cache within async compilation thread using default material as source.
|
||||
* GPU_shader_warm_cache(..) performs the API-specific PSO compilation using the assigned
|
||||
* parent shader's cached PSO descriptors as an input.
|
||||
*
|
||||
* This is only applied if the given material has a specified default reference
|
||||
* material available, and the default material is already compiled.
|
||||
*
|
||||
* As PSOs do not always match for default shaders, we limit warming for PSO
|
||||
* configurations to ensure compile time remains fast, as these first
|
||||
* entries will be the most commonly used PSOs. As not all PSOs are necessarily
|
||||
* required immediately, this limit should remain low (1-3 at most).
|
||||
* */
|
||||
if (mat->default_mat != NULL && mat->default_mat != mat) {
|
||||
if (mat->default_mat->pass != NULL) {
|
||||
GPUShader *parent_sh = GPU_pass_shader_get(mat->default_mat->pass);
|
||||
if (parent_sh) {
|
||||
GPU_shader_set_parent(sh, parent_sh);
|
||||
GPU_shader_warm_cache(sh, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Flag success. */
|
||||
mat->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
/* Only free node graph nodes if not required by secondary optimization pass. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
}
|
||||
}
|
||||
else {
|
||||
mat->status = GPU_MAT_FAILED;
|
||||
|
@ -825,6 +983,89 @@ void GPU_material_compile(GPUMaterial *mat)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Compile the optimized pass of \a mat and update its optimization status.
 *
 * - Does nothing when the status is already `GPU_MAT_OPTIMIZATION_SKIP` or
 *   `GPU_MAT_OPTIMIZATION_SUCCESS`.
 * - When the base material has not compiled successfully yet, the status is reset to
 *   `GPU_MAT_OPTIMIZATION_READY` and the attempt is postponed.
 * - Otherwise the optimized pass is compiled. On success its shader is parented to the base
 *   pass' shader for cache warming and the status becomes `GPU_MAT_OPTIMIZATION_SUCCESS`;
 *   on any failure the status becomes `GPU_MAT_OPTIMIZATION_SKIP`.
 *
 * Once a compilation attempt was made, the nodes of the material's node graph are freed.
 */
void GPU_material_optimize(GPUMaterial *mat)
{
  /* Nothing to do when optimization is flagged as skipped or has already succeeded. */
  switch (mat->optimization_status) {
    case GPU_MAT_OPTIMIZATION_SKIP:
    case GPU_MAT_OPTIMIZATION_SUCCESS:
      return;
    default:
      break;
  }

  /* The original shader has to be fully compiled before optimization can be performed. */
  if (mat->status != GPU_MAT_SUCCESS) {
    /* Reset optimization status. */
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
    return;
  }

#if ASYNC_OPTIMIZED_PASS_CREATION == 1
  /* If the optimized pass is not valid, first generate optimized pass.
   * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
   * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
   * optimal, as these do not benefit from caching, due to baked constants. However, this could
   * possibly be cause for concern for certain cases. */
  if (mat->optimized_pass == NULL) {
    mat->optimized_pass = GPU_generate_pass(
        mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
    BLI_assert(mat->optimized_pass);
  }
#else
  if (mat->optimized_pass == NULL) {
    /* Optimized pass has not been created, skip future optimization attempts. */
    GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
    return;
  }
#endif

  /* NOTE: The shader may have already been compiled here since we are
   * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
  const char *shader_name = mat->name;
#else
  const char *shader_name = __func__;
#endif
  const bool compiled = GPU_pass_compile(mat->optimized_pass, shader_name);

  /* Any failure disables future optimization attempts. */
  eGPUMaterialOptimizationStatus result = GPU_MAT_OPTIMIZATION_SKIP;

  if (!compiled) {
    /* Optimization pass generation failed: drop the pass. */
    GPU_pass_release(mat->optimized_pass);
    mat->optimized_pass = NULL;
  }
  else {
    GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
    if (optimized_sh != NULL) {
      /* Perform async Render Pipeline State Object (PSO) compilation.
       *
       * The original unoptimized shader is assigned as the "parent" of the optimized one, which
       * allows the GPU backend to compile PSOs within this asynchronous pass using the identical
       * PSO descriptors of the parent shader. This avoids run-time stuttering associated with
       * material optimization. */
      GPUShader *base_sh = GPU_pass_shader_get(mat->pass);
      if (base_sh != NULL) {
        GPU_shader_set_parent(optimized_sh, base_sh);
        GPU_shader_warm_cache(optimized_sh, -1);
      }

      /* Mark as complete. */
      result = GPU_MAT_OPTIMIZATION_SUCCESS;
    }
    /* Otherwise the optimized pass failed to produce a shader: `result` stays SKIP. */
  }

  GPU_material_optimization_status_set(mat, result);

  /* Release node graph as no longer needed. */
  gpu_node_graph_free_nodes(&mat->graph);
}
|
||||
|
||||
void GPU_materials_free(Main *bmain)
|
||||
{
|
||||
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
|
||||
|
@ -847,6 +1088,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
|||
material->graph.used_libraries = BLI_gset_new(
|
||||
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
|
||||
material->refcount = 1;
|
||||
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
|
||||
material->optimized_pass = NULL;
|
||||
material->default_mat = NULL;
|
||||
|
||||
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
|
||||
construct_function_cb(thunk, material);
|
||||
|
@ -855,7 +1099,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
|||
gpu_material_ramp_texture_build(material);
|
||||
|
||||
/* Lookup an existing pass in the cache or generate a new one. */
|
||||
material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
|
||||
material->pass = GPU_generate_pass(
|
||||
material, &material->graph, generate_code_function_cb, thunk, false);
|
||||
material->optimized_pass = NULL;
|
||||
|
||||
/* The pass already exists in the pass cache but its shader already failed to compile. */
|
||||
if (material->pass == NULL) {
|
||||
|
@ -868,7 +1114,10 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
|||
GPUShader *shader = GPU_pass_shader_get(material->pass);
|
||||
if (shader != NULL) {
|
||||
material->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
/* Only free node graph if not required by secondary optimization pass. */
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
}
|
||||
return material;
|
||||
}
|
||||
|
||||
|
|
|
@ -983,3 +983,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_node_graph_optimize(GPUNodeGraph *graph)
{
  /* Walk every input of every node and turn uniform-backed data into constant data. */
  LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      /* A linked input: retag a uniform link as a constant link. */
      if (input->link && input->link->link_type == GPU_NODE_LINK_UNIFORM) {
        input->link->link_type = GPU_NODE_LINK_CONSTANT;
      }

      /* Only uniform sources are rewritten, anything else is left untouched. */
      if (input->source != GPU_SOURCE_UNIFORM) {
        continue;
      }

      /* Closure inputs become struct sources, every other type becomes a constant source. */
      if (input->type == GPU_CLOSURE) {
        input->source = GPU_SOURCE_STRUCT;
      }
      else {
        input->source = GPU_SOURCE_CONSTANT;
      }
    }
  }

  /* TODO: Consider performing other node graph optimizations here. */
}
|
||||
|
|
|
@ -190,6 +190,19 @@ void gpu_nodes_tag(GPUNodeLink *link, eGPUNodeTag tag);
|
|||
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
|
||||
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
|
||||
|
||||
/**
|
||||
* Optimize node graph for optimized material shader path.
|
||||
* Once the base material has been generated, we can modify the shader
|
||||
* node graph to create one which will produce an optimally performing shader.
|
||||
* This currently involves baking uniform data into constant data to enable
|
||||
* aggressive constant folding by the compiler in order to reduce complexity and
|
||||
* shader core memory pressure.
|
||||
*
|
||||
* NOTE: Graph optimizations will produce a shader which needs to be re-compiled
|
||||
* more frequently, however, the default material pass will always exist to fall
|
||||
* back on. */
|
||||
void gpu_node_graph_optimize(GPUNodeGraph *graph);
|
||||
|
||||
/**
|
||||
* Free intermediate node graph.
|
||||
*/
|
||||
|
|
|
@ -500,6 +500,26 @@ const char *GPU_shader_get_name(GPUShader *shader)
|
|||
return unwrap(shader)->name_get();
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Shader cache warming
|
||||
* \{ */
|
||||
|
||||
/**
 * Register \a parent as the parent shader of \a shader.
 *
 * \a shader must not be null and must differ from \a parent (both are asserted).
 * If both handles are the same shader, nothing is assigned.
 */
void GPU_shader_set_parent(GPUShader *shader, GPUShader *parent)
{
  BLI_assert(shader != nullptr);
  BLI_assert(shader != parent);

  /* Never let a shader become its own parent. */
  if (shader == parent) {
    return;
  }

  unwrap(shader)->parent_set(unwrap(parent));
}
|
||||
|
||||
/**
 * Forward the cache-warming request to the backend shader implementation.
 *
 * \param limit: Passed through unchanged to `Shader::warm_cache`.
 */
void GPU_shader_warm_cache(GPUShader *shader, int limit)
{
  Shader *shd = unwrap(shader);
  shd->warm_cache(limit);
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
|
|
@ -34,6 +34,12 @@ class Shader {
|
|||
/** For debugging purpose. */
|
||||
char name[64];
|
||||
|
||||
/* Parent shader can be used for shaders which are derived from the same source material.
|
||||
* The child shader can pull information from its parent to prepare additional resources
|
||||
* such as PSOs upfront. This enables asynchronous PSO compilation which mitigates stuttering
|
||||
* when updating new materials. */
|
||||
Shader *parent_shader_ = nullptr;
|
||||
|
||||
public:
|
||||
Shader(const char *name);
|
||||
virtual ~Shader();
|
||||
|
@ -43,6 +49,11 @@ class Shader {
|
|||
virtual void fragment_shader_from_glsl(MutableSpan<const char *> sources) = 0;
|
||||
virtual void compute_shader_from_glsl(MutableSpan<const char *> sources) = 0;
|
||||
virtual bool finalize(const shader::ShaderCreateInfo *info = nullptr) = 0;
|
||||
/* Pre-warms PSOs using parent shader's cached PSO descriptors. Limit specifies maximum PSOs to
|
||||
* warm. If -1, compiles all PSO permutations in parent shader.
|
||||
*
|
||||
* See `GPU_shader_warm_cache(..)` in `GPU_shader.h` for more information. */
|
||||
virtual void warm_cache(int limit) = 0;
|
||||
|
||||
virtual void transform_feedback_names_set(Span<const char *> name_list,
|
||||
eGPUShaderTFBType geom_type) = 0;
|
||||
|
@ -69,7 +80,17 @@ class Shader {
|
|||
inline const char *const name_get() const
|
||||
{
|
||||
return name;
|
||||
};
|
||||
}
|
||||
|
||||
/* Store the parent shader pointer. No other state is touched. */
void parent_set(Shader *parent)
{
  this->parent_shader_ = parent;
}
|
||||
|
||||
/* Return the parent shader pointer previously stored by `parent_set` (null by default). */
Shader *parent_get() const
{
  return this->parent_shader_;
}
|
||||
|
||||
static bool srgb_uniform_dirty_get();
|
||||
static void set_srgb_uniform(GPUShader *shader);
|
||||
|
|
|
@ -31,6 +31,14 @@ struct MTLVertexAttributeDescriptorPSO {
|
|||
return uint64_t((uint64_t(this->format) ^ (this->offset << 4) ^ (this->buffer_index << 8) ^
|
||||
(this->format_conversion_mode << 12)));
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
format = MTLVertexFormatInvalid;
|
||||
offset = 0;
|
||||
buffer_index = 0;
|
||||
format_conversion_mode = GPU_FETCH_FLOAT;
|
||||
}
|
||||
};
|
||||
|
||||
struct MTLVertexBufferLayoutDescriptorPSO {
|
||||
|
@ -48,6 +56,13 @@ struct MTLVertexBufferLayoutDescriptorPSO {
|
|||
{
|
||||
return uint64_t(uint64_t(this->step_function) ^ (this->step_rate << 4) ^ (this->stride << 8));
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
step_function = MTLVertexStepFunctionPerVertex;
|
||||
step_rate = 1;
|
||||
stride = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/* SSBO attribute state caching. */
|
||||
|
@ -76,6 +91,16 @@ struct MTLSSBOAttribute {
|
|||
{
|
||||
return (memcmp(this, &other, sizeof(MTLSSBOAttribute)) == 0);
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
mtl_attribute_index = 0;
|
||||
vbo_id = 0;
|
||||
attribute_offset = 0;
|
||||
per_vertex_stride = 0;
|
||||
attribute_format = 0;
|
||||
is_instance = false;
|
||||
}
|
||||
};
|
||||
|
||||
struct MTLVertexDescriptor {
|
||||
|
@ -241,10 +266,10 @@ struct MTLRenderPipelineStateDescriptor {
|
|||
hash ^= uint64_t(this->dest_rgb_blend_factor) << 37; /* Up to 18 (5 bits). */
|
||||
hash ^= uint64_t(this->src_alpha_blend_factor) << 42; /* Up to 18 (5 bits). */
|
||||
hash ^= uint64_t(this->src_rgb_blend_factor) << 47; /* Up to 18 (5 bits). */
|
||||
}
|
||||
|
||||
for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
|
||||
hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */
|
||||
for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
|
||||
hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */
|
||||
}
|
||||
}
|
||||
|
||||
hash |= uint64_t((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) << 62;
|
||||
|
@ -262,9 +287,9 @@ struct MTLRenderPipelineStateDescriptor {
|
|||
vertex_descriptor.total_attributes = 0;
|
||||
vertex_descriptor.max_attribute_value = 0;
|
||||
vertex_descriptor.num_vert_buffers = 0;
|
||||
vertex_descriptor.prim_topology_class = MTLPrimitiveTopologyClassUnspecified;
|
||||
for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) {
|
||||
vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
|
||||
vertex_descriptor.attributes[i].offset = 0;
|
||||
vertex_descriptor.attributes[i].reset();
|
||||
}
|
||||
vertex_descriptor.uses_ssbo_vertex_fetch = false;
|
||||
vertex_descriptor.num_ssbo_attributes = 0;
|
||||
|
|
|
@ -76,6 +76,8 @@ struct MTLRenderPipelineStateInstance {
|
|||
int null_attribute_buffer_index;
|
||||
/* buffer bind used for transform feedback output buffer. */
|
||||
int transform_feedback_buffer_index;
|
||||
/* Topology class. */
|
||||
MTLPrimitiveTopologyClass prim_type;
|
||||
|
||||
/** Reflection Data.
|
||||
* Currently used to verify whether uniform buffers of incorrect sizes being bound, due to left
|
||||
|
@ -188,6 +190,7 @@ class MTLShader : public Shader {
|
|||
MTLRenderPipelineStateDescriptor current_pipeline_state_;
|
||||
/* Cache of compiled PipelineStateObjects. */
|
||||
blender::Map<MTLRenderPipelineStateDescriptor, MTLRenderPipelineStateInstance *> pso_cache_;
|
||||
std::mutex pso_cache_lock_;
|
||||
|
||||
/** Compute pipeline state and Compute PSO caching. */
|
||||
MTLComputePipelineStateInstance compute_pso_instance_;
|
||||
|
@ -256,6 +259,7 @@ class MTLShader : public Shader {
|
|||
/* Compile and build - Return true if successful. */
|
||||
bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
|
||||
bool finalize_compute(const shader::ShaderCreateInfo *info);
|
||||
void warm_cache(int limit) override;
|
||||
|
||||
/* Utility. */
|
||||
bool is_valid()
|
||||
|
@ -331,8 +335,14 @@ class MTLShader : public Shader {
|
|||
void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source);
|
||||
void shader_compute_source_from_msl(NSString *input_compute_source);
|
||||
void set_interface(MTLShaderInterface *interface);
|
||||
|
||||
MTLRenderPipelineStateInstance *bake_current_pipeline_state(MTLContext *ctx,
|
||||
MTLPrimitiveTopologyClass prim_type);
|
||||
MTLRenderPipelineStateInstance *bake_pipeline_state(
|
||||
MTLContext *ctx,
|
||||
MTLPrimitiveTopologyClass prim_type,
|
||||
const MTLRenderPipelineStateDescriptor &pipeline_descriptor);
|
||||
|
||||
bool bake_compute_pipeline_state(MTLContext *ctx);
|
||||
const MTLComputePipelineStateInstance &get_compute_pipeline_state();
|
||||
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#include "BKE_global.h"
|
||||
|
||||
#include "PIL_time.h"
|
||||
|
||||
#include "BLI_string.h"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
|
@ -110,6 +112,7 @@ MTLShader::~MTLShader()
|
|||
}
|
||||
|
||||
/* Free Pipeline Cache. */
|
||||
pso_cache_lock_.lock();
|
||||
for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) {
|
||||
if (pso_inst->vert) {
|
||||
[pso_inst->vert release];
|
||||
|
@ -123,6 +126,7 @@ MTLShader::~MTLShader()
|
|||
delete pso_inst;
|
||||
}
|
||||
pso_cache_.clear();
|
||||
pso_cache_lock_.unlock();
|
||||
|
||||
/* Free Compute pipeline state object. */
|
||||
if (compute_pso_instance_.compute) {
|
||||
|
@ -616,6 +620,36 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
|
|||
push_constant_modified_ = is_dirty;
|
||||
}
|
||||
|
||||
void MTLShader::warm_cache(int limit)
|
||||
{
|
||||
if (parent_shader_ != nullptr) {
|
||||
MTLContext *ctx = MTLContext::get();
|
||||
MTLShader *parent_mtl = reinterpret_cast<MTLShader *>(parent_shader_);
|
||||
|
||||
/* Extract PSO descriptors from parent shader. */
|
||||
blender::Vector<MTLRenderPipelineStateDescriptor> descriptors;
|
||||
blender::Vector<MTLPrimitiveTopologyClass> prim_classes;
|
||||
|
||||
parent_mtl->pso_cache_lock_.lock();
|
||||
for (const auto &pso_entry : parent_mtl->pso_cache_.items()) {
|
||||
const MTLRenderPipelineStateDescriptor &pso_descriptor = pso_entry.key;
|
||||
const MTLRenderPipelineStateInstance *pso_inst = pso_entry.value;
|
||||
descriptors.append(pso_descriptor);
|
||||
prim_classes.append(pso_inst->prim_type);
|
||||
}
|
||||
parent_mtl->pso_cache_lock_.unlock();
|
||||
|
||||
/* Warm shader cache with applied limit.
|
||||
* If limit is <= 0, compile all PSO permutations. */
|
||||
limit = (limit > 0) ? limit : descriptors.size();
|
||||
for (int i : IndexRange(min_ii(descriptors.size(), limit))) {
|
||||
const MTLRenderPipelineStateDescriptor &pso_descriptor = descriptors[i];
|
||||
const MTLPrimitiveTopologyClass &prim_class = prim_classes[i];
|
||||
bake_pipeline_state(ctx, prim_class, pso_descriptor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
@ -681,12 +715,10 @@ void MTLShader::set_interface(MTLShaderInterface *interface)
|
|||
MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
||||
MTLContext *ctx, MTLPrimitiveTopologyClass prim_type)
|
||||
{
|
||||
/** Populate global pipeline descriptor and use this to prepare new PSO. */
|
||||
/* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should
|
||||
* be thread-safe due to organization of high-level renderer. If there are any issues, then
|
||||
* access can be guarded as appropriate. */
|
||||
BLI_assert(this);
|
||||
MTLShaderInterface *mtl_interface = this->get_interface();
|
||||
BLI_assert(mtl_interface);
|
||||
BLI_assert(this->is_valid());
|
||||
|
||||
/* NOTE(Metal): Vertex input assembly description will have been populated externally
|
||||
|
@ -756,15 +788,32 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
pipeline_descriptor.vertex_descriptor.prim_topology_class =
|
||||
(requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;
|
||||
|
||||
/* Bake pipeline state using global descriptor. */
|
||||
return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
|
||||
}
|
||||
|
||||
/* Variant which bakes a pipeline state based on an existing MTLRenderPipelineStateDescriptor.
|
||||
* This function should be callable from a secondary compilation thread. */
|
||||
MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
|
||||
MTLContext *ctx,
|
||||
MTLPrimitiveTopologyClass prim_type,
|
||||
const MTLRenderPipelineStateDescriptor &pipeline_descriptor)
|
||||
{
|
||||
/* Fetch shader interface. */
|
||||
MTLShaderInterface *mtl_interface = this->get_interface();
|
||||
BLI_assert(mtl_interface);
|
||||
BLI_assert(this->is_valid());
|
||||
|
||||
/* Check if current PSO exists in the cache. */
|
||||
pso_cache_lock_.lock();
|
||||
MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor);
|
||||
MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
|
||||
pso_cache_lock_.unlock();
|
||||
|
||||
if (pipeline_state != nullptr) {
|
||||
return pipeline_state;
|
||||
}
|
||||
|
||||
shader_debug_printf("Baking new pipeline variant for shader: %s\n", this->name);
|
||||
|
||||
/* Generate new Render Pipeline State Object (PSO). */
|
||||
@autoreleasepool {
|
||||
/* Prepare Render Pipeline Descriptor. */
|
||||
|
@ -774,7 +823,6 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
|
||||
|
||||
/* Prepare Vertex descriptor based on current pipeline vertex binding state. */
|
||||
MTLRenderPipelineStateDescriptor ¤t_state = pipeline_descriptor;
|
||||
MTLRenderPipelineDescriptor *desc = pso_descriptor_;
|
||||
[desc reset];
|
||||
pso_descriptor_.label = [NSString stringWithUTF8String:this->name];
|
||||
|
@ -784,7 +832,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
* specialization constant, customized per unique pipeline state permutation.
|
||||
*
|
||||
* NOTE: For binding point compaction, we could use the number of VBOs present
|
||||
* in the current PSO configuration `current_state.vertex_descriptor.num_vert_buffers`).
|
||||
* in the current PSO configuration `pipeline_descriptor.vertex_descriptor.num_vert_buffers`).
|
||||
* However, it is more efficient to simply offset the uniform buffer base index to the
|
||||
* maximal number of VBO bind-points, as then UBO bind-points for similar draw calls
|
||||
* will align and avoid the requirement for additional binding. */
|
||||
|
@ -792,7 +840,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
|
||||
/* Null buffer index is used if an attribute is not found in the
|
||||
* bound VBOs #VertexFormat. */
|
||||
int null_buffer_index = current_state.vertex_descriptor.num_vert_buffers;
|
||||
int null_buffer_index = pipeline_descriptor.vertex_descriptor.num_vert_buffers;
|
||||
bool using_null_buffer = false;
|
||||
|
||||
if (this->get_uses_ssbo_vertex_fetch()) {
|
||||
|
@ -806,11 +854,12 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1;
|
||||
}
|
||||
else {
|
||||
for (const uint i : IndexRange(current_state.vertex_descriptor.max_attribute_value + 1)) {
|
||||
for (const uint i :
|
||||
IndexRange(pipeline_descriptor.vertex_descriptor.max_attribute_value + 1)) {
|
||||
|
||||
/* Metal back-end attribute descriptor state. */
|
||||
MTLVertexAttributeDescriptorPSO &attribute_desc =
|
||||
current_state.vertex_descriptor.attributes[i];
|
||||
const MTLVertexAttributeDescriptorPSO &attribute_desc =
|
||||
pipeline_descriptor.vertex_descriptor.attributes[i];
|
||||
|
||||
/* Flag format conversion */
|
||||
/* In some cases, Metal cannot implicitly convert between data types.
|
||||
|
@ -860,10 +909,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
mtl_attribute.bufferIndex = attribute_desc.buffer_index;
|
||||
}
|
||||
|
||||
for (const uint i : IndexRange(current_state.vertex_descriptor.num_vert_buffers)) {
|
||||
for (const uint i : IndexRange(pipeline_descriptor.vertex_descriptor.num_vert_buffers)) {
|
||||
/* Metal back-end state buffer layout. */
|
||||
const MTLVertexBufferLayoutDescriptorPSO &buf_layout =
|
||||
current_state.vertex_descriptor.buffer_layouts[i];
|
||||
pipeline_descriptor.vertex_descriptor.buffer_layouts[i];
|
||||
/* Copy metal back-end buffer layout state into PSO descriptor.
|
||||
* NOTE: need to copy each element due to copying from internal
|
||||
* back-end descriptor to Metal API descriptor. */
|
||||
|
@ -875,7 +924,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
}
|
||||
|
||||
/* Mark empty attribute conversion. */
|
||||
for (int i = current_state.vertex_descriptor.max_attribute_value + 1;
|
||||
for (int i = pipeline_descriptor.vertex_descriptor.max_attribute_value + 1;
|
||||
i < GPU_VERT_ATTR_MAX_LEN;
|
||||
i++) {
|
||||
int MTL_attribute_conversion_mode = 0;
|
||||
|
@ -1039,7 +1088,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT;
|
||||
color_attachment++) {
|
||||
/* Fetch color attachment pixel format in back-end pipeline state. */
|
||||
MTLPixelFormat pixel_format = current_state.color_attachment_format[color_attachment];
|
||||
MTLPixelFormat pixel_format = pipeline_descriptor.color_attachment_format[color_attachment];
|
||||
/* Populate MTL API PSO attachment descriptor. */
|
||||
MTLRenderPipelineColorAttachmentDescriptor *col_attachment =
|
||||
desc.colorAttachments[color_attachment];
|
||||
|
@ -1048,19 +1097,19 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
if (pixel_format != MTLPixelFormatInvalid) {
|
||||
bool format_supports_blending = mtl_format_supports_blending(pixel_format);
|
||||
|
||||
col_attachment.writeMask = current_state.color_write_mask;
|
||||
col_attachment.blendingEnabled = current_state.blending_enabled &&
|
||||
col_attachment.writeMask = pipeline_descriptor.color_write_mask;
|
||||
col_attachment.blendingEnabled = pipeline_descriptor.blending_enabled &&
|
||||
format_supports_blending;
|
||||
if (format_supports_blending && current_state.blending_enabled) {
|
||||
col_attachment.alphaBlendOperation = current_state.alpha_blend_op;
|
||||
col_attachment.rgbBlendOperation = current_state.rgb_blend_op;
|
||||
col_attachment.destinationAlphaBlendFactor = current_state.dest_alpha_blend_factor;
|
||||
col_attachment.destinationRGBBlendFactor = current_state.dest_rgb_blend_factor;
|
||||
col_attachment.sourceAlphaBlendFactor = current_state.src_alpha_blend_factor;
|
||||
col_attachment.sourceRGBBlendFactor = current_state.src_rgb_blend_factor;
|
||||
if (format_supports_blending && pipeline_descriptor.blending_enabled) {
|
||||
col_attachment.alphaBlendOperation = pipeline_descriptor.alpha_blend_op;
|
||||
col_attachment.rgbBlendOperation = pipeline_descriptor.rgb_blend_op;
|
||||
col_attachment.destinationAlphaBlendFactor = pipeline_descriptor.dest_alpha_blend_factor;
|
||||
col_attachment.destinationRGBBlendFactor = pipeline_descriptor.dest_rgb_blend_factor;
|
||||
col_attachment.sourceAlphaBlendFactor = pipeline_descriptor.src_alpha_blend_factor;
|
||||
col_attachment.sourceRGBBlendFactor = pipeline_descriptor.src_rgb_blend_factor;
|
||||
}
|
||||
else {
|
||||
if (current_state.blending_enabled && !format_supports_blending) {
|
||||
if (pipeline_descriptor.blending_enabled && !format_supports_blending) {
|
||||
shader_debug_printf(
|
||||
"[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support "
|
||||
"blending\n",
|
||||
|
@ -1069,8 +1118,8 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
}
|
||||
}
|
||||
}
|
||||
desc.depthAttachmentPixelFormat = current_state.depth_attachment_format;
|
||||
desc.stencilAttachmentPixelFormat = current_state.stencil_attachment_format;
|
||||
desc.depthAttachmentPixelFormat = pipeline_descriptor.depth_attachment_format;
|
||||
desc.stencilAttachmentPixelFormat = pipeline_descriptor.stencil_attachment_format;
|
||||
|
||||
/* Compile PSO */
|
||||
MTLAutoreleasedRenderPipelineReflection reflection_data;
|
||||
|
@ -1090,7 +1139,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
return nullptr;
|
||||
}
|
||||
else {
|
||||
#ifndef NDEBUG
|
||||
#if 0
|
||||
NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx);
|
||||
#endif
|
||||
}
|
||||
|
@ -1103,7 +1152,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
|
||||
pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
|
||||
pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
|
||||
pso_inst->shader_pso_index = pso_cache_.size();
|
||||
pso_inst->prim_type = prim_type;
|
||||
|
||||
pso_inst->reflection_data_available = (reflection_data != nil);
|
||||
if (reflection_data != nil) {
|
||||
|
@ -1189,9 +1238,14 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
|
|||
[pso_inst->pso retain];
|
||||
|
||||
/* Insert into pso cache. */
|
||||
pso_cache_lock_.lock();
|
||||
pso_inst->shader_pso_index = pso_cache_.size();
|
||||
pso_cache_.add(pipeline_descriptor, pso_inst);
|
||||
shader_debug_printf("PSO CACHE: Stored new variant in PSO cache for shader '%s'\n",
|
||||
this->name);
|
||||
pso_cache_lock_.unlock();
|
||||
shader_debug_printf(
|
||||
"PSO CACHE: Stored new variant in PSO cache for shader '%s' Hash: '%llu'\n",
|
||||
this->name,
|
||||
pipeline_descriptor.hash());
|
||||
return pso_inst;
|
||||
}
|
||||
}
|
||||
|
@ -1256,7 +1310,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
|
|||
return false;
|
||||
}
|
||||
else {
|
||||
#ifndef NDEBUG
|
||||
#if 0
|
||||
NSLog(@"Successfully compiled compute PSO for shader: %s (Metal Context: %p)\n",
|
||||
this->name,
|
||||
ctx);
|
||||
|
|
|
@ -47,6 +47,7 @@ class GLShader : public Shader {
|
|||
void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
|
||||
void compute_shader_from_glsl(MutableSpan<const char *> sources) override;
|
||||
bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
|
||||
/* No-op: this backend does nothing when asked to warm the PSO cache. */
void warm_cache(int /*limit*/) override {}
|
||||
|
||||
std::string resources_declare(const shader::ShaderCreateInfo &info) const override;
|
||||
std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override;
|
||||
|
|
|
@ -35,6 +35,7 @@ class VKShader : public Shader {
|
|||
void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
|
||||
void compute_shader_from_glsl(MutableSpan<const char *> sources) override;
|
||||
bool finalize(const shader::ShaderCreateInfo *info = nullptr) override;
|
||||
/* No-op: this backend does nothing when asked to warm the PSO cache.
 * The parameter name is commented out to avoid unused-parameter warnings. */
void warm_cache(int /*limit*/) override {}
|
||||
|
||||
void transform_feedback_names_set(Span<const char *> name_list,
|
||||
eGPUShaderTFBType geom_type) override;
|
||||
|
|
|
@ -86,6 +86,7 @@ void MeshFromGeometry::fixup_invalid_faces()
|
|||
/* Skip and remove faces that have fewer than 3 corners. */
|
||||
mesh_geometry_.total_loops_ -= curr_face.corner_count_;
|
||||
mesh_geometry_.face_elements_.remove_and_reorder(face_idx);
|
||||
--face_idx;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -128,6 +129,7 @@ void MeshFromGeometry::fixup_invalid_faces()
|
|||
/* Remove the invalid face. */
|
||||
mesh_geometry_.total_loops_ -= curr_face.corner_count_;
|
||||
mesh_geometry_.face_elements_.remove_and_reorder(face_idx);
|
||||
--face_idx;
|
||||
|
||||
Vector<Vector<int>> new_faces = fixup_invalid_polygon(global_vertices_.vertices, face_verts);
|
||||
|
||||
|
|
|
@ -486,6 +486,15 @@ TEST_F(obj_importer_test, import_faces_invalid_or_with_holes)
|
|||
import_and_check("faces_invalid_or_with_holes.obj", expect, std::size(expect), 0);
|
||||
}
|
||||
|
||||
TEST_F(obj_importer_test, import_invalid_faces)
{
  /* The two objects expected after importing `invalid_faces.obj`. */
  Expectation expected_objects[] = {
      {"OBCube", OB_MESH, 8, 12, 6, 24, float3(1, 1, -1), float3(-1, 1, 1)},
      {"OBTheMesh", OB_MESH, 5, 3, 1, 3, float3(-2, 0, -2), float3(0, 2, 0)},
  };
  const auto expected_count = std::size(expected_objects);
  import_and_check("invalid_faces.obj", expected_objects, expected_count, 0);
}
|
||||
|
||||
TEST_F(obj_importer_test, import_invalid_indices)
|
||||
{
|
||||
Expectation expect[] = {
|
||||
|
|
|
@ -4714,7 +4714,7 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna)
|
|||
RNA_def_property_range(prop, 0.0f, 1.0f);
|
||||
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
|
||||
|
||||
prop = RNA_def_property(srna, "sculpt_curves_cage", PROP_BOOLEAN, PROP_NONE);
|
||||
prop = RNA_def_property(srna, "show_sculpt_curves_cage", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "overlay.flag", V3D_OVERLAY_SCULPT_CURVES_CAGE);
|
||||
RNA_def_property_ui_text(
|
||||
prop, "Sculpt Curves Cage", "Show original curves that are currently being edited");
|
||||
|
@ -4733,12 +4733,12 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna)
|
|||
RNA_def_property_range(prop, 0.0f, 1.0f);
|
||||
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
|
||||
|
||||
prop = RNA_def_property(srna, "sculpt_show_mask", PROP_BOOLEAN, PROP_NONE);
|
||||
prop = RNA_def_property(srna, "show_sculpt_mask", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "overlay.flag", V3D_OVERLAY_SCULPT_SHOW_MASK);
|
||||
RNA_def_property_ui_text(prop, "Sculpt Show Mask", "");
|
||||
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
|
||||
|
||||
prop = RNA_def_property(srna, "sculpt_show_face_sets", PROP_BOOLEAN, PROP_NONE);
|
||||
prop = RNA_def_property(srna, "show_sculpt_face_sets", PROP_BOOLEAN, PROP_NONE);
|
||||
RNA_def_property_boolean_sdna(prop, NULL, "overlay.flag", V3D_OVERLAY_SCULPT_SHOW_FACE_SETS);
|
||||
RNA_def_property_ui_text(prop, "Sculpt Show Face Sets", "");
|
||||
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL);
|
||||
|
|
|
@ -330,7 +330,7 @@ void GeoTreeLog::ensure_used_named_attributes()
|
|||
GeoTreeLog &child_log = modifier_log_->get_tree_log(child_hash);
|
||||
child_log.ensure_used_named_attributes();
|
||||
if (const std::optional<int32_t> &group_node_id = child_log.tree_loggers_[0]->group_node_id) {
|
||||
for (const auto &item : child_log.used_named_attributes.items()) {
|
||||
for (const auto item : child_log.used_named_attributes.items()) {
|
||||
add_attribute(*group_node_id, item.key, item.value);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue