Fix_105606_MetalTextureUploadRegression #1

Closed
Michael Parkin-White wants to merge 17 commits from Fix_105606_MetalTextureUploadRegression into Fix_103605_MetalBarycentrics

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
8 changed files with 51 additions and 34 deletions
Showing only changes of commit 87766429b9 - Show all commits

View File

@ -567,6 +567,10 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
thread_scoped_lock lock(existing_devices_mutex); thread_scoped_lock lock(existing_devices_mutex);
if (MetalDevice *instance = get_device_by_ID(device_id, lock)) { if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
if (mtlLibrary) { if (mtlLibrary) {
if (error && [error localizedDescription]) {
VLOG_WARNING << "MSL compilation messages: " << [[error localizedDescription] UTF8String];
}
instance->mtlLibrary[pso_type] = mtlLibrary; instance->mtlLibrary[pso_type] = mtlLibrary;
starttime = time_dt(); starttime = time_dt();

View File

@ -715,7 +715,7 @@ void MetalKernelPipeline::compile()
} }
} }
}; };
if (computePipelineStateDescriptor.linkedFunctions) { if (linked_functions) {
addComputePipelineFunctionsWithDescriptor(); addComputePipelineFunctionsWithDescriptor();
} }
@ -748,7 +748,7 @@ void MetalKernelPipeline::compile()
} }
/* Add pipeline into the new archive (unless we did it earlier). */ /* Add pipeline into the new archive (unless we did it earlier). */
if (pipeline && !computePipelineStateDescriptor.linkedFunctions) { if (pipeline && !linked_functions) {
addComputePipelineFunctionsWithDescriptor(); addComputePipelineFunctionsWithDescriptor();
} }
} }

View File

@ -38,7 +38,7 @@ ccl_device_inline void gpu_parallel_sort_bucket_pass(const uint num_states,
ccl_gpu_shared int *buckets, ccl_gpu_shared int *buckets,
const ushort local_id, const ushort local_id,
const ushort local_size, const ushort local_size,
const ushort grid_id) const uint grid_id)
{ {
/* Zero the bucket sizes. */ /* Zero the bucket sizes. */
if (local_id < max_shaders) { if (local_id < max_shaders) {
@ -89,7 +89,7 @@ ccl_device_inline void gpu_parallel_sort_write_pass(const uint num_states,
ccl_gpu_shared int *local_offset, ccl_gpu_shared int *local_offset,
const ushort local_id, const ushort local_id,
const ushort local_size, const ushort local_size,
const ushort grid_id) const uint grid_id)
{ {
/* Calculate each partition's global offset from the prefix sum of the active state counts per /* Calculate each partition's global offset from the prefix sum of the active state counts per
* partition. */ * partition. */

View File

@ -109,7 +109,7 @@ struct kernel_gpu_##name \
const uint metal_global_id, \ const uint metal_global_id, \
const ushort metal_local_id, \ const ushort metal_local_id, \
const ushort metal_local_size, \ const ushort metal_local_size, \
const ushort metal_grid_id, \ const uint metal_grid_id, \
uint simdgroup_size, \ uint simdgroup_size, \
uint simd_lane_index, \ uint simd_lane_index, \
uint simd_group_index, \ uint simd_group_index, \
@ -122,7 +122,7 @@ kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \
const uint metal_global_id [[thread_position_in_grid]], \ const uint metal_global_id [[thread_position_in_grid]], \
const ushort metal_local_id [[thread_position_in_threadgroup]], \ const ushort metal_local_id [[thread_position_in_threadgroup]], \
const ushort metal_local_size [[threads_per_threadgroup]], \ const ushort metal_local_size [[threads_per_threadgroup]], \
const ushort metal_grid_id [[threadgroup_position_in_grid]], \ const uint metal_grid_id [[threadgroup_position_in_grid]], \
uint simdgroup_size [[threads_per_simdgroup]], \ uint simdgroup_size [[threads_per_simdgroup]], \
uint simd_lane_index [[thread_index_in_simdgroup]], \ uint simd_lane_index [[thread_index_in_simdgroup]], \
uint simd_group_index [[simdgroup_index_in_threadgroup]], \ uint simd_group_index [[simdgroup_index_in_threadgroup]], \
@ -135,7 +135,7 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
const uint metal_global_id, \ const uint metal_global_id, \
const ushort metal_local_id, \ const ushort metal_local_id, \
const ushort metal_local_size, \ const ushort metal_local_size, \
const ushort metal_grid_id, \ const uint metal_grid_id, \
uint simdgroup_size, \ uint simdgroup_size, \
uint simd_lane_index, \ uint simd_lane_index, \
uint simd_group_index, \ uint simd_group_index, \

View File

@ -149,7 +149,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || ((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) ||
((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
return; continue;
} }
#endif #endif
@ -159,7 +159,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
* generate a firefly for small lights since it is improbable. */ * generate a firefly for small lights since it is improbable. */
const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp); const ccl_global KernelLight *klight = &kernel_data_fetch(lights, lamp);
if (klight->use_caustics) if (klight->use_caustics)
return; continue;
} }
#endif /* __MNEE__ */ #endif /* __MNEE__ */
@ -169,7 +169,7 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) { if (is_zero(light_eval)) {
return; continue;
} }
/* MIS weighting. */ /* MIS weighting. */

View File

@ -2039,24 +2039,16 @@ static bool gpencil_generic_stroke_select(bContext *C,
/* init space conversion stuff */ /* init space conversion stuff */
gpencil_point_conversion_init(C, &gsc); gpencil_point_conversion_init(C, &gsc);
/* Use only object transform matrix because all layer transformations are already included
* in the evaluated stroke. */
Depsgraph *depsgraph = CTX_data_ensure_evaluated_depsgraph(C);
Object *ob_eval = depsgraph != NULL ? DEG_get_evaluated_object(depsgraph, ob) : ob;
float select_mat[4][4];
copy_m4_m4(select_mat, ob_eval->object_to_world);
/* deselect all strokes first? */ /* deselect all strokes first? */
if (SEL_OP_USE_PRE_DESELECT(sel_op) || GPENCIL_PAINT_MODE(gpd)) { if (SEL_OP_USE_PRE_DESELECT(sel_op)) {
/* Set selection index to 0. */ deselect_all_selected(C);
gpd->select_last_index = 0;
CTX_DATA_BEGIN (C, bGPDstroke *, gps, editable_gpencil_strokes) {
bGPDspoint *pt;
int i;
for (i = 0, pt = gps->points; i < gps->totpoints; i++, pt++) {
pt->flag &= ~GP_SPOINT_SELECT;
}
gps->flag &= ~GP_STROKE_SELECT;
BKE_gpencil_stroke_select_index_reset(gps);
}
CTX_DATA_END;
changed = true; changed = true;
} }
@ -2071,9 +2063,9 @@ static bool gpencil_generic_stroke_select(bContext *C,
for (i = 0, pt = gps->points; i < gps->totpoints; i++, pt++) { for (i = 0, pt = gps->points; i < gps->totpoints; i++, pt++) {
bGPDspoint *pt_active = (pt->runtime.pt_orig) ? pt->runtime.pt_orig : pt; bGPDspoint *pt_active = (pt->runtime.pt_orig) ? pt->runtime.pt_orig : pt;
/* Convert point coords to screen-space. */ /* Convert point coords to screen-space. Needs to use the evaluated point
const bool is_inside = is_inside_fn( * to consider modifiers. */
gsc.region, gpstroke_iter.diff_mat, &pt_active->x, user_data); const bool is_inside = is_inside_fn(gsc.region, select_mat, &pt->x, user_data);
if (strokemode == false) { if (strokemode == false) {
const bool is_select = (pt_active->flag & GP_SPOINT_SELECT) != 0; const bool is_select = (pt_active->flag & GP_SPOINT_SELECT) != 0;
const int sel_op_result = ED_select_op_action_deselected(sel_op, is_select, is_inside); const int sel_op_result = ED_select_op_action_deselected(sel_op, is_select, is_inside);

View File

@ -3882,6 +3882,16 @@ static void filelist_readjob_all_asset_library(FileListReadJob *job_params,
/* A valid, but empty file-list from now. */ /* A valid, but empty file-list from now. */
filelist->filelist.entries_num = 0; filelist->filelist.entries_num = 0;
asset_system::AssetLibrary *current_file_library;
{
AssetLibraryReference library_ref{};
library_ref.custom_library_index = -1;
library_ref.type = ASSET_LIBRARY_LOCAL;
current_file_library = AS_asset_library_load(job_params->current_main, library_ref);
}
job_params->load_asset_library = current_file_library;
filelist_readjob_main_assets_add_items(job_params, stop, do_update, progress); filelist_readjob_main_assets_add_items(job_params, stop, do_update, progress);
/* When only doing partially reload for main data, we're done. */ /* When only doing partially reload for main data, we're done. */
@ -3904,6 +3914,10 @@ static void filelist_readjob_all_asset_library(FileListReadJob *job_params,
if (root_path.is_empty()) { if (root_path.is_empty()) {
return; return;
} }
if (&nested_library == current_file_library) {
/* Skip the "Current File" library, it's already loaded above. */
return;
}
/* Override library info to read this library. */ /* Override library info to read this library. */
job_params->load_asset_library = &nested_library; job_params->load_asset_library = &nested_library;

View File

@ -360,9 +360,16 @@ static std::ostream &print_qualifier(std::ostream &os, const Qualifier &qualifie
return os; return os;
} }
static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res) static void print_resource(std::ostream &os,
const ShaderCreateInfo::Resource &res,
bool auto_resource_location)
{ {
if (GLContext::explicit_location_support) { if (auto_resource_location && res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
/* Skip explicit binding location for samplers when not needed, since drivers can usually
* handle more sampler declarations this way (as long as they're not actually used by the
* shader). See #105661. */
}
else if (GLContext::explicit_location_support) {
os << "layout(binding = " << res.slot; os << "layout(binding = " << res.slot;
if (res.bind_type == ShaderCreateInfo::Resource::BindType::IMAGE) { if (res.bind_type == ShaderCreateInfo::Resource::BindType::IMAGE) {
os << ", " << to_string(res.image.format); os << ", " << to_string(res.image.format);
@ -466,14 +473,14 @@ std::string GLShader::resources_declare(const ShaderCreateInfo &info) const
ss << "\n/* Pass Resources. */\n"; ss << "\n/* Pass Resources. */\n";
for (const ShaderCreateInfo::Resource &res : info.pass_resources_) { for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
print_resource(ss, res); print_resource(ss, res, info.auto_resource_location_);
} }
for (const ShaderCreateInfo::Resource &res : info.pass_resources_) { for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
print_resource_alias(ss, res); print_resource_alias(ss, res);
} }
ss << "\n/* Batch Resources. */\n"; ss << "\n/* Batch Resources. */\n";
for (const ShaderCreateInfo::Resource &res : info.batch_resources_) { for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
print_resource(ss, res); print_resource(ss, res, info.auto_resource_location_);
} }
for (const ShaderCreateInfo::Resource &res : info.batch_resources_) { for (const ShaderCreateInfo::Resource &res : info.batch_resources_) {
print_resource_alias(ss, res); print_resource_alias(ss, res);