Fix #95477: Cycles Metal support for large textures #104579

Open
Michael Jones (Apple) wants to merge 5 commits from Michael-Jones/blender:LargeTextureSupport into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
7 changed files with 45 additions and 15 deletions
Showing only changes of commit b75f467c8f - Show all commits

View File

@ -182,7 +182,7 @@ class Device {
{
}
/* Return true if device is ready for rendering, or report status if not. */
/* Report status and return true if device is ready for rendering. */
virtual bool is_ready(string & /*status*/) const
{
return true;

View File

@ -74,6 +74,7 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
bool have_mtlbuffer_textures = false;
id<MTLArgumentEncoder> mtlTextureBufferArgEncoder = nil;
id<MTLBuffer> texture_buffers = nil;
std::vector<id<MTLBuffer>> texture_buffer_slot_map;

View File

@ -322,6 +322,11 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
global_defines += "#define __KERNEL_LOCAL_ATOMIC_SORT__\n";
}
if (pso_type == PSO_GENERIC || have_mtlbuffer_textures) {
/* Only enable MTLBuffer textures if needed as they add a small overhead. */
global_defines += "#define __KERNEL_METAL_BUFFER_TEXTURES__\n";
}
if (use_metalrt) {
global_defines += "#define __METALRT__\n";
if (motion_blur) {
@ -897,6 +902,17 @@ bool MetalDevice::is_ready(string &status) const
DEVICE_KERNEL_NUM);
return false;
}
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
status = string_printf("%d kernels to optimize", num_requests);
}
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
status = "Using optimized intersection kernels";
}
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
status = "Using optimized kernels";
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
@ -933,7 +949,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
}
if (specialize_in_background) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_kernels_fn);
}
@ -1103,7 +1119,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
if (mem.data_width > 16384 || mem.data_height > 16384) {
use_tex = false;
}
if (auto str = getenv("USE_TEX")) {
if (auto str = getenv("CYCLES_METAL_FORCE_MTLTEXTURE")) {
use_tex = atoi(str);
}
if (use_tex) {
@ -1189,12 +1205,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
mmem->mtlBuffer = mtlBuffer;
}
else {
have_mtlbuffer_textures = true;
generic_alloc(mem);
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
mtlBuffer = metal_mem_map[&mem]->mtlBuffer;
mem.device_pointer = (device_ptr)mtlBuffer;
/* NOTE(review): leftover commented-out debug line (`mem.host_pointer = 0;`) — remove before landing. */
}
/* Resize once */

View File

@ -104,7 +104,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool any_specialization_happening_now();
int num_incomplete_specialization_requests();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);

View File

@ -857,16 +857,15 @@ void MetalDeviceKernels::wait_for_all()
}
}
bool MetalDeviceKernels::any_specialization_happening_now()
int MetalDeviceKernels::num_incomplete_specialization_requests()
{
/* Return the total number of incomplete specialization requests across all
 * ShaderCaches (typically at most 1 cache will have any). */
int total = 0;
for (int i = 0; i < g_shaderCacheCount; i++) {
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
return true;
}
total += g_shaderCache[i].second->incomplete_specialization_requests;
}
return false;
return total;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,

View File

@ -18,10 +18,7 @@ class MetalKernelContext {
: launch_params_metal(_launch_params_metal)
{}
/* texture fetch adapter functions */
typedef uint64_t ccl_gpu_tex_object_2D;
typedef uint64_t ccl_gpu_tex_object_3D;
#ifdef __KERNEL_METAL_BUFFER_TEXTURES__
template<typename T> ccl_device_forceinline T tex_fetch(device void* data, int64_t index)
{
return reinterpret_cast<ccl_global T *>(data)[index];
@ -162,13 +159,18 @@ class MetalKernelContext {
u[3] = (1.0f / 6.0f) * t * t * t; \
} \
(void)0
#endif /* __KERNEL_METAL_BUFFER_TEXTURES__ */
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int tex_id, float x, float y)
{
device const TextureInfo &info = kernel_data_fetch(texture_info, tex_id);
const uint tid(info.data);
const uint sid(info.data >> 32);
#ifndef __KERNEL_METAL_BUFFER_TEXTURES__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], make_float2(x,y));
#else
if (sid < 256) {
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], make_float2(x,y));
}
@ -216,6 +218,7 @@ class MetalKernelContext {
}
return r;
}
#endif
#endif
}
@ -225,6 +228,10 @@ class MetalKernelContext {
const uint tid(info.data);
const uint sid(info.data >> 32);
#ifndef __KERNEL_METAL_BUFFER_TEXTURES__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], P);
#else
if (sid < 256) {
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], P);
}
@ -323,6 +330,7 @@ class MetalKernelContext {
return r;
}
#endif
#endif
}
// clang-format on

View File

@ -706,6 +706,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
string_printf("Sample %d/%d", current_sample, num_samples));
}
/* Append any device-specific status (such as background kernel optimization) */
string device_status;
if (device->is_ready(device_status) && !device_status.empty()) {
substatus += string_printf(" (%s)", device_status.c_str());
}
/* TODO(sergey): Denoising status from the path trace. */
if (show_pause) {