Fix #95477: Cycles Metal support for large textures #104579
|
@ -182,7 +182,7 @@ class Device {
|
|||
{
|
||||
}
|
||||
|
||||
/* Return true if device is ready for rendering, or report status if not. */
|
||||
/* Report status and return true if device is ready for rendering. */
|
||||
virtual bool is_ready(string & /*status*/) const
|
||||
{
|
||||
return true;
|
||||
|
|
|
@ -74,6 +74,7 @@ class MetalDevice : public Device {
|
|||
id<MTLBuffer> texture_bindings_3d = nil;
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
|
||||
bool have_mtlbuffer_textures = false;
|
||||
id<MTLArgumentEncoder> mtlTextureBufferArgEncoder = nil;
|
||||
id<MTLBuffer> texture_buffers = nil;
|
||||
std::vector<id<MTLBuffer>> texture_buffer_slot_map;
|
||||
|
|
|
@ -322,6 +322,11 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
|
|||
global_defines += "#define __KERNEL_LOCAL_ATOMIC_SORT__\n";
|
||||
}
|
||||
|
||||
if (pso_type == PSO_GENERIC || have_mtlbuffer_textures) {
|
||||
/* Only enable MTLBuffer textures if needed as they add a small overhead. */
|
||||
global_defines += "#define __KERNEL_METAL_BUFFER_TEXTURES__\n";
|
||||
}
|
||||
|
||||
if (use_metalrt) {
|
||||
global_defines += "#define __METALRT__\n";
|
||||
if (motion_blur) {
|
||||
|
@ -897,6 +902,17 @@ bool MetalDevice::is_ready(string &status) const
|
|||
DEVICE_KERNEL_NUM);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
|
||||
status = string_printf("%d kernels to optimize", num_requests);
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
|
||||
status = "Using optimized intersection kernels";
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
|
||||
status = "Using optimized kernels";
|
||||
}
|
||||
|
||||
metal_printf("MetalDevice::is_ready(...) --> true\n");
|
||||
return true;
|
||||
}
|
||||
|
@ -933,7 +949,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
|
|||
}
|
||||
|
||||
if (specialize_in_background) {
|
||||
if (!MetalDeviceKernels::any_specialization_happening_now()) {
|
||||
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
|
||||
specialize_kernels_fn);
|
||||
}
|
||||
|
@ -1103,7 +1119,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||
if (mem.data_width > 16384 || mem.data_height > 16384) {
|
||||
use_tex = false;
|
||||
}
|
||||
if (auto str = getenv("USE_TEX")) {
|
||||
if (auto str = getenv("CYCLES_METAL_FORCE_MTLTEXTURE")) {
|
||||
use_tex = atoi(str);
|
||||
}
|
||||
if (use_tex) {
|
||||
|
@ -1189,12 +1205,12 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||
mmem->mtlBuffer = mtlBuffer;
|
||||
}
|
||||
else {
|
||||
have_mtlbuffer_textures = true;
|
||||
generic_alloc(mem);
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
mtlBuffer = metal_mem_map[&mem]->mtlBuffer;
|
||||
mem.device_pointer = (device_ptr)mtlBuffer;
|
||||
//mem.host_pointer = 0;
|
||||
}
|
||||
|
||||
/* Resize once */
|
||||
|
|
|
@ -104,7 +104,7 @@ struct MetalKernelPipeline {
|
|||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
namespace MetalDeviceKernels {
|
||||
|
||||
bool any_specialization_happening_now();
|
||||
int num_incomplete_specialization_requests();
|
||||
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, MetalPipelineType pso_type);
|
||||
|
|
|
@ -857,16 +857,15 @@ void MetalDeviceKernels::wait_for_all()
|
|||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::any_specialization_happening_now()
|
||||
int MetalDeviceKernels::num_incomplete_specialization_requests()
|
||||
{
|
||||
/* Return the total number of incomplete specialization requests across all ShaderCaches
|
||||
* (typically there will be only 1). */
|
||||
int total = 0;
|
||||
for (int i = 0; i < g_shaderCacheCount; i++) {
|
||||
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
|
||||
return true;
|
||||
}
|
||||
total += g_shaderCache[i].second->incomplete_specialization_requests;
|
||||
}
|
||||
return false;
|
||||
return total;
|
||||
}
|
||||
|
||||
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
|
||||
|
|
|
@ -18,10 +18,7 @@ class MetalKernelContext {
|
|||
: launch_params_metal(_launch_params_metal)
|
||||
{}
|
||||
|
||||
/* texture fetch adapter functions */
|
||||
typedef uint64_t ccl_gpu_tex_object_2D;
|
||||
typedef uint64_t ccl_gpu_tex_object_3D;
|
||||
|
||||
#ifdef __KERNEL_METAL_BUFFER_TEXTURES__
|
||||
template<typename T> ccl_device_forceinline T tex_fetch(device void* data, int64_t index)
|
||||
{
|
||||
return reinterpret_cast<ccl_global T *>(data)[index];
|
||||
|
@ -162,13 +159,18 @@ class MetalKernelContext {
|
|||
u[3] = (1.0f / 6.0f) * t * t * t; \
|
||||
} \
|
||||
(void)0
|
||||
|
||||
|
||||
#endif /* __KERNEL_METAL_BUFFER_TEXTURES__ */
|
||||
|
||||
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int tex_id, float x, float y)
|
||||
{
|
||||
device const TextureInfo &info = kernel_data_fetch(texture_info, tex_id);
|
||||
|
||||
const uint tid(info.data);
|
||||
const uint sid(info.data >> 32);
|
||||
#ifndef __KERNEL_METAL_BUFFER_TEXTURES__
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], make_float2(x,y));
|
||||
#else
|
||||
if (sid < 256) {
|
||||
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], make_float2(x,y));
|
||||
}
|
||||
|
@ -216,6 +218,7 @@ class MetalKernelContext {
|
|||
}
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -225,6 +228,10 @@ class MetalKernelContext {
|
|||
|
||||
const uint tid(info.data);
|
||||
const uint sid(info.data >> 32);
|
||||
|
||||
#ifndef __KERNEL_METAL_BUFFER_TEXTURES__
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], P);
|
||||
#else
|
||||
if (sid < 256) {
|
||||
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], P);
|
||||
}
|
||||
|
@ -323,6 +330,7 @@ class MetalKernelContext {
|
|||
return r;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// clang-format on
|
||||
|
|
|
@ -706,6 +706,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
|
|||
string_printf("Sample %d/%d", current_sample, num_samples));
|
||||
}
|
||||
|
||||
/* Append any device-specific status (such as background kernel optimization) */
|
||||
string device_status;
|
||||
if (device->is_ready(device_status) && !device_status.empty()) {
|
||||
substatus += string_printf(" (%s)", device_status.c_str());
|
||||
}
|
||||
|
||||
/* TODO(sergey): Denoising status from the path trace. */
|
||||
|
||||
if (show_pause) {
|
||||
|
|
Loading…
Reference in New Issue