Cycles: Metal host-side code
This patch adds the Metal host-side code:

- Add all core host-side Metal backend files (device_impl, queue, etc.)
- Add MetalRT BVH setup files
- Integrate with the Cycles device enumeration code
- Revive `path_source_replace_includes` in util/path (required for MSL compilation)

This patch also includes a couple of small kernel-side fixes:

- Add an implementation of `lgammaf` for Metal, based on [Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik](https://users.renyi.hu/~gergonemes/)
- Include "work_stealing.h" inside the Metal context class, because it now accesses state

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D13423
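The `lgammaf` addition mentioned above follows Nemes' asymptotic expansion of the Gamma function. As a rough sketch of that expansion (plain C++; the helper name and the shift cut-off are made up here, and this is not the actual kernel code added by the patch), log-gamma for positive arguments can be approximated like this:

```cpp
#include <cmath>
#include <cstdio>

/* Illustration only: log-gamma for positive x via Nemes' (2010) closed-form
 * approximation  Gamma(x) ~ sqrt(2*pi/x) * ((x + 1/(12*x - 1/(10*x))) / e)^x.
 * Small arguments are shifted up with the recurrence
 * lgamma(x) = lgamma(x + 1) - log(x) before applying the expansion. */
static float lgammaf_nemes(float x)
{
  float shift = 0.0f;
  while (x < 8.0f) {
    shift -= std::log(x); /* recurrence step; assumes x > 0 */
    x += 1.0f;
  }
  const float inner = x + 1.0f / (12.0f * x - 1.0f / (10.0f * x));
  /* 0.5 * log(2*pi / x) + x * (log(inner) - 1) */
  return shift + 0.5f * std::log(6.283185307f / x) + x * (std::log(inner) - 1.0f);
}

int main()
{
  /* lgamma(5) = log(4!) = log(24) ~= 3.1781 */
  std::printf("%f (libm reference %f)\n", lgammaf_nemes(5.0f), std::lgamma(5.0f));
  return 0;
}
```

For x = 5 the sketch agrees with the libm reference to about five decimal places, which is the kind of accuracy the expansion gives once the argument is shifted above the cut-off.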
@@ -101,6 +101,11 @@ add_definitions(${GL_DEFINITIONS})
 if(WITH_CYCLES_DEVICE_HIP)
   add_definitions(-DWITH_HIP)
 endif()
 
+if(WITH_CYCLES_DEVICE_METAL)
+  add_definitions(-DWITH_METAL)
+endif()
+
 if(WITH_MOD_FLUID)
   add_definitions(-DWITH_FLUID)
 endif()
 
@@ -28,7 +28,7 @@ def _configure_argument_parser():
                         action='store_true')
     parser.add_argument("--cycles-device",
                         help="Set the device to use for Cycles, overriding user preferences and the scene setting."
-                             "Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
+                             "Valid options are 'CPU', 'CUDA', 'OPTIX', 'HIP' or 'METAL'."
                              "Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
                         default=None)
     return parser
@@ -111,7 +111,8 @@ enum_device_type = (
     ('CPU', "CPU", "CPU", 0),
     ('CUDA', "CUDA", "CUDA", 1),
     ('OPTIX', "OptiX", "OptiX", 3),
-    ('HIP', "HIP", "HIP", 4)
+    ('HIP', "HIP", "HIP", 4),
+    ('METAL', "Metal", "Metal", 5)
 )
 
 enum_texture_limit = (
@@ -1312,8 +1313,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
 
     def get_device_types(self, context):
         import _cycles
-        has_cuda, has_optix, has_hip = _cycles.get_device_types()
-
+        has_cuda, has_optix, has_hip, has_metal = _cycles.get_device_types()
         list = [('NONE', "None", "Don't use compute device", 0)]
         if has_cuda:
             list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
@@ -1321,6 +1321,8 @@ class CyclesPreferences(bpy.types.AddonPreferences):
             list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
         if has_hip:
             list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
+        if has_metal:
+            list.append(('METAL', "Metal", "Use Metal for GPU acceleration", 5))
 
         return list
 
@@ -1346,7 +1348,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
 
     def update_device_entries(self, device_list):
         for device in device_list:
-            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
+            if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP', 'METAL'}:
                 continue
             # Try to find existing Device entry
             entry = self.find_existing_device_entry(device)
@@ -1390,7 +1392,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
         import _cycles
         # Ensure `self.devices` is not re-allocated when the second call to
         # get_devices_for_type is made, freeing items from the first list.
-        for device_type in ('CUDA', 'OPTIX', 'HIP'):
+        for device_type in ('CUDA', 'OPTIX', 'HIP', 'METAL'):
             self.update_device_entries(_cycles.available_devices(device_type))
 
         # Deprecated: use refresh_devices instead.
 
@@ -97,6 +97,11 @@ def use_cpu(context):
     return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
 
 
+def use_metal(context):
+    cscene = context.scene.cycles
+
+    return (get_device_type(context) == 'METAL' and cscene.device == 'GPU')
+
 
 def use_cuda(context):
     cscene = context.scene.cycles
 
@@ -27,6 +27,7 @@ enum ComputeDevice {
   COMPUTE_DEVICE_CUDA = 1,
   COMPUTE_DEVICE_OPTIX = 3,
   COMPUTE_DEVICE_HIP = 4,
+  COMPUTE_DEVICE_METAL = 5,
 
   COMPUTE_DEVICE_NUM
 };
@@ -85,6 +86,9 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene
   else if (compute_device == COMPUTE_DEVICE_HIP) {
     mask |= DEVICE_MASK_HIP;
   }
+  else if (compute_device == COMPUTE_DEVICE_METAL) {
+    mask |= DEVICE_MASK_METAL;
+  }
   vector<DeviceInfo> devices = Device::available_devices(mask);
 
   /* Match device preferences and available devices. */
 
@@ -906,16 +906,18 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/)
 static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
 {
   vector<DeviceType> device_types = Device::available_types();
-  bool has_cuda = false, has_optix = false, has_hip = false;
+  bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false;
   foreach (DeviceType device_type, device_types) {
     has_cuda |= (device_type == DEVICE_CUDA);
     has_optix |= (device_type == DEVICE_OPTIX);
     has_hip |= (device_type == DEVICE_HIP);
+    has_metal |= (device_type == DEVICE_METAL);
   }
-  PyObject *list = PyTuple_New(3);
+  PyObject *list = PyTuple_New(4);
   PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
   PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
   PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
+  PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal));
   return list;
 }
 
@@ -944,6 +946,9 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
   else if (override == "HIP") {
     BlenderSession::device_override = DEVICE_MASK_HIP;
   }
+  else if (override == "METAL") {
+    BlenderSession::device_override = DEVICE_MASK_METAL;
+  }
   else {
     printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
     Py_RETURN_FALSE;
@@ -31,6 +31,7 @@ set(SRC
   sort.cpp
   split.cpp
   unaligned.cpp
+  metal.mm
 )
 
 set(SRC_HEADERS
@@ -46,6 +47,7 @@ set(SRC_HEADERS
   sort.h
   split.h
   unaligned.h
+  metal.h
 )
 
 set(LIB
 
@@ -19,6 +19,7 @@
 
 #include "bvh/bvh2.h"
 #include "bvh/embree.h"
+#include "bvh/metal.h"
 #include "bvh/multi.h"
 #include "bvh/optix.h"
 
@@ -105,13 +106,18 @@ BVH *BVH::create(const BVHParams &params,
 #else
       (void)device;
       break;
 #endif
+    case BVH_LAYOUT_METAL:
+#ifdef WITH_METAL
+      return bvh_metal_create(params, geometry, objects, device);
+#else
+      (void)device;
+      break;
+#endif
     case BVH_LAYOUT_MULTI_OPTIX:
     case BVH_LAYOUT_MULTI_OPTIX_EMBREE:
+    case BVH_LAYOUT_MULTI_METAL_EMBREE:
       return new BVHMulti(params, geometry, objects);
-    case BVH_LAYOUT_METAL:
-      /* host-side changes for BVH_LAYOUT_METAL are imminent */
     case BVH_LAYOUT_NONE:
     case BVH_LAYOUT_ALL:
       break;
 
intern/cycles/bvh/metal.h (new file, 35 lines)
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __BVH_METAL_H__
|
||||
#define __BVH_METAL_H__
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BVH *bvh_metal_create(const BVHParams &params,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects,
|
||||
Device *device);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
|
||||
#endif /* __BVH_METAL_H__ */
|
||||
intern/cycles/bvh/metal.mm (new file, 33 lines)
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
BVH *bvh_metal_create(const BVHParams &params,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects,
|
||||
Device *device)
|
||||
{
|
||||
return new BVHMetal(params, geometry, objects, device);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
@@ -551,4 +551,18 @@ if(NOT WITH_HIP_DYNLOAD)
   set(WITH_HIP_DYNLOAD ON)
 endif()
 
+###########################################################################
+# Metal
+###########################################################################
+
+if(WITH_CYCLES_DEVICE_METAL)
+  FIND_LIBRARY(METAL_LIBRARY Metal)
+  if (METAL_LIBRARY)
+    message(STATUS "Found Metal: ${METAL_LIBRARY}")
+  else()
+    message(STATUS "Metal not found, disabling WITH_CYCLES_DEVICE_METAL")
+    set(WITH_CYCLES_DEVICE_METAL OFF)
+  endif()
+endif()
+
 unset(_cycles_lib_dir)
 
@@ -43,7 +43,7 @@ if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
   add_definitions(-DWITH_HIP_DYNLOAD)
 endif()
 
-set(SRC
+set(SRC_BASE
   device.cpp
   denoise.cpp
   graphics_interop.cpp
@@ -104,6 +104,21 @@ set(SRC_MULTI
   multi/device.h
 )
 
+set(SRC_METAL
+  metal/bvh.mm
+  metal/bvh.h
+  metal/device.mm
+  metal/device.h
+  metal/device_impl.mm
+  metal/device_impl.h
+  metal/kernel.mm
+  metal/kernel.h
+  metal/queue.mm
+  metal/queue.h
+  metal/util.mm
+  metal/util.h
+)
+
 set(SRC_OPTIX
   optix/device.cpp
   optix/device.h
@@ -123,6 +138,17 @@ set(SRC_HEADERS
   queue.h
 )
 
+set(SRC
+  ${SRC_BASE}
+  ${SRC_CPU}
+  ${SRC_CUDA}
+  ${SRC_HIP}
+  ${SRC_DUMMY}
+  ${SRC_MULTI}
+  ${SRC_OPTIX}
+  ${SRC_HEADERS}
+)
+
 set(LIB
   cycles_kernel
   cycles_util
@@ -158,6 +184,15 @@ endif()
 if(WITH_CYCLES_DEVICE_OPTIX)
   add_definitions(-DWITH_OPTIX)
 endif()
+if(WITH_CYCLES_DEVICE_METAL)
+  list(APPEND LIB
+    ${METAL_LIBRARY}
+  )
+  add_definitions(-DWITH_METAL)
+  list(APPEND SRC
+    ${SRC_METAL}
+  )
+endif()
 
 if(WITH_OPENIMAGEDENOISE)
   list(APPEND LIB
@@ -168,20 +203,12 @@ endif()
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
-cycles_add_library(cycles_device "${LIB}"
-  ${SRC}
-  ${SRC_CPU}
-  ${SRC_CUDA}
-  ${SRC_HIP}
-  ${SRC_DUMMY}
-  ${SRC_MULTI}
-  ${SRC_OPTIX}
-  ${SRC_HEADERS}
-)
+cycles_add_library(cycles_device "${LIB}" ${SRC})
 
 source_group("cpu" FILES ${SRC_CPU})
 source_group("cuda" FILES ${SRC_CUDA})
 source_group("dummy" FILES ${SRC_DUMMY})
 source_group("multi" FILES ${SRC_MULTI})
+source_group("metal" FILES ${SRC_METAL})
 source_group("optix" FILES ${SRC_OPTIX})
 source_group("common" FILES ${SRC} ${SRC_HEADERS})
 
@@ -27,6 +27,7 @@
 #include "device/cuda/device.h"
 #include "device/dummy/device.h"
 #include "device/hip/device.h"
+#include "device/metal/device.h"
 #include "device/multi/device.h"
 #include "device/optix/device.h"
 
@@ -49,6 +50,7 @@ vector<DeviceInfo> Device::cuda_devices;
 vector<DeviceInfo> Device::optix_devices;
 vector<DeviceInfo> Device::cpu_devices;
 vector<DeviceInfo> Device::hip_devices;
+vector<DeviceInfo> Device::metal_devices;
 uint Device::devices_initialized_mask = 0;
 
 /* Device */
@@ -105,6 +107,12 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
       break;
 #endif
 
+#ifdef WITH_METAL
+    case DEVICE_METAL:
+      if (device_metal_init())
+        device = device_metal_create(info, stats, profiler);
+      break;
+#endif
     default:
       break;
   }
@@ -128,6 +136,8 @@ DeviceType Device::type_from_string(const char *name)
     return DEVICE_MULTI;
   else if (strcmp(name, "HIP") == 0)
     return DEVICE_HIP;
+  else if (strcmp(name, "METAL") == 0)
+    return DEVICE_METAL;
 
   return DEVICE_NONE;
 }
@@ -144,6 +154,8 @@ string Device::string_from_type(DeviceType type)
     return "MULTI";
   else if (type == DEVICE_HIP)
     return "HIP";
+  else if (type == DEVICE_METAL)
+    return "METAL";
 
   return "";
 }
@@ -161,7 +173,9 @@ vector<DeviceType> Device::available_types()
 #ifdef WITH_HIP
   types.push_back(DEVICE_HIP);
 #endif
-
+#ifdef WITH_METAL
+  types.push_back(DEVICE_METAL);
+#endif
   return types;
 }
 
@@ -227,6 +241,20 @@ vector<DeviceInfo> Device::available_devices(uint mask)
     }
   }
 
+#ifdef WITH_METAL
+  if (mask & DEVICE_MASK_METAL) {
+    if (!(devices_initialized_mask & DEVICE_MASK_METAL)) {
+      if (device_metal_init()) {
+        device_metal_info(metal_devices);
+      }
+      devices_initialized_mask |= DEVICE_MASK_METAL;
+    }
+    foreach (DeviceInfo &info, metal_devices) {
+      devices.push_back(info);
+    }
+  }
+#endif
+
   return devices;
 }
 
@@ -266,6 +294,15 @@ string Device::device_capabilities(uint mask)
   }
 #endif
 
+#ifdef WITH_METAL
+  if (mask & DEVICE_MASK_METAL) {
+    if (device_metal_init()) {
+      capabilities += "\nMetal device capabilities:\n";
+      capabilities += device_metal_capabilities();
+    }
+  }
+#endif
+
   return capabilities;
 }
 
@@ -354,6 +391,7 @@ void Device::free_memory()
   optix_devices.free_memory();
   hip_devices.free_memory();
   cpu_devices.free_memory();
+  metal_devices.free_memory();
 }
 
 unique_ptr<DeviceQueue> Device::gpu_queue_create()
 
@@ -52,6 +52,7 @@ enum DeviceType {
   DEVICE_MULTI,
   DEVICE_OPTIX,
   DEVICE_HIP,
+  DEVICE_METAL,
   DEVICE_DUMMY,
 };
 
@@ -60,6 +61,7 @@ enum DeviceTypeMask {
   DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
   DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
   DEVICE_MASK_HIP = (1 << DEVICE_HIP),
+  DEVICE_MASK_METAL = (1 << DEVICE_METAL),
   DEVICE_MASK_ALL = ~0
 };
 
@@ -281,6 +283,7 @@ class Device {
   static vector<DeviceInfo> optix_devices;
   static vector<DeviceInfo> cpu_devices;
   static vector<DeviceInfo> hip_devices;
+  static vector<DeviceInfo> metal_devices;
   static uint devices_initialized_mask;
 };
 
@@ -263,6 +263,7 @@ class device_memory {
   friend class CUDADevice;
   friend class OptiXDevice;
   friend class HIPDevice;
+  friend class MetalDevice;
 
   /* Only create through subclasses. */
   device_memory(Device *device, const char *name, MemoryType type);
 
intern/cycles/device/metal/bvh.h (new file, 66 lines)
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
# include "bvh/params.h"
|
||||
# include "device/memory.h"
|
||||
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BVHMetal : public BVH {
|
||||
public:
|
||||
API_AVAILABLE(macos(11.0))
|
||||
id<MTLAccelerationStructure> accel_struct = nil;
|
||||
bool accel_struct_building = false;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
vector<id<MTLAccelerationStructure>> blas_array;
|
||||
|
||||
bool motion_blur = false;
|
||||
|
||||
Stats &stats;
|
||||
|
||||
bool build(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
|
||||
BVHMetal(const BVHParams &params,
|
||||
const vector<Geometry *> &geometry,
|
||||
const vector<Object *> &objects,
|
||||
Device *device);
|
||||
virtual ~BVHMetal();
|
||||
|
||||
bool build_BLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
bool build_BLAS_mesh(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit);
|
||||
bool build_BLAS_hair(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit);
|
||||
bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/bvh.mm (new file, 813 lines)
@@ -0,0 +1,813 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "scene/hair.h"
|
||||
# include "scene/mesh.h"
|
||||
# include "scene/object.h"
|
||||
|
||||
# include "util/progress.h"
|
||||
|
||||
# include "device/metal/bvh.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
# define BVH_status(...) \
|
||||
{ \
|
||||
string str = string_printf(__VA_ARGS__); \
|
||||
progress.set_substatus(str); \
|
||||
}
|
||||
|
||||
BVHMetal::BVHMetal(const BVHParams &params_,
|
||||
const vector<Geometry *> &geometry_,
|
||||
const vector<Object *> &objects_,
|
||||
Device *device)
|
||||
: BVH(params_, geometry_, objects_), stats(device->stats)
|
||||
{
|
||||
}
|
||||
|
||||
BVHMetal::~BVHMetal()
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
if (accel_struct) {
|
||||
stats.mem_free(accel_struct.allocatedSize);
|
||||
[accel_struct release];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool BVHMetal::build_BLAS_mesh(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
/* Build BLAS for triangle primitives */
|
||||
Mesh *const mesh = static_cast<Mesh *const>(geom);
|
||||
if (mesh->num_triangles() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*------------------------------------------------*/
|
||||
BVH_status(
|
||||
"Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
|
||||
/*------------------------------------------------*/
|
||||
|
||||
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||
|
||||
const array<float3> &verts = mesh->get_verts();
|
||||
const array<int> &tris = mesh->get_triangles();
|
||||
const size_t num_verts = verts.size();
|
||||
const size_t num_indices = tris.size();
|
||||
|
||||
size_t num_motion_steps = 1;
|
||||
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
|
||||
num_motion_steps = mesh->get_motion_steps();
|
||||
}
|
||||
|
||||
MTLResourceOptions storage_mode;
|
||||
if (device.hasUnifiedMemory) {
|
||||
storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
else {
|
||||
storage_mode = MTLResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
/* Upload the mesh data to the GPU */
|
||||
id<MTLBuffer> posBuf = nil;
|
||||
id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
|
||||
length:num_indices * sizeof(tris.data()[0])
|
||||
options:storage_mode];
|
||||
|
||||
if (num_motion_steps == 1) {
|
||||
posBuf = [device newBufferWithBytes:verts.data()
|
||||
length:num_verts * sizeof(verts.data()[0])
|
||||
options:storage_mode];
|
||||
}
|
||||
else {
|
||||
posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
|
||||
options:storage_mode];
|
||||
float3 *dest_data = (float3 *)[posBuf contents];
|
||||
size_t center_step = (num_motion_steps - 1) / 2;
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
const float3 *verts = mesh->get_verts().data();
|
||||
|
||||
/* The center step for motion vertices is not stored in the attribute. */
|
||||
if (step != center_step) {
|
||||
verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
|
||||
}
|
||||
memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
|
||||
}
|
||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||
[posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
|
||||
}
|
||||
}
|
||||
|
||||
/* Create an acceleration structure. */
|
||||
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
||||
if (num_motion_steps > 1) {
|
||||
std::vector<MTLMotionKeyframeData *> vertex_ptrs;
|
||||
vertex_ptrs.reserve(num_motion_steps);
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
|
||||
k.buffer = posBuf;
|
||||
k.offset = num_verts * step * sizeof(float3);
|
||||
vertex_ptrs.push_back(k);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
|
||||
[MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
|
||||
geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
|
||||
count:vertex_ptrs.size()];
|
||||
geomDescMotion.vertexStride = sizeof(verts.data()[0]);
|
||||
geomDescMotion.indexBuffer = indexBuf;
|
||||
geomDescMotion.indexBufferOffset = 0;
|
||||
geomDescMotion.indexType = MTLIndexTypeUInt32;
|
||||
geomDescMotion.triangleCount = num_indices / 3;
|
||||
geomDescMotion.intersectionFunctionTableOffset = 0;
|
||||
|
||||
geomDesc = geomDescMotion;
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
|
||||
[MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
|
||||
geomDescNoMotion.vertexBuffer = posBuf;
|
||||
geomDescNoMotion.vertexBufferOffset = 0;
|
||||
geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
|
||||
geomDescNoMotion.indexBuffer = indexBuf;
|
||||
geomDescNoMotion.indexBufferOffset = 0;
|
||||
geomDescNoMotion.indexType = MTLIndexTypeUInt32;
|
||||
geomDescNoMotion.triangleCount = num_indices / 3;
|
||||
geomDescNoMotion.intersectionFunctionTableOffset = 0;
|
||||
|
||||
geomDesc = geomDescNoMotion;
|
||||
}
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behaviour: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
|
||||
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
|
||||
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
|
||||
accelDesc.geometryDescriptors = @[ geomDesc ];
|
||||
if (num_motion_steps > 1) {
|
||||
accelDesc.motionStartTime = 0.0f;
|
||||
accelDesc.motionEndTime = 1.0f;
|
||||
accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
|
||||
accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
|
||||
accelDesc.motionKeyframeCount = num_motion_steps;
|
||||
}
|
||||
|
||||
if (!use_fast_trace_bvh) {
|
||||
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||
MTLAccelerationStructureUsagePreferFastBuild);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureSizes accelSizes = [device
|
||||
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||
id<MTLAccelerationStructure> accel_uncompressed = [device
|
||||
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||
options:MTLResourceStorageModePrivate];
|
||||
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
if (refit) {
|
||||
[accelEnc refitAccelerationStructure:accel_struct
|
||||
descriptor:accelDesc
|
||||
destination:accel_uncompressed
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
else {
|
||||
[accelEnc buildAccelerationStructure:accel_uncompressed
|
||||
descriptor:accelDesc
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
if (use_fast_trace_bvh) {
|
||||
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
|
||||
toBuffer:sizeBuf
|
||||
offset:0
|
||||
sizeDataType:MTLDataTypeULong];
|
||||
}
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
/* free temp resources */
|
||||
[scratchBuf release];
|
||||
[indexBuf release];
|
||||
[posBuf release];
|
||||
|
||||
if (use_fast_trace_bvh) {
|
||||
/* Compact the accel structure */
|
||||
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
id<MTLAccelerationStructure> accel = [device
|
||||
newAccelerationStructureWithSize:compressed_size];
|
||||
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
|
||||
toAccelerationStructure:accel];
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
uint64_t allocated_size = [accel allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct = accel;
|
||||
[accel_uncompressed release];
|
||||
accel_struct_building = false;
|
||||
}];
|
||||
[accelCommands commit];
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* set our acceleration structure to the uncompressed structure */
|
||||
accel_struct = accel_uncompressed;
|
||||
|
||||
uint64_t allocated_size = [accel_struct allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct_building = false;
|
||||
}
|
||||
[sizeBuf release];
|
||||
}];
|
||||
|
||||
accel_struct_building = true;
|
||||
[accelCommands commit];
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BVHMetal::build_BLAS_hair(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
Geometry *const geom,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
/* Build BLAS for hair curves */
|
||||
Hair *hair = static_cast<Hair *>(geom);
|
||||
if (hair->num_curves() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*------------------------------------------------*/
|
||||
BVH_status(
|
||||
"Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
|
||||
/*------------------------------------------------*/
|
||||
|
||||
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||
const size_t num_segments = hair->num_segments();
|
||||
|
||||
size_t num_motion_steps = 1;
|
||||
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
|
||||
if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
|
||||
num_motion_steps = hair->get_motion_steps();
|
||||
}
|
||||
|
||||
const size_t num_aabbs = num_segments * num_motion_steps;
|
||||
|
||||
MTLResourceOptions storage_mode;
|
||||
if (device.hasUnifiedMemory) {
|
||||
storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
else {
|
||||
storage_mode = MTLResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
/* Allocate a GPU buffer for the AABB data and populate it */
|
||||
id<MTLBuffer> aabbBuf = [device
|
||||
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
|
||||
options:storage_mode];
|
||||
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
|
||||
|
||||
/* Get AABBs for each motion step */
|
||||
size_t center_step = (num_motion_steps - 1) / 2;
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
/* The center step for motion vertices is not stored in the attribute */
|
||||
const float3 *keys = hair->get_curve_keys().data();
|
||||
if (step != center_step) {
|
||||
size_t attr_offset = (step > center_step) ? step - 1 : step;
|
||||
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4) */
|
||||
keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
|
||||
}
|
||||
|
||||
for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
|
||||
const Hair::Curve curve = hair->get_curve(j);
|
||||
|
||||
for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
|
||||
{
|
||||
BoundBox bounds = BoundBox::empty;
|
||||
curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
|
||||
|
||||
const size_t index = step * num_segments + i;
|
||||
aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
|
||||
aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||
[aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
|
||||
}
|
||||
|
||||
# if 0
|
||||
for (size_t i=0; i<num_aabbs && i < 400; i++) {
|
||||
MTLAxisAlignedBoundingBox& bb = aabb_data[i];
|
||||
printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
|
||||
}
|
||||
# endif
|
||||
|
||||
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
||||
if (motion_blur) {
|
||||
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
|
||||
aabb_ptrs.reserve(num_motion_steps);
|
||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
|
||||
k.buffer = aabbBuf;
|
||||
k.offset = step * num_segments * sizeof(MTLAxisAlignedBoundingBox);
|
||||
aabb_ptrs.push_back(k);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
|
||||
[MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
|
||||
geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
|
||||
count:aabb_ptrs.size()];
|
||||
geomDescMotion.boundingBoxCount = num_segments;
|
||||
geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
|
||||
geomDescMotion.intersectionFunctionTableOffset = 1;
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behaviour: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
geomDescMotion.opaque = true;
|
||||
geomDesc = geomDescMotion;
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
|
||||
[MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
|
||||
geomDescNoMotion.boundingBoxBuffer = aabbBuf;
|
||||
geomDescNoMotion.boundingBoxBufferOffset = 0;
|
||||
geomDescNoMotion.boundingBoxCount = int(num_aabbs);
|
||||
geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
|
||||
geomDescNoMotion.intersectionFunctionTableOffset = 1;
|
||||
|
||||
/* Force a single any-hit call, so shadow record-all behavior works correctly */
|
||||
/* (Match optix behaviour: unsigned int build_flags =
|
||||
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
|
||||
geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
|
||||
geomDescNoMotion.opaque = true;
|
||||
geomDesc = geomDescNoMotion;
|
||||
}
|
||||
|
||||
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
|
||||
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
|
||||
accelDesc.geometryDescriptors = @[ geomDesc ];
|
||||
|
||||
if (motion_blur) {
|
||||
accelDesc.motionStartTime = 0.0f;
|
||||
accelDesc.motionEndTime = 1.0f;
|
||||
accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
|
||||
accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
|
||||
accelDesc.motionKeyframeCount = num_motion_steps;
|
||||
}
|
||||
|
||||
if (!use_fast_trace_bvh) {
|
||||
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||
MTLAccelerationStructureUsagePreferFastBuild);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureSizes accelSizes = [device
|
||||
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||
id<MTLAccelerationStructure> accel_uncompressed = [device
|
||||
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||
options:MTLResourceStorageModePrivate];
|
||||
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
if (refit) {
|
||||
[accelEnc refitAccelerationStructure:accel_struct
|
||||
descriptor:accelDesc
|
||||
destination:accel_uncompressed
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
else {
|
||||
[accelEnc buildAccelerationStructure:accel_uncompressed
|
||||
descriptor:accelDesc
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
if (use_fast_trace_bvh) {
|
||||
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
|
||||
toBuffer:sizeBuf
|
||||
offset:0
|
||||
sizeDataType:MTLDataTypeULong];
|
||||
}
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
/* free temp resources */
|
||||
[scratchBuf release];
|
||||
[aabbBuf release];
|
||||
|
||||
if (use_fast_trace_bvh) {
|
||||
/* Compact the accel structure */
|
||||
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
id<MTLAccelerationStructure> accel = [device
|
||||
newAccelerationStructureWithSize:compressed_size];
|
||||
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
|
||||
toAccelerationStructure:accel];
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
uint64_t allocated_size = [accel allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct = accel;
|
||||
[accel_uncompressed release];
|
||||
accel_struct_building = false;
|
||||
}];
|
||||
[accelCommands commit];
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* set our acceleration structure to the uncompressed structure */
|
||||
accel_struct = accel_uncompressed;
|
||||
|
||||
uint64_t allocated_size = [accel_struct allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
accel_struct_building = false;
|
||||
}
|
||||
[sizeBuf release];
|
||||
}];
|
||||
|
||||
accel_struct_building = true;
|
||||
[accelCommands commit];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BVHMetal::build_BLAS(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
assert(objects.size() == 1 && geometry.size() == 1);
|
||||
|
||||
/* Build bottom level acceleration structures (BLAS) */
|
||||
Geometry *const geom = geometry[0];
|
||||
switch (geom->geometry_type) {
|
||||
case Geometry::VOLUME:
|
||||
case Geometry::MESH:
|
||||
return build_BLAS_mesh(progress, device, queue, geom, refit);
|
||||
case Geometry::HAIR:
|
||||
return build_BLAS_hair(progress, device, queue, geom, refit);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BVHMetal::build_TLAS(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
|
||||
/* we need to sync here and ensure that all BLAS have completed async generation by both GCD
|
||||
* and Metal */
|
||||
{
|
||||
__block bool complete_bvh = false;
|
||||
while (!complete_bvh) {
|
||||
dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
complete_bvh = true;
|
||||
for (Object *ob : objects) {
|
||||
/* Skip non-traceable objects */
|
||||
if (!ob->is_traceable())
|
||||
continue;
|
||||
|
||||
Geometry const *geom = ob->get_geometry();
|
||||
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
|
||||
if (blas->accel_struct_building) {
|
||||
complete_bvh = false;
|
||||
|
||||
/* We're likely waiting on a command buffer that's in flight to complete.
|
||||
* Queue up a command buffer and wait for it complete before checking the BLAS again
|
||||
*/
|
||||
id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
|
||||
[command_buffer commit];
|
||||
[command_buffer waitUntilCompleted];
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t num_instances = 0;
|
||||
uint32_t num_motion_transforms = 0;
|
||||
for (Object *ob : objects) {
|
||||
/* Skip non-traceable objects */
|
||||
if (!ob->is_traceable())
|
||||
continue;
|
||||
num_instances++;
|
||||
|
||||
if (ob->use_motion()) {
|
||||
num_motion_transforms += max(1, ob->get_motion().size());
|
||||
}
|
||||
else {
|
||||
num_motion_transforms++;
|
||||
}
|
||||
}
|
||||
|
||||
/*------------------------------------------------*/
|
||||
BVH_status("Building TLAS | %7d instances", (int)num_instances);
|
||||
/*------------------------------------------------*/
|
||||
|
||||
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
|
||||
|
||||
NSMutableArray *all_blas = [NSMutableArray array];
|
||||
unordered_map<BVHMetal const *, int> instance_mapping;
|
||||
|
||||
/* Lambda function to build/retrieve the BLAS index mapping */
|
||||
auto get_blas_index = [&](BVHMetal const *blas) {
|
||||
auto it = instance_mapping.find(blas);
|
||||
if (it != instance_mapping.end()) {
|
||||
return it->second;
|
||||
}
|
||||
else {
|
||||
int blas_index = (int)[all_blas count];
|
||||
instance_mapping[blas] = blas_index;
|
||||
if (@available(macos 12.0, *)) {
|
||||
[all_blas addObject:blas->accel_struct];
|
||||
}
|
||||
return blas_index;
|
||||
}
|
||||
};
|
||||
|
||||
MTLResourceOptions storage_mode;
|
||||
if (device.hasUnifiedMemory) {
|
||||
storage_mode = MTLResourceStorageModeShared;
|
||||
}
|
||||
else {
|
||||
storage_mode = MTLResourceStorageModeManaged;
|
||||
}
|
||||
|
||||
size_t instance_size;
|
||||
if (motion_blur) {
|
||||
instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
|
||||
}
|
||||
else {
|
||||
instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
|
||||
}
|
||||
|
||||
/* Allocate a GPU buffer for the instance data and populate it */
|
||||
id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
|
||||
options:storage_mode];
|
||||
id<MTLBuffer> motion_transforms_buf = nil;
|
||||
MTLPackedFloat4x3 *motion_transforms = nullptr;
|
||||
if (motion_blur && num_motion_transforms) {
|
||||
motion_transforms_buf = [device
|
||||
newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
|
||||
options:storage_mode];
|
||||
motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
|
||||
}
|
||||
|
||||
uint32_t instance_index = 0;
|
||||
uint32_t motion_transform_index = 0;
|
||||
for (Object *ob : objects) {
|
||||
/* Skip non-traceable objects */
|
||||
if (!ob->is_traceable())
|
||||
continue;
|
||||
|
||||
Geometry const *geom = ob->get_geometry();
|
||||
|
||||
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
|
||||
uint32_t accel_struct_index = get_blas_index(blas);
|
||||
|
||||
/* Add some of the object visibility bits to the mask.
|
||||
* __prim_visibility contains the combined visibility bits of all instances, so is not
|
||||
* reliable if they differ between instances.
|
||||
*
|
||||
* METAL_WIP: OptiX visibility mask can only contain 8 bits, so have to trade-off here
|
||||
* and select just a few important ones.
|
||||
*/
|
||||
uint32_t mask = ob->visibility_for_tracing() & 0xFF;
|
||||
|
||||
/* Have to have at least one bit in the mask, or else instance would always be culled. */
|
||||
if (0 == mask) {
|
||||
mask = 0xFF;
|
||||
}
|
||||
|
||||
/* Set user instance ID to object index */
|
||||
int object_index = ob->get_device_index();
|
||||
uint32_t user_id = uint32_t(object_index);
|
||||
|
||||
/* Bake into the appropriate descriptor */
|
||||
if (motion_blur) {
|
||||
MTLAccelerationStructureMotionInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
|
||||
|
||||
desc.accelerationStructureIndex = accel_struct_index;
|
||||
desc.userID = user_id;
|
||||
desc.mask = mask;
|
||||
desc.motionStartTime = 0.0f;
|
||||
desc.motionEndTime = 1.0f;
|
||||
desc.motionTransformsStartIndex = motion_transform_index;
|
||||
desc.motionStartBorderMode = MTLMotionBorderModeVanish;
|
||||
desc.motionEndBorderMode = MTLMotionBorderModeVanish;
|
||||
desc.intersectionFunctionTableOffset = 0;
|
||||
|
||||
int key_count = ob->get_motion().size();
|
||||
if (key_count) {
|
||||
desc.motionTransformsCount = key_count;
|
||||
|
||||
Transform *keys = ob->get_motion().data();
|
||||
for (int i = 0; i < key_count; i++) {
|
||||
float *t = (float *)&motion_transforms[motion_transform_index++];
|
||||
/* Transpose transform */
|
||||
auto src = (float const *)&keys[i];
|
||||
for (int i = 0; i < 12; i++) {
|
||||
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
desc.motionTransformsCount = 1;
|
||||
|
||||
float *t = (float *)&motion_transforms[motion_transform_index++];
|
||||
if (ob->get_geometry()->is_instanced()) {
|
||||
/* Transpose transform */
|
||||
auto src = (float const *)&ob->get_tfm();
|
||||
for (int i = 0; i < 12; i++) {
|
||||
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Clear transform to identity matrix */
|
||||
t[0] = t[4] = t[8] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
|
||||
|
||||
desc.accelerationStructureIndex = accel_struct_index;
|
||||
desc.userID = user_id;
|
||||
desc.mask = mask;
|
||||
desc.intersectionFunctionTableOffset = 0;
|
||||
|
||||
float *t = (float *)&desc.transformationMatrix;
|
||||
if (ob->get_geometry()->is_instanced()) {
|
||||
/* Transpose transform */
|
||||
auto src = (float const *)&ob->get_tfm();
|
||||
for (int i = 0; i < 12; i++) {
|
||||
t[i] = src[(i / 3) + 4 * (i % 3)];
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Clear transform to identity matrix */
|
||||
t[0] = t[4] = t[8] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
||||
[instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
|
||||
if (motion_transforms_buf) {
|
||||
[motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
|
||||
assert(num_motion_transforms == motion_transform_index);
|
||||
}
|
||||
}
|
||||
|
||||
MTLInstanceAccelerationStructureDescriptor *accelDesc =
|
||||
[MTLInstanceAccelerationStructureDescriptor descriptor];
|
||||
accelDesc.instanceCount = num_instances;
|
||||
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
|
||||
accelDesc.instanceDescriptorBuffer = instanceBuf;
|
||||
accelDesc.instanceDescriptorBufferOffset = 0;
|
||||
accelDesc.instanceDescriptorStride = instance_size;
|
||||
accelDesc.instancedAccelerationStructures = all_blas;
|
||||
|
||||
if (motion_blur) {
|
||||
accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
|
||||
accelDesc.motionTransformBuffer = motion_transforms_buf;
|
||||
accelDesc.motionTransformCount = num_motion_transforms;
|
||||
}
|
||||
|
||||
if (!use_fast_trace_bvh) {
|
||||
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
|
||||
MTLAccelerationStructureUsagePreferFastBuild);
|
||||
}
|
||||
|
||||
MTLAccelerationStructureSizes accelSizes = [device
|
||||
accelerationStructureSizesWithDescriptor:accelDesc];
|
||||
id<MTLAccelerationStructure> accel = [device
|
||||
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
|
||||
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
|
||||
options:MTLResourceStorageModePrivate];
|
||||
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
|
||||
id<MTLAccelerationStructureCommandEncoder> accelEnc =
|
||||
[accelCommands accelerationStructureCommandEncoder];
|
||||
if (refit) {
|
||||
[accelEnc refitAccelerationStructure:accel_struct
|
||||
descriptor:accelDesc
|
||||
destination:accel
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
else {
|
||||
[accelEnc buildAccelerationStructure:accel
|
||||
descriptor:accelDesc
|
||||
scratchBuffer:scratchBuf
|
||||
scratchBufferOffset:0];
|
||||
}
|
||||
[accelEnc endEncoding];
|
||||
[accelCommands commit];
|
||||
[accelCommands waitUntilCompleted];
|
||||
|
||||
if (motion_transforms_buf) {
|
||||
[motion_transforms_buf release];
|
||||
}
|
||||
[instanceBuf release];
|
||||
[scratchBuf release];
|
||||
|
||||
uint64_t allocated_size = [accel allocatedSize];
|
||||
stats.mem_alloc(allocated_size);
|
||||
|
||||
/* Cache top and bottom-level acceleration structs */
|
||||
accel_struct = accel;
|
||||
blas_array.clear();
|
||||
blas_array.reserve(all_blas.count);
|
||||
for (id<MTLAccelerationStructure> blas in all_blas) {
|
||||
blas_array.push_back(blas);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BVHMetal::build(Progress &progress,
|
||||
id<MTLDevice> device,
|
||||
id<MTLCommandQueue> queue,
|
||||
bool refit)
|
||||
{
|
||||
if (@available(macos 12.0, *)) {
|
||||
if (refit && params.bvh_type != BVH_TYPE_STATIC) {
|
||||
assert(accel_struct);
|
||||
}
|
||||
else {
|
||||
if (accel_struct) {
|
||||
stats.mem_free(accel_struct.allocatedSize);
|
||||
[accel_struct release];
|
||||
accel_struct = nil;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.top_level) {
|
||||
return build_BLAS(progress, device, queue, refit);
|
||||
}
|
||||
else {
|
||||
return build_TLAS(progress, device, queue, refit);
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/device.h (new file, 37 lines)
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/string.h"
|
||||
#include "util/vector.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Device;
|
||||
class DeviceInfo;
|
||||
class Profiler;
|
||||
class Stats;
|
||||
|
||||
bool device_metal_init();
|
||||
|
||||
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
void device_metal_info(vector<DeviceInfo> &devices);
|
||||
|
||||
string device_metal_capabilities();
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
intern/cycles/device/metal/device.mm (new file, 136 lines)
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
|
||||
#endif
|
||||
|
||||
#include "util/debug.h"
|
||||
#include "util/set.h"
|
||||
#include "util/system.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
{
|
||||
return new MetalDevice(info, stats, profiler);
|
||||
}
|
||||
|
||||
bool device_metal_init()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static int device_metal_get_num_devices_safe(uint32_t *num_devices)
|
||||
{
|
||||
*num_devices = MTLCopyAllDevices().count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void device_metal_info(vector<DeviceInfo> &devices)
|
||||
{
|
||||
uint32_t num_devices = 0;
|
||||
device_metal_get_num_devices_safe(&num_devices);
|
||||
if (num_devices == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<MetalPlatformDevice> usable_devices;
|
||||
MetalInfo::get_usable_devices(&usable_devices);
|
||||
/* Devices are numbered consecutively across platforms. */
|
||||
set<string> unique_ids;
|
||||
int device_index = 0;
|
||||
for (MetalPlatformDevice &device : usable_devices) {
|
||||
/* Compute unique ID for persistent user preferences. */
|
||||
const string &device_name = device.device_name;
|
||||
string id = string("METAL_") + device_name;
|
||||
|
||||
/* Hardware ID might not be unique, add device number in that case. */
|
||||
if (unique_ids.find(id) != unique_ids.end()) {
|
||||
id += string_printf("_ID_%d", num_devices);
|
||||
}
|
||||
unique_ids.insert(id);
|
||||
|
||||
/* Create DeviceInfo. */
|
||||
DeviceInfo info;
|
||||
info.type = DEVICE_METAL;
|
||||
info.description = string_remove_trademark(string(device_name));
|
||||
|
||||
/* Ensure unique naming on Apple Silicon / SoC devices which return the same string for CPU and
|
||||
* GPU */
|
||||
if (info.description == system_cpu_brand_string()) {
|
||||
info.description += " (GPU)";
|
||||
}
|
||||
|
||||
info.num = device_index;
|
||||
/* We don't know if it's used for display, but assume it is. */
|
||||
info.display_device = true;
|
||||
info.denoisers = DENOISER_NONE;
|
||||
info.id = id;
|
||||
|
||||
devices.push_back(info);
|
||||
device_index++;
|
||||
}
|
||||
}
|
||||
|
||||
string device_metal_capabilities()
|
||||
{
|
||||
string result = "";
|
||||
string error_msg = "";
|
||||
uint32_t num_devices = 0;
|
||||
assert(device_metal_get_num_devices_safe(&num_devices));
|
||||
if (num_devices == 0) {
|
||||
return "No Metal devices found\n";
|
||||
}
|
||||
result += string_printf("Number of devices: %u\n", num_devices);
|
||||
|
||||
NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
|
||||
for (id<MTLDevice> device in allDevices) {
|
||||
result += string_printf("\t\tDevice: %s\n", [device.name UTF8String]);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
Device *device_metal_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool device_metal_init()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void device_metal_info(vector<DeviceInfo> &devices)
|
||||
{
|
||||
}
|
||||
|
||||
string device_metal_capabilities()
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
intern/cycles/device/metal/device_impl.h (new file, 166 lines)
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "bvh/bvh.h"
|
||||
# include "device/device.h"
|
||||
# include "device/metal/bvh.h"
|
||||
# include "device/metal/device.h"
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/queue.h"
|
||||
# include "device/metal/util.h"
|
||||
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class DeviceQueue;
|
||||
|
||||
class MetalDevice : public Device {
|
||||
public:
|
||||
id<MTLDevice> mtlDevice = nil;
|
||||
id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
|
||||
id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
|
||||
nil; /* encoder used for fetching device pointers from MTLBuffers */
|
||||
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
|
||||
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
|
||||
nil; /* encoder used for fetching device pointers from MTLBuffers */
|
||||
string source_used_for_compile[PSO_NUM];
|
||||
|
||||
KernelParamsMetal launch_params = {0};
|
||||
|
||||
/* MetalRT members ----------------------------------*/
|
||||
BVHMetal *bvhMetalRT = nullptr;
|
||||
bool motion_blur = false;
|
||||
id<MTLArgumentEncoder> mtlASArgEncoder =
|
||||
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
|
||||
/*---------------------------------------------------*/
|
||||
|
||||
string device_name;
|
||||
MetalGPUVendor device_vendor;
|
||||
|
||||
uint kernel_features;
|
||||
MTLResourceOptions default_storage_mode;
|
||||
int max_threads_per_threadgroup;
|
||||
|
||||
int mtlDevId = 0;
|
||||
bool first_error = true;
|
||||
|
||||
struct MetalMem {
|
||||
device_memory *mem = nullptr;
|
||||
int pointer_index = -1;
|
||||
id<MTLBuffer> mtlBuffer = nil;
|
||||
id<MTLTexture> mtlTexture = nil;
|
||||
uint64_t offset = 0;
|
||||
uint64_t size = 0;
|
||||
void *hostPtr = nullptr;
|
||||
bool use_UMA = false; /* If true, UMA memory in shared_pointer is being used. */
|
||||
};
|
||||
typedef map<device_memory *, unique_ptr<MetalMem>> MetalMemMap;
|
||||
MetalMemMap metal_mem_map;
|
||||
std::vector<id<MTLResource>> delayed_free_list;
|
||||
std::recursive_mutex metal_mem_map_mutex;
|
||||
|
||||
/* Bindless Textures */
|
||||
device_vector<TextureInfo> texture_info;
|
||||
bool need_texture_info;
|
||||
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
|
||||
id<MTLBuffer> texture_bindings_2d = nil;
|
||||
id<MTLBuffer> texture_bindings_3d = nil;
|
||||
std::vector<id<MTLTexture>> texture_slot_map;
|
||||
|
||||
MetalDeviceKernels kernels;
|
||||
bool use_metalrt = false;
|
||||
bool use_function_specialisation = false;
|
||||
|
||||
virtual BVHLayoutMask get_bvh_layout_mask() const override;
|
||||
|
||||
void set_error(const string &error) override;
|
||||
|
||||
MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
|
||||
|
||||
virtual ~MetalDevice();
|
||||
|
||||
bool support_device(const uint /*kernel_features*/);
|
||||
|
||||
bool check_peer_access(Device *peer_device) override;
|
||||
|
||||
bool use_adaptive_compilation();
|
||||
|
||||
string get_source(const uint kernel_features);
|
||||
|
||||
string compile_kernel(const uint kernel_features, const char *name);
|
||||
|
||||
virtual bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
void reserve_local_memory(const uint kernel_features);
|
||||
|
||||
void init_host_memory();
|
||||
|
||||
void load_texture_info();
|
||||
|
||||
virtual bool should_use_graphics_interop() override;
|
||||
|
||||
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* low-level memory management */
|
||||
|
||||
MetalMem *generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
|
||||
void mem_alloc(device_memory &mem) override;
|
||||
|
||||
void mem_copy_to(device_memory &mem) override;
|
||||
|
||||
void mem_copy_from(device_memory &mem)
|
||||
{
|
||||
mem_copy_from(mem, -1, -1, -1, -1);
|
||||
}
|
||||
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
|
||||
|
||||
void mem_zero(device_memory &mem) override;
|
||||
|
||||
void mem_free(device_memory &mem) override;
|
||||
|
||||
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
|
||||
|
||||
virtual void const_copy_to(const char *name, void *host, size_t size) override;
|
||||
|
||||
void global_alloc(device_memory &mem);
|
||||
|
||||
void global_free(device_memory &mem);
|
||||
|
||||
void tex_alloc(device_texture &mem);
|
||||
|
||||
void tex_alloc_as_buffer(device_texture &mem);
|
||||
|
||||
void tex_free(device_texture &mem);
|
||||
|
||||
void flush_delayed_free_list();
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
intern/cycles/device/metal/device_impl.mm (new file, 1008 lines; diff suppressed because it is too large)
intern/cycles/device/metal/kernel.h (new file, 168 lines)
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/kernel.h"
|
||||
# include <Metal/Metal.h>
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class MetalDevice;
|
||||
|
||||
enum {
|
||||
METALRT_FUNC_DEFAULT_TRI,
|
||||
METALRT_FUNC_DEFAULT_BOX,
|
||||
METALRT_FUNC_SHADOW_TRI,
|
||||
METALRT_FUNC_SHADOW_BOX,
|
||||
METALRT_FUNC_LOCAL_TRI,
|
||||
METALRT_FUNC_LOCAL_BOX,
|
||||
METALRT_FUNC_CURVE_RIBBON,
|
||||
METALRT_FUNC_CURVE_RIBBON_SHADOW,
|
||||
METALRT_FUNC_CURVE_ALL,
|
||||
METALRT_FUNC_CURVE_ALL_SHADOW,
|
||||
METALRT_FUNC_NUM
|
||||
};
|
||||
|
||||
enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
|
||||
|
||||
/* Pipeline State Object types */
|
||||
enum {
|
||||
/* A kernel that can be used with all scenes, supporting all features.
|
||||
* It is slow to compile, but only needs to be compiled once and is then
|
||||
* cached for future render sessions. This allows a render to get underway
|
||||
* on the GPU quickly.
|
||||
*/
|
||||
PSO_GENERIC,
|
||||
|
||||
  /* A kernel that is relatively quick to compile, but is specialised for the
   * scene being rendered. It only contains the functionality needed for that
   * scene, and bakes constant values directly into the kernel, which means it
   * needs to be recompiled whenever a dependent setting is changed. The render
   * performance of this kernel is significantly faster though, and justifies
   * the extra compile time.
   */
  /* METAL_WIP: This isn't used and will require more changes to enable. */
  PSO_SPECIALISED,
|
||||
|
||||
PSO_NUM
|
||||
};
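To make the two-tier scheme above concrete, here is a minimal sketch (not the actual Cycles selection logic; Pipeline and PipelineSet are illustrative names) of how a host can start rendering with the cached generic pipeline and switch to the specialised one once its background compile finishes:

// Minimal sketch of the generic-vs-specialised fallback described above.
#include <array>

struct Pipeline {
  bool loaded = false;
};

struct PipelineSet {
  std::array<Pipeline, 2> pso; /* [0] = generic, [1] = specialised */

  /* Prefer the scene-specialised pipeline once it has finished compiling;
   * until then, render with the always-available generic pipeline. */
  const Pipeline &active() const
  {
    return pso[1].loaded ? pso[1] : pso[0];
  }
};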
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type);
|
||||
|
||||
struct MetalKernelPipeline {
|
||||
void release()
|
||||
{
|
||||
if (pipeline) {
|
||||
[pipeline release];
|
||||
pipeline = nil;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
for (int i = 0; i < METALRT_TABLE_NUM; i++) {
|
||||
if (intersection_func_table[i]) {
|
||||
[intersection_func_table[i] release];
|
||||
intersection_func_table[i] = nil;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (function) {
|
||||
[function release];
|
||||
function = nil;
|
||||
}
|
||||
if (@available(macOS 11.0, *)) {
|
||||
for (int i = 0; i < METALRT_TABLE_NUM; i++) {
|
||||
if (intersection_func_table[i]) {
|
||||
[intersection_func_table[i] release];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool loaded = false;
|
||||
id<MTLFunction> function = nil;
|
||||
id<MTLComputePipelineState> pipeline = nil;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
|
||||
};
|
||||
|
||||
struct MetalKernelLoadDesc {
|
||||
int pso_index = 0;
|
||||
const char *function_name = nullptr;
|
||||
int kernel_index = 0;
|
||||
int threads_per_threadgroup = 0;
|
||||
MTLFunctionConstantValues *constant_values = nullptr;
|
||||
NSArray *linked_functions = nullptr;
|
||||
|
||||
struct IntersectorFunctions {
|
||||
NSArray *defaults;
|
||||
NSArray *shadow;
|
||||
NSArray *local;
|
||||
NSArray *operator[](int index) const
|
||||
{
|
||||
if (index == METALRT_TABLE_DEFAULT)
|
||||
return defaults;
|
||||
if (index == METALRT_TABLE_SHADOW)
|
||||
return shadow;
|
||||
return local;
|
||||
}
|
||||
} intersector_functions = {nullptr};
|
||||
};
|
||||
|
||||
/* Metal kernel and associated occupancy information. */
|
||||
class MetalDeviceKernel {
|
||||
public:
|
||||
~MetalDeviceKernel();
|
||||
|
||||
bool load(MetalDevice *device, MetalKernelLoadDesc const &desc, class MD5Hash const &md5);
|
||||
|
||||
void mark_loaded(int pso_index)
|
||||
{
|
||||
pso[pso_index].loaded = true;
|
||||
}
|
||||
|
||||
int get_num_threads_per_block() const
|
||||
{
|
||||
return num_threads_per_block;
|
||||
}
|
||||
const MetalKernelPipeline &get_pso() const;
|
||||
|
||||
double load_duration = 0.0;
|
||||
|
||||
private:
|
||||
MetalKernelPipeline pso[PSO_NUM];
|
||||
|
||||
int num_threads_per_block = 0;
|
||||
};
|
||||
|
||||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
class MetalDeviceKernels {
|
||||
public:
|
||||
bool load(MetalDevice *device, int kernel_type);
|
||||
bool available(DeviceKernel kernel) const;
|
||||
const MetalDeviceKernel &get(DeviceKernel kernel) const;
|
||||
|
||||
MetalDeviceKernel kernels_[DEVICE_KERNEL_NUM];
|
||||
|
||||
id<MTLFunction> rt_intersection_funcs[PSO_NUM][METALRT_FUNC_NUM] = {{nil}};
|
||||
|
||||
string loaded_md5[PSO_NUM];
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/kernel.mm (new file, 523 lines)
@@ -0,0 +1,523 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/kernel.h"
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "util/md5.h"
|
||||
# include "util/path.h"
|
||||
# include "util/tbb.h"
|
||||
# include "util/time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* limit to 2 MTLCompiler instances */
|
||||
int max_mtlcompiler_threads = 2;
|
||||
|
||||
const char *kernel_type_as_string(int kernel_type)
|
||||
{
|
||||
switch (kernel_type) {
|
||||
case PSO_GENERIC:
|
||||
return "PSO_GENERIC";
|
||||
case PSO_SPECIALISED:
|
||||
return "PSO_SPECIALISED";
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
MetalDeviceKernel::~MetalDeviceKernel()
|
||||
{
|
||||
for (int i = 0; i < PSO_NUM; i++) {
|
||||
pso[i].release();
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceKernel::load(MetalDevice *device,
|
||||
MetalKernelLoadDesc const &desc_in,
|
||||
MD5Hash const &md5)
|
||||
{
|
||||
__block MetalKernelLoadDesc const desc(desc_in);
|
||||
if (desc.kernel_index == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
/* skip megakernel */
|
||||
return true;
|
||||
}
|
||||
|
||||
bool use_binary_archive = true;
|
||||
if (getenv("CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
|
||||
use_binary_archive = false;
|
||||
}
|
||||
|
||||
id<MTLBinaryArchive> archive = nil;
|
||||
string metalbin_path;
|
||||
if (use_binary_archive) {
|
||||
NSProcessInfo *processInfo = [NSProcessInfo processInfo];
|
||||
string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
|
||||
MD5Hash local_md5(md5);
|
||||
local_md5.append(osVersion);
|
||||
string metalbin_name = string(desc.function_name) + "." + local_md5.get_hex() +
|
||||
to_string(desc.pso_index) + ".bin";
|
||||
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
||||
path_create_directories(metalbin_path);
|
||||
|
||||
if (path_exists(metalbin_path) && use_binary_archive) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
|
||||
archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
[archiveDesc release];
|
||||
}
|
||||
}
|
||||
}
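The cache naming above keys the serialized binary archive on the kernel source hash mixed with the OS version string, so an OS update naturally produces a new file name rather than reusing a possibly incompatible archive. A hedged sketch of that naming (the MD5 helper is not shown; hash_hex is assumed to already cover source plus OS version, as in the code above):

#include <string>

std::string archive_file_name(const std::string &function_name,
                              const std::string &hash_hex, /* md5(source + OS version) */
                              int pso_index)
{
  /* Matches the pattern "<function>.<hash><pso>.bin" used when looking up
   * and serializing MTLBinaryArchive files in the kernel cache directory. */
  return function_name + "." + hash_hex + std::to_string(pso_index) + ".bin";
}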
|
||||
|
||||
NSString *entryPoint = [@(desc.function_name) copy];
|
||||
|
||||
NSError *error = NULL;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
func_desc.name = entryPoint;
|
||||
if (desc.constant_values) {
|
||||
func_desc.constantValues = desc.constant_values;
|
||||
}
|
||||
pso[desc.pso_index].function = [device->mtlLibrary[desc.pso_index]
|
||||
newFunctionWithDescriptor:func_desc
|
||||
error:&error];
|
||||
}
|
||||
[entryPoint release];
|
||||
|
||||
if (pso[desc.pso_index].function == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
string errors = [err UTF8String];
|
||||
|
||||
device->set_error(
|
||||
string_printf("Error getting function \"%s\": %s", desc.function_name, errors.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
pso[desc.pso_index].function.label = [@(desc.function_name) copy];
|
||||
|
||||
__block MTLComputePipelineDescriptor *computePipelineStateDescriptor =
|
||||
[[MTLComputePipelineDescriptor alloc] init];
|
||||
|
||||
computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
|
||||
computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
|
||||
computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
|
||||
|
||||
computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = desc.threads_per_threadgroup;
|
||||
computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
|
||||
|
||||
computePipelineStateDescriptor.computeFunction = pso[desc.pso_index].function;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* Attach the additional functions to an MTLLinkedFunctions object */
|
||||
if (desc.linked_functions) {
|
||||
computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc] init];
|
||||
computePipelineStateDescriptor.linkedFunctions.functions = desc.linked_functions;
|
||||
}
|
||||
|
||||
computePipelineStateDescriptor.maxCallStackDepth = 1;
|
||||
}
|
||||
|
||||
/* Create a new Compute pipeline state object */
|
||||
MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
|
||||
|
||||
bool creating_new_archive = false;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (use_binary_archive) {
|
||||
if (!archive) {
|
||||
MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc] init];
|
||||
archiveDesc.url = nil;
|
||||
archive = [device->mtlDevice newBinaryArchiveWithDescriptor:archiveDesc error:nil];
|
||||
creating_new_archive = true;
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
|
||||
error:&error]) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf("Failed to add PSO to archive:\n%s\n",
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
}
|
||||
else {
|
||||
double duration = time_dt() - starttime;
|
||||
metal_printf("%2d | %-55s | %7.2fs\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
|
||||
if (desc.pso_index == PSO_GENERIC) {
|
||||
this->load_duration = duration;
|
||||
}
|
||||
}
|
||||
}
|
||||
computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
|
||||
pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
|
||||
}
|
||||
}
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
|
||||
id<MTLComputePipelineState> computePipelineState,
|
||||
MTLComputePipelineReflection *reflection,
|
||||
NSError *error) {
|
||||
bool recreate_archive = false;
|
||||
if (computePipelineState == nil && archive && !creating_new_archive) {
|
||||
|
||||
assert(0);
|
||||
|
||||
NSString *errStr = [error localizedDescription];
|
||||
metal_printf(
|
||||
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
|
||||
"(error: %s)\n",
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
errStr ? [errStr UTF8String] : "nil");
|
||||
computePipelineState = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:MTLPipelineOptionNone
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
recreate_archive = true;
|
||||
}
|
||||
|
||||
double duration = time_dt() - starttime;
|
||||
|
||||
if (computePipelineState == nil) {
|
||||
NSString *errStr = [error localizedDescription];
|
||||
device->set_error(string_printf("Failed to create compute pipeline state \"%s\", error: \n",
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index)) +
|
||||
(errStr ? [errStr UTF8String] : "nil"));
|
||||
metal_printf("%2d | %-55s | %7.2fs | FAILED!\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
return;
|
||||
}
|
||||
|
||||
pso[desc.pso_index].pipeline = computePipelineState;
|
||||
num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
|
||||
computePipelineState.threadExecutionWidth);
|
||||
num_threads_per_block = std::max(num_threads_per_block,
|
||||
(int)computePipelineState.threadExecutionWidth);
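The block size is derived from the compiled pipeline: the largest multiple of the SIMD execution width that fits the pipeline's thread limit, but never less than one full SIMD group. A small sketch of that arithmetic (round_down is assumed to behave like the Cycles util helper):

#include <algorithm>

static int round_down(int x, int multiple)
{
  return (x / multiple) * multiple;
}

int pick_threads_per_block(int max_total_threads, int execution_width)
{
  const int n = round_down(max_total_threads, execution_width);
  return std::max(n, execution_width); /* at least one full SIMD group */
}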
|
||||
|
||||
if (!use_binary_archive) {
|
||||
metal_printf("%2d | %-55s | %7.2fs\n",
|
||||
desc.kernel_index,
|
||||
device_kernel_as_string((DeviceKernel)desc.kernel_index),
|
||||
duration);
|
||||
|
||||
if (desc.pso_index == PSO_GENERIC) {
|
||||
this->load_duration = duration;
|
||||
}
|
||||
}
|
||||
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if (creating_new_archive || recreate_archive) {
|
||||
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
|
||||
error:&error]) {
|
||||
metal_printf("Failed to save binary archive, error:\n%s\n",
|
||||
[[error localizedDescription] UTF8String]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[computePipelineStateDescriptor release];
|
||||
computePipelineStateDescriptor = nil;
|
||||
|
||||
if (device->use_metalrt && desc.linked_functions) {
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
MTLIntersectionFunctionTableDescriptor *ift_desc =
|
||||
[[MTLIntersectionFunctionTableDescriptor alloc] init];
|
||||
ift_desc.functionCount = desc.intersector_functions[table].count;
|
||||
|
||||
pso[desc.pso_index].intersection_func_table[table] = [pso[desc.pso_index].pipeline
|
||||
newIntersectionFunctionTableWithDescriptor:ift_desc];
|
||||
|
||||
/* Finally write the function handles into this pipeline's table */
|
||||
for (int i = 0; i < 2; i++) {
|
||||
id<MTLFunctionHandle> handle = [pso[desc.pso_index].pipeline
|
||||
functionHandleWithFunction:desc.intersector_functions[table][i]];
|
||||
[pso[desc.pso_index].intersection_func_table[table] setFunction:handle atIndex:i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mark_loaded(desc.pso_index);
|
||||
};
|
||||
|
||||
if (desc.pso_index == PSO_SPECIALISED) {
|
||||
/* Asynchronous load */
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
NSError *error;
|
||||
id<MTLComputePipelineState> pipeline = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
completionHandler(pipeline, nullptr, error);
|
||||
});
|
||||
}
|
||||
else {
|
||||
/* Block on load to ensure we continue with a valid kernel function */
|
||||
id<MTLComputePipelineState> pipeline = [device->mtlDevice
|
||||
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
|
||||
options:pipelineOptions
|
||||
reflection:nullptr
|
||||
error:&error];
|
||||
completionHandler(pipeline, nullptr, error);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
const MetalKernelPipeline &MetalDeviceKernel::get_pso() const
|
||||
{
|
||||
if (pso[PSO_SPECIALISED].loaded) {
|
||||
return pso[PSO_SPECIALISED];
|
||||
}
|
||||
|
||||
assert(pso[PSO_GENERIC].loaded);
|
||||
return pso[PSO_GENERIC];
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
|
||||
{
|
||||
bool any_error = false;
|
||||
|
||||
MD5Hash md5;
|
||||
|
||||
/* Build the function constant table */
|
||||
MTLFunctionConstantValues *constant_values = nullptr;
|
||||
if (kernel_type == PSO_SPECIALISED) {
|
||||
constant_values = [MTLFunctionConstantValues new];
|
||||
|
||||
# define KERNEL_FILM(_type, name) \
|
||||
[constant_values setConstantValue:&data.film.name \
|
||||
type:get_MTLDataType_##_type() \
|
||||
atIndex:KernelData_film_##name]; \
|
||||
md5.append((uint8_t *)&data.film.name, sizeof(data.film.name));
|
||||
|
||||
# define KERNEL_BACKGROUND(_type, name) \
|
||||
[constant_values setConstantValue:&data.background.name \
|
||||
type:get_MTLDataType_##_type() \
|
||||
atIndex:KernelData_background_##name]; \
|
||||
md5.append((uint8_t *)&data.background.name, sizeof(data.background.name));
|
||||
|
||||
# define KERNEL_INTEGRATOR(_type, name) \
|
||||
[constant_values setConstantValue:&data.integrator.name \
|
||||
type:get_MTLDataType_##_type() \
|
||||
atIndex:KernelData_integrator_##name]; \
|
||||
md5.append((uint8_t *)&data.integrator.name, sizeof(data.integrator.name));
|
||||
|
||||
# define KERNEL_BVH(_type, name) \
|
||||
[constant_values setConstantValue:&data.bvh.name \
|
||||
type:get_MTLDataType_##_type() \
|
||||
atIndex:KernelData_bvh_##name]; \
|
||||
md5.append((uint8_t *)&data.bvh.name, sizeof(data.bvh.name));
|
||||
|
||||
/* METAL_WIP: populate constant_values based on KernelData */
|
||||
assert(0);
|
||||
/*
|
||||
const KernelData &data = device->launch_params.data;
|
||||
# include "kernel/types/background.h"
|
||||
# include "kernel/types/bvh.h"
|
||||
# include "kernel/types/film.h"
|
||||
# include "kernel/types/integrator.h"
|
||||
*/
|
||||
}
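Each KERNEL_* macro above is intended to do two things with the same KernelData member: bind it as a Metal function constant and fold its bytes into the MD5 that keys the pipeline cache, so a changed value forces a recompile. A Metal-free sketch of that dual-purpose pattern (Hasher and register_constant are illustrative stand-ins):

#include <cstddef>
#include <cstdint>
#include <vector>

struct Hasher {
  std::vector<uint8_t> bytes;
  void append(const void *data, size_t size)
  {
    const uint8_t *p = static_cast<const uint8_t *>(data);
    bytes.insert(bytes.end(), p, p + size);
  }
};

/* Stand-in for [constant_values setConstantValue:type:atIndex:]. */
static void register_constant(const char *name, const void *data, size_t size)
{
  (void)name;
  (void)data;
  (void)size;
}

#define SPECIALISE(hasher, value) \
  do { \
    register_constant(#value, &(value), sizeof(value)); /* function constant */ \
    (hasher).append(&(value), sizeof(value));           /* cache key */ \
  } while (0)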
|
||||
|
||||
if (device->use_metalrt) {
|
||||
if (@available(macOS 11.0, *)) {
|
||||
/* create the id<MTLFunction> for each intersection function */
|
||||
const char *function_names[] = {
|
||||
"__anyhit__cycles_metalrt_visibility_test_tri",
|
||||
"__anyhit__cycles_metalrt_visibility_test_box",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_tri",
|
||||
"__anyhit__cycles_metalrt_shadow_all_hit_box",
|
||||
"__anyhit__cycles_metalrt_local_hit_tri",
|
||||
"__anyhit__cycles_metalrt_local_hit_box",
|
||||
"__intersection__curve_ribbon",
|
||||
"__intersection__curve_ribbon_shadow",
|
||||
"__intersection__curve_all",
|
||||
"__intersection__curve_all_shadow",
|
||||
};
|
||||
assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
|
||||
|
||||
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
|
||||
if (kernel_type == PSO_SPECIALISED) {
|
||||
desc.constantValues = constant_values;
|
||||
}
|
||||
for (int i = 0; i < METALRT_FUNC_NUM; i++) {
|
||||
const char *function_name = function_names[i];
|
||||
desc.name = [@(function_name) copy];
|
||||
|
||||
NSError *error = NULL;
|
||||
rt_intersection_funcs[kernel_type][i] = [device->mtlLibrary[kernel_type]
|
||||
newFunctionWithDescriptor:desc
|
||||
error:&error];
|
||||
|
||||
if (rt_intersection_funcs[kernel_type][i] == nil) {
|
||||
NSString *err = [error localizedDescription];
|
||||
string errors = [err UTF8String];
|
||||
|
||||
device->set_error(string_printf(
|
||||
"Error getting intersection function \"%s\": %s", function_name, errors.c_str()));
|
||||
any_error = true;
|
||||
break;
|
||||
}
|
||||
|
||||
rt_intersection_funcs[kernel_type][i].label = [@(function_name) copy];
|
||||
}
|
||||
}
|
||||
}
|
||||
md5.append(device->source_used_for_compile[kernel_type]);
|
||||
|
||||
string hash = md5.get_hex();
|
||||
if (loaded_md5[kernel_type] == hash) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!any_error) {
|
||||
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
|
||||
NSArray *function_list = nil;
|
||||
|
||||
if (device->use_metalrt) {
|
||||
id<MTLFunction> box_intersect_default = nil;
|
||||
id<MTLFunction> box_intersect_shadow = nil;
|
||||
if (device->kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
/* Add curve intersection programs. */
|
||||
if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
/* Slower programs for thick hair since that also slows down ribbons.
|
||||
* Ideally this should not be needed. */
|
||||
box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
|
||||
box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
|
||||
}
|
||||
else {
|
||||
box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
|
||||
box_intersect_shadow =
|
||||
rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
|
||||
}
|
||||
}
|
||||
table_functions[METALRT_TABLE_DEFAULT] = [NSArray
|
||||
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
|
||||
box_intersect_default ?
|
||||
box_intersect_default :
|
||||
rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
|
||||
nil];
|
||||
table_functions[METALRT_TABLE_SHADOW] = [NSArray
|
||||
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
|
||||
box_intersect_shadow ?
|
||||
box_intersect_shadow :
|
||||
rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
|
||||
nil];
|
||||
table_functions[METALRT_TABLE_LOCAL] = [NSArray
|
||||
arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
|
||||
rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
|
||||
nil];
|
||||
|
||||
NSMutableSet *unique_functions = [NSMutableSet
|
||||
setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
|
||||
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
|
||||
[unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
|
||||
|
||||
function_list = [[NSArray arrayWithArray:[unique_functions allObjects]]
|
||||
sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
|
||||
return [f1.label compare:f2.label];
|
||||
}];
|
||||
|
||||
unique_functions = nil;
|
||||
}
|
||||
|
||||
metal_printf("Starting %s \"cycles_metal_...\" pipeline builds\n",
|
||||
kernel_type_as_string(kernel_type));
|
||||
|
||||
tbb::task_arena local_arena(max_mtlcompiler_threads);
|
||||
local_arena.execute([&]() {
|
||||
tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
|
||||
/* skip megakernel */
|
||||
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only specialise kernels where it can make an impact */
|
||||
if (kernel_type == PSO_SPECIALISED) {
|
||||
if (i < DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
MetalDeviceKernel &kernel = kernels_[i];
|
||||
|
||||
const std::string function_name = std::string("cycles_metal_") +
|
||||
device_kernel_as_string((DeviceKernel)i);
|
||||
int threads_per_threadgroup = device->max_threads_per_threadgroup;
|
||||
if (i > DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL && i < DEVICE_KERNEL_INTEGRATOR_RESET) {
|
||||
/* Always use 512 for the sorting kernels */
|
||||
threads_per_threadgroup = 512;
|
||||
}
|
||||
|
||||
NSArray *kernel_function_list = nil;
|
||||
|
||||
if (i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
|
||||
i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
|
||||
kernel_function_list = function_list;
|
||||
}
|
||||
|
||||
MetalKernelLoadDesc desc;
|
||||
desc.pso_index = kernel_type;
|
||||
desc.kernel_index = i;
|
||||
desc.linked_functions = kernel_function_list;
|
||||
desc.intersector_functions.defaults = table_functions[METALRT_TABLE_DEFAULT];
|
||||
desc.intersector_functions.shadow = table_functions[METALRT_TABLE_SHADOW];
|
||||
desc.intersector_functions.local = table_functions[METALRT_TABLE_LOCAL];
|
||||
desc.constant_values = constant_values;
|
||||
desc.threads_per_threadgroup = threads_per_threadgroup;
|
||||
desc.function_name = function_name.c_str();
|
||||
|
||||
bool success = kernel.load(device, desc, md5);
|
||||
|
||||
any_error |= !success;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
bool loaded = !any_error;
|
||||
if (loaded) {
|
||||
loaded_md5[kernel_type] = hash;
|
||||
}
|
||||
return loaded;
|
||||
}
|
||||
|
||||
const MetalDeviceKernel &MetalDeviceKernels::get(DeviceKernel kernel) const
|
||||
{
|
||||
return kernels_[(int)kernel];
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::available(DeviceKernel kernel) const
|
||||
{
|
||||
return kernels_[(int)kernel].get_pso().function != nil;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/queue.h (new file, 97 lines)
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/kernel.h"
|
||||
# include "device/memory.h"
|
||||
# include "device/queue.h"
|
||||
|
||||
# include "device/metal/util.h"
|
||||
# include "kernel/device/metal/globals.h"
|
||||
|
||||
# define metal_printf VLOG(4) << string_printf
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class MetalDevice;
|
||||
|
||||
/* Base class for Metal queues. */
|
||||
class MetalDeviceQueue : public DeviceQueue {
|
||||
public:
|
||||
MetalDeviceQueue(MetalDevice *device);
|
||||
~MetalDeviceQueue();
|
||||
|
||||
virtual int num_concurrent_states(const size_t) const override;
|
||||
virtual int num_concurrent_busy_states() const override;
|
||||
|
||||
virtual void init_execution() override;
|
||||
|
||||
virtual bool enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args) override;
|
||||
|
||||
virtual bool synchronize() override;
|
||||
|
||||
virtual void zero_to_device(device_memory &mem) override;
|
||||
virtual void copy_to_device(device_memory &mem) override;
|
||||
virtual void copy_from_device(device_memory &mem) override;
|
||||
|
||||
virtual bool kernel_available(DeviceKernel kernel) const override;
|
||||
|
||||
protected:
|
||||
void prepare_resources(DeviceKernel kernel);
|
||||
|
||||
id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
|
||||
id<MTLBlitCommandEncoder> get_blit_encoder();
|
||||
|
||||
MetalDevice *metal_device;
|
||||
MetalBufferPool temp_buffer_pool;
|
||||
|
||||
API_AVAILABLE(macos(11.0), ios(14.0))
|
||||
MTLCommandBufferDescriptor *command_buffer_desc = nullptr;
|
||||
id<MTLDevice> mtlDevice = nil;
|
||||
id<MTLCommandQueue> mtlCommandQueue = nil;
|
||||
id<MTLCommandBuffer> mtlCommandBuffer = nil;
|
||||
id<MTLComputeCommandEncoder> mtlComputeEncoder = nil;
|
||||
id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
|
||||
id<MTLSharedEvent> shared_event = nil;
|
||||
MTLSharedEventListener *shared_event_listener = nil;
|
||||
|
||||
dispatch_queue_t event_queue;
|
||||
dispatch_semaphore_t wait_semaphore;
|
||||
|
||||
struct CopyBack {
|
||||
void *host_pointer;
|
||||
void *gpu_mem;
|
||||
uint64_t size;
|
||||
};
|
||||
std::vector<CopyBack> copy_back_mem;
|
||||
|
||||
uint64_t shared_event_id;
|
||||
uint64_t command_buffers_submitted = 0;
|
||||
uint64_t command_buffers_completed = 0;
|
||||
Stats &stats;
|
||||
|
||||
void close_compute_encoder();
|
||||
void close_blit_encoder();
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/queue.mm (new file, 602 lines)
@@ -0,0 +1,602 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef WITH_METAL
|
||||
|
||||
# include "device/metal/queue.h"
|
||||
|
||||
# include "device/metal/device_impl.h"
|
||||
# include "device/metal/kernel.h"
|
||||
|
||||
# include "util/path.h"
|
||||
# include "util/string.h"
|
||||
# include "util/time.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* MetalDeviceQueue */
|
||||
|
||||
MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
|
||||
: DeviceQueue(device), metal_device(device), stats(device->stats)
|
||||
{
|
||||
if (@available(macos 11.0, *)) {
|
||||
command_buffer_desc = [[MTLCommandBufferDescriptor alloc] init];
|
||||
command_buffer_desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
}
|
||||
|
||||
mtlDevice = device->mtlDevice;
|
||||
mtlCommandQueue = [mtlDevice newCommandQueue];
|
||||
|
||||
shared_event = [mtlDevice newSharedEvent];
|
||||
shared_event_id = 1;
|
||||
|
||||
/* Shareable event listener */
|
||||
event_queue = dispatch_queue_create("com.cycles.metal.event_queue", NULL);
|
||||
shared_event_listener = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue];
|
||||
|
||||
wait_semaphore = dispatch_semaphore_create(0);
|
||||
}
|
||||
|
||||
MetalDeviceQueue::~MetalDeviceQueue()
|
||||
{
|
||||
/* Tidying up here isn't really practical - we should expect and require the work
|
||||
* queue to be empty here. */
|
||||
assert(mtlCommandBuffer == nil);
|
||||
assert(command_buffers_submitted == command_buffers_completed);
|
||||
|
||||
[shared_event_listener release];
|
||||
[shared_event release];
|
||||
|
||||
if (@available(macos 11.0, *)) {
|
||||
[command_buffer_desc release];
|
||||
}
|
||||
if (mtlCommandQueue) {
|
||||
[mtlCommandQueue release];
|
||||
mtlCommandQueue = nil;
|
||||
}
|
||||
}
|
||||
|
||||
int MetalDeviceQueue::num_concurrent_states(const size_t /*state_size*/) const
|
||||
{
|
||||
/* METAL_WIP */
|
||||
/* TODO: compute automatically. */
|
||||
/* TODO: must have at least num_threads_per_block. */
|
||||
int result = 1048576;
|
||||
if (metal_device->device_vendor == METAL_GPU_AMD) {
|
||||
result *= 2;
|
||||
}
|
||||
else if (metal_device->device_vendor == METAL_GPU_APPLE) {
|
||||
result *= 4;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int MetalDeviceQueue::num_concurrent_busy_states() const
|
||||
{
|
||||
/* METAL_WIP */
|
||||
/* TODO: compute automatically. */
|
||||
int result = 65536;
|
||||
if (metal_device->device_vendor == METAL_GPU_AMD) {
|
||||
result *= 2;
|
||||
}
|
||||
else if (metal_device->device_vendor == METAL_GPU_APPLE) {
|
||||
result *= 4;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::init_execution()
|
||||
{
|
||||
/* Synchronize all textures and memory copies before executing task. */
|
||||
metal_device->load_texture_info();
|
||||
|
||||
synchronize();
|
||||
}
|
||||
|
||||
bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const int work_size,
|
||||
DeviceKernelArguments const &args)
|
||||
{
|
||||
if (metal_device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
VLOG(3) << "Metal queue launch " << device_kernel_as_string(kernel) << ", work_size "
|
||||
<< work_size;
|
||||
|
||||
const MetalDeviceKernel &metal_kernel = metal_device->kernels.get(kernel);
|
||||
const MetalKernelPipeline &metal_kernel_pso = metal_kernel.get_pso();
|
||||
|
||||
id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(kernel);
|
||||
|
||||
/* Determine size requirement for argument buffer. */
|
||||
size_t arg_buffer_length = 0;
|
||||
for (size_t i = 0; i < args.count; i++) {
|
||||
size_t size_in_bytes = args.sizes[i];
|
||||
arg_buffer_length = round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
|
||||
}
|
||||
/* 256 is the Metal offset alignment for constant address space bindings */
|
||||
arg_buffer_length = round_up(arg_buffer_length, 256);
|
||||
|
||||
/* Globals placed after "vanilla" arguments. */
|
||||
size_t globals_offsets = arg_buffer_length;
|
||||
arg_buffer_length += sizeof(KernelParamsMetal);
|
||||
arg_buffer_length = round_up(arg_buffer_length, 256);
|
||||
|
||||
/* Metal ancillary bindless pointers */
|
||||
size_t metal_offsets = arg_buffer_length;
|
||||
arg_buffer_length += metal_device->mtlAncillaryArgEncoder.encodedLength;
|
||||
arg_buffer_length = round_up(arg_buffer_length, metal_device->mtlAncillaryArgEncoder.alignment);
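The computation above lays three regions out in one argument buffer: the packed per-launch arguments, the KernelParamsMetal globals, and the ancillary argument-encoder data, each aligned upwards before the next region starts. A sketch of that packing (alignments and sizes are passed in; 256 is the constant-address-space alignment noted above):

#include <cstddef>

static size_t round_up(size_t x, size_t multiple)
{
  return ((x + multiple - 1) / multiple) * multiple;
}

struct ArgBufferLayout {
  size_t globals_offset;
  size_t ancillary_offset;
  size_t total_length;
};

ArgBufferLayout layout_arg_buffer(size_t packed_args_size,
                                  size_t globals_size,
                                  size_t ancillary_size,
                                  size_t ancillary_alignment)
{
  ArgBufferLayout l;
  size_t length = round_up(packed_args_size, 256);
  l.globals_offset = length;
  length = round_up(length + globals_size, 256);
  l.ancillary_offset = length;
  length = round_up(length + ancillary_size, ancillary_alignment);
  l.total_length = length;
  return l;
}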
|
||||
|
||||
/* Temporary buffer used to prepare arg_buffer */
|
||||
uint8_t *init_arg_buffer = (uint8_t *)alloca(arg_buffer_length);
|
||||
memset(init_arg_buffer, 0, arg_buffer_length);
|
||||
|
||||
/* Prepare the non-pointer "enqueue" arguments */
|
||||
size_t bytes_written = 0;
|
||||
for (size_t i = 0; i < args.count; i++) {
|
||||
size_t size_in_bytes = args.sizes[i];
|
||||
bytes_written = round_up(bytes_written, size_in_bytes);
|
||||
if (args.types[i] != DeviceKernelArguments::POINTER) {
|
||||
memcpy(init_arg_buffer + bytes_written, args.values[i], size_in_bytes);
|
||||
}
|
||||
bytes_written += size_in_bytes;
|
||||
}
|
||||
|
||||
/* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
|
||||
/* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
|
||||
size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) +
|
||||
sizeof(IntegratorStateGPU);
|
||||
size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
|
||||
memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
|
||||
(uint8_t *)&metal_device->launch_params + plain_old_launch_data_offset,
|
||||
plain_old_launch_data_size);
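KernelParamsMetal is treated as two regions here: a leading block of device pointers (the global arrays plus IntegratorStateGPU), which is re-encoded pointer by pointer further down, and a trailing plain-old-data block that can be copied in one memcpy. A hedged sketch of the offsetof-based split with an illustrative stand-in struct:

#include <cstddef>
#include <cstring>

struct State {
  void *queues;
  void *paths;
};

struct Params {
  void *buffer0;   /* device pointers: re-encoded individually */
  State state;
  int pod_value_a; /* plain-old-data: copied byte-for-byte */
  float pod_value_b;
};

void copy_pod_tail(unsigned char *dst, const Params &src)
{
  const size_t pod_offset = offsetof(Params, state) + sizeof(State);
  const size_t pod_size = sizeof(Params) - pod_offset;
  std::memcpy(dst + pod_offset,
              reinterpret_cast<const unsigned char *>(&src) + pod_offset,
              pod_size);
}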
|
||||
|
||||
/* Allocate an argument buffer. */
|
||||
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
|
||||
if (@available(macOS 11.0, *)) {
|
||||
if ([mtlDevice hasUnifiedMemory]) {
|
||||
arg_buffer_options = MTLResourceStorageModeShared;
|
||||
}
|
||||
}
|
||||
|
||||
id<MTLBuffer> arg_buffer = temp_buffer_pool.get_buffer(
|
||||
mtlDevice, mtlCommandBuffer, arg_buffer_length, arg_buffer_options, init_arg_buffer, stats);
|
||||
|
||||
/* Encode the pointer "enqueue" arguments */
|
||||
bytes_written = 0;
|
||||
for (size_t i = 0; i < args.count; i++) {
|
||||
size_t size_in_bytes = args.sizes[i];
|
||||
bytes_written = round_up(bytes_written, size_in_bytes);
|
||||
if (args.types[i] == DeviceKernelArguments::POINTER) {
|
||||
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
|
||||
offset:bytes_written];
|
||||
if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.values[i]) {
|
||||
[mtlComputeCommandEncoder useResource:mmem->mtlBuffer
|
||||
usage:MTLResourceUsageRead | MTLResourceUsageWrite];
|
||||
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
|
||||
}
|
||||
else {
|
||||
if (@available(macos 12.0, *)) {
|
||||
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:0];
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes_written += size_in_bytes;
|
||||
}
|
||||
|
||||
/* Encode KernelParamsMetal buffers */
|
||||
[metal_device->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer offset:globals_offsets];
|
||||
|
||||
/* this relies on IntegratorStateGPU layout being contiguous device_ptrs */
|
||||
const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
|
||||
sizeof(IntegratorStateGPU);
|
||||
for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
|
||||
int pointer_index = offset / sizeof(device_ptr);
|
||||
MetalDevice::MetalMem *mmem = *(
|
||||
MetalDevice::MetalMem **)((uint8_t *)&metal_device->launch_params + offset);
|
||||
if (mmem && (mmem->mtlBuffer || mmem->mtlTexture)) {
|
||||
[metal_device->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
|
||||
offset:0
|
||||
atIndex:pointer_index];
|
||||
}
|
||||
else {
|
||||
if (@available(macos 12.0, *)) {
|
||||
[metal_device->mtlBufferKernelParamsEncoder setBuffer:nil offset:0 atIndex:pointer_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
|
||||
|
||||
/* Encode ancillaries */
|
||||
[metal_device->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
|
||||
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_2d
|
||||
offset:0
|
||||
atIndex:0];
|
||||
[metal_device->mtlAncillaryArgEncoder setBuffer:metal_device->texture_bindings_3d
|
||||
offset:0
|
||||
atIndex:1];
|
||||
if (@available(macos 12.0, *)) {
|
||||
if (metal_device->use_metalrt) {
|
||||
if (metal_device->bvhMetalRT) {
|
||||
id<MTLAccelerationStructure> accel_struct = metal_device->bvhMetalRT->accel_struct;
|
||||
[metal_device->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
|
||||
}
|
||||
|
||||
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
|
||||
if (metal_kernel_pso.intersection_func_table[table]) {
|
||||
[metal_kernel_pso.intersection_func_table[table] setBuffer:arg_buffer
|
||||
offset:globals_offsets
|
||||
atIndex:1];
|
||||
[metal_device->mtlAncillaryArgEncoder
|
||||
setIntersectionFunctionTable:metal_kernel_pso.intersection_func_table[table]
|
||||
atIndex:3 + table];
|
||||
[mtlComputeCommandEncoder useResource:metal_kernel_pso.intersection_func_table[table]
|
||||
usage:MTLResourceUsageRead];
|
||||
}
|
||||
else {
|
||||
[metal_device->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
|
||||
atIndex:3 + table];
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes_written = metal_offsets + metal_device->mtlAncillaryArgEncoder.encodedLength;
|
||||
}
|
||||
|
||||
if (arg_buffer.storageMode == MTLStorageModeManaged) {
|
||||
[arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
|
||||
}
|
||||
|
||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
|
||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
|
||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
|
||||
|
||||
if (metal_device->use_metalrt) {
|
||||
if (@available(macos 12.0, *)) {
|
||||
|
||||
auto bvhMetalRT = metal_device->bvhMetalRT;
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
|
||||
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
|
||||
break;
|
||||
default:
|
||||
bvhMetalRT = nil;
|
||||
break;
|
||||
}
|
||||
|
||||
if (bvhMetalRT) {
|
||||
/* Mark all acceleration structure resources as used. */
|
||||
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
|
||||
[mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
|
||||
count:bvhMetalRT->blas_array.size()
|
||||
usage:MTLResourceUsageRead];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso.pipeline];
|
||||
|
||||
/* Compute kernel launch parameters. */
|
||||
const int num_threads_per_block = metal_kernel.get_num_threads_per_block();
|
||||
|
||||
int shared_mem_bytes = 0;
|
||||
|
||||
switch (kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY:
|
||||
case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY:
|
||||
/* See parallel_active_index.h for why this amount of shared memory is needed.
|
||||
* Rounded up to 16 bytes for Metal */
|
||||
shared_mem_bytes = round_up((num_threads_per_block + 1) * sizeof(int), 16);
|
||||
[mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
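For the path-array kernels the threadgroup memory holds one int per thread plus one extra slot, rounded up to Metal's 16-byte threadgroup-memory granularity. The size calculation, as a standalone sketch:

#include <cstddef>

static size_t active_index_shared_bytes(int threads_per_block)
{
  const size_t bytes = (threads_per_block + 1) * sizeof(int);
  return ((bytes + 15) / 16) * 16; /* round up to 16 bytes */
}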
|
||||
|
||||
MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
|
||||
divide_up(work_size, num_threads_per_block), 1, 1);
|
||||
MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
|
||||
[mtlComputeCommandEncoder dispatchThreadgroups:size_threadgroups_per_dispatch
|
||||
threadsPerThreadgroup:size_threads_per_threadgroup];
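The dispatch is one-dimensional: the work size is ceil-divided into threadgroups so the final partial group is still launched, and the kernel is expected to bounds-check indices in that last group. A sketch of divide_up as assumed here:

static int divide_up(int work_size, int threads_per_block)
{
  return (work_size + threads_per_block - 1) / threads_per_block;
}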
|
||||
|
||||
[mtlCommandBuffer addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
|
||||
NSString *kernel_name = metal_kernel_pso.function.label;
|
||||
|
||||
/* Enhanced command buffer errors are only available in 11.0+ */
|
||||
if (@available(macos 11.0, *)) {
|
||||
if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
|
||||
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||
NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
|
||||
valueForKey:MTLCommandBufferEncoderInfoErrorKey];
|
||||
if (encoderInfos != nil) {
|
||||
for (id<MTLCommandBufferEncoderInfo> encoderInfo : encoderInfos) {
|
||||
NSLog(@"%@", encoderInfo);
|
||||
}
|
||||
}
|
||||
id<MTLLogContainer> logs = command_buffer.logs;
|
||||
for (id<MTLFunctionLog> log in logs) {
|
||||
NSLog(@"%@", log);
|
||||
}
|
||||
}
|
||||
else if (command_buffer.error) {
|
||||
printf("CommandBuffer Failed: %s\n", [kernel_name UTF8String]);
|
||||
}
|
||||
}
|
||||
}];
|
||||
|
||||
return !(metal_device->have_error());
|
||||
}
|
||||
|
||||
bool MetalDeviceQueue::synchronize()
|
||||
{
|
||||
if (metal_device->have_error()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mtlComputeEncoder) {
|
||||
close_compute_encoder();
|
||||
}
|
||||
close_blit_encoder();
|
||||
|
||||
if (mtlCommandBuffer) {
|
||||
uint64_t shared_event_id = this->shared_event_id++;
|
||||
|
||||
__block dispatch_semaphore_t block_sema = wait_semaphore;
|
||||
[shared_event notifyListener:shared_event_listener
|
||||
atValue:shared_event_id
|
||||
block:^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
|
||||
dispatch_semaphore_signal(block_sema);
|
||||
}];
|
||||
|
||||
[mtlCommandBuffer encodeSignalEvent:shared_event value:shared_event_id];
|
||||
[mtlCommandBuffer commit];
|
||||
dispatch_semaphore_wait(wait_semaphore, DISPATCH_TIME_FOREVER);
|
||||
|
||||
[mtlCommandBuffer release];
|
||||
|
||||
for (const CopyBack &mmem : copy_back_mem) {
|
||||
memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size);
|
||||
}
|
||||
copy_back_mem.clear();
|
||||
|
||||
temp_buffer_pool.process_command_buffer_completion(mtlCommandBuffer);
|
||||
metal_device->flush_delayed_free_list();
|
||||
|
||||
mtlCommandBuffer = nil;
|
||||
}
|
||||
|
||||
return !(metal_device->have_error());
|
||||
}
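synchronize() blocks the CPU by encoding a signal on the shared event at the end of the command buffer and waiting on a semaphore that the event listener's block releases. A Metal-free sketch of that handshake using standard C++ primitives (std::promise stands in for MTLSharedEvent plus dispatch_semaphore; illustrative only):

#include <future>
#include <thread>

void submit_and_wait()
{
  std::promise<void> done;
  std::future<void> done_future = done.get_future();

  /* Stand-in for the GPU side: the notify block fires once the command
   * buffer reaches the encoded signal value. */
  std::thread gpu_work([&done]() { done.set_value(); });

  /* Equivalent of dispatch_semaphore_wait(..., DISPATCH_TIME_FOREVER). */
  done_future.wait();
  gpu_work.join();
}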
|
||||
|
||||
void MetalDeviceQueue::zero_to_device(device_memory &mem)
|
||||
{
|
||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
||||
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Allocate on demand. */
|
||||
if (mem.device_pointer == 0) {
|
||||
metal_device->mem_alloc(mem);
|
||||
}
|
||||
|
||||
/* Zero memory on device. */
|
||||
assert(mem.device_pointer != 0);
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
|
||||
MetalDevice::MetalMem &mmem = *metal_device->metal_mem_map.at(&mem);
|
||||
if (mmem.mtlBuffer) {
|
||||
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
|
||||
[blitEncoder fillBuffer:mmem.mtlBuffer range:NSMakeRange(mmem.offset, mmem.size) value:0];
|
||||
}
|
||||
else {
|
||||
metal_device->mem_zero(mem);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::copy_to_device(device_memory &mem)
|
||||
{
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Allocate on demand. */
|
||||
if (mem.device_pointer == 0) {
|
||||
metal_device->mem_alloc(mem);
|
||||
}
|
||||
|
||||
assert(mem.device_pointer != 0);
|
||||
assert(mem.host_pointer != nullptr);
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
|
||||
auto result = metal_device->metal_mem_map.find(&mem);
|
||||
if (result != metal_device->metal_mem_map.end()) {
|
||||
if (mem.host_pointer == mem.shared_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
MetalDevice::MetalMem &mmem = *result->second;
|
||||
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
|
||||
|
||||
id<MTLBuffer> buffer = temp_buffer_pool.get_buffer(mtlDevice,
|
||||
mtlCommandBuffer,
|
||||
mmem.size,
|
||||
MTLResourceStorageModeShared,
|
||||
mem.host_pointer,
|
||||
stats);
|
||||
|
||||
[blitEncoder copyFromBuffer:buffer
|
||||
sourceOffset:0
|
||||
toBuffer:mmem.mtlBuffer
|
||||
destinationOffset:mmem.offset
|
||||
size:mmem.size];
|
||||
}
|
||||
else {
|
||||
metal_device->mem_copy_to(mem);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::copy_from_device(device_memory &mem)
|
||||
{
|
||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
||||
|
||||
if (mem.memory_size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(mem.device_pointer != 0);
|
||||
assert(mem.host_pointer != nullptr);
|
||||
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
|
||||
MetalDevice::MetalMem &mmem = *metal_device->metal_mem_map.at(&mem);
|
||||
if (mmem.mtlBuffer) {
|
||||
const size_t size = mem.memory_size();
|
||||
|
||||
if (mem.device_pointer) {
|
||||
if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
|
||||
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
|
||||
[blitEncoder synchronizeResource:mmem.mtlBuffer];
|
||||
}
|
||||
if (mem.host_pointer != mmem.hostPtr) {
|
||||
if (mtlCommandBuffer) {
|
||||
copy_back_mem.push_back({mem.host_pointer, mmem.hostPtr, size});
|
||||
}
|
||||
else {
|
||||
memcpy((uchar *)mem.host_pointer, (uchar *)mmem.hostPtr, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
memset((char *)mem.host_pointer, 0, size);
|
||||
}
|
||||
}
|
||||
else {
|
||||
metal_device->mem_copy_from(mem);
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceQueue::kernel_available(DeviceKernel kernel) const
|
||||
{
|
||||
return metal_device->kernels.available(kernel);
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_device->metal_mem_map_mutex);
|
||||
|
||||
/* declare resource usage */
|
||||
for (auto &it : metal_device->metal_mem_map) {
|
||||
device_memory *mem = it.first;
|
||||
|
||||
MTLResourceUsage usage = MTLResourceUsageRead;
|
||||
if (mem->type != MEM_GLOBAL && mem->type != MEM_READ_ONLY && mem->type != MEM_TEXTURE) {
|
||||
usage |= MTLResourceUsageWrite;
|
||||
}
|
||||
|
||||
if (it.second->mtlBuffer) {
|
||||
/* METAL_WIP - use array version (i.e. useResources) */
|
||||
[mtlComputeEncoder useResource:it.second->mtlBuffer usage:usage];
|
||||
}
|
||||
else if (it.second->mtlTexture) {
|
||||
/* METAL_WIP - use array version (i.e. useResources) */
|
||||
[mtlComputeEncoder useResource:it.second->mtlTexture usage:usage | MTLResourceUsageSample];
|
||||
}
|
||||
}
|
||||
|
||||
/* ancillaries */
|
||||
[mtlComputeEncoder useResource:metal_device->texture_bindings_2d usage:MTLResourceUsageRead];
|
||||
[mtlComputeEncoder useResource:metal_device->texture_bindings_3d usage:MTLResourceUsageRead];
|
||||
}
|
||||
|
||||
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)
|
||||
{
|
||||
bool concurrent = (kernel < DEVICE_KERNEL_INTEGRATOR_NUM);
|
||||
|
||||
if (mtlComputeEncoder) {
|
||||
    if (mtlComputeEncoder.dispatchType ==
        (concurrent ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial)) {
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
|
||||
return mtlComputeEncoder;
|
||||
}
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
close_blit_encoder();
|
||||
|
||||
if (!mtlCommandBuffer) {
|
||||
mtlCommandBuffer = [mtlCommandQueue commandBuffer];
|
||||
[mtlCommandBuffer retain];
|
||||
}
|
||||
|
||||
mtlComputeEncoder = [mtlCommandBuffer
|
||||
computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
|
||||
MTLDispatchTypeSerial];
|
||||
|
||||
/* declare usage of MTLBuffers etc */
|
||||
prepare_resources(kernel);
|
||||
|
||||
return mtlComputeEncoder;
|
||||
}
|
||||
|
||||
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
|
||||
{
|
||||
if (mtlBlitEncoder) {
|
||||
return mtlBlitEncoder;
|
||||
}
|
||||
|
||||
if (mtlComputeEncoder) {
|
||||
close_compute_encoder();
|
||||
}
|
||||
|
||||
if (!mtlCommandBuffer) {
|
||||
mtlCommandBuffer = [mtlCommandQueue commandBuffer];
|
||||
[mtlCommandBuffer retain];
|
||||
}
|
||||
|
||||
mtlBlitEncoder = [mtlCommandBuffer blitCommandEncoder];
|
||||
return mtlBlitEncoder;
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::close_compute_encoder()
|
||||
{
|
||||
[mtlComputeEncoder endEncoding];
|
||||
mtlComputeEncoder = nil;
|
||||
}
|
||||
|
||||
void MetalDeviceQueue::close_blit_encoder()
|
||||
{
|
||||
if (mtlBlitEncoder) {
|
||||
[mtlBlitEncoder endEncoding];
|
||||
mtlBlitEncoder = nil;
|
||||
}
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* WITH_METAL */
|
||||
intern/cycles/device/metal/util.h (new file, 101 lines)
@@ -0,0 +1,101 @@
/*
 * Copyright 2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#ifdef WITH_METAL

# include <Metal/Metal.h>
# include <string>

# include "device/metal/device.h"
# include "device/metal/kernel.h"
# include "device/queue.h"

# include "util/thread.h"

CCL_NAMESPACE_BEGIN

enum MetalGPUVendor {
  METAL_GPU_UNKNOWN = 0,
  METAL_GPU_APPLE = 1,
  METAL_GPU_AMD = 2,
  METAL_GPU_INTEL = 3,
};

/* Retains a named MTLDevice for device enumeration. */
struct MetalPlatformDevice {
  MetalPlatformDevice(id<MTLDevice> device, const string &device_name)
      : device_id(device), device_name(device_name)
  {
    [device_id retain];
  }
  ~MetalPlatformDevice()
  {
    [device_id release];
  }
  id<MTLDevice> device_id;
  string device_name;
};

/* Contains static Metal helper functions. */
struct MetalInfo {
  static bool device_version_check(id<MTLDevice> device);
  static void get_usable_devices(vector<MetalPlatformDevice> *usable_devices);
  static MetalGPUVendor get_vendor_from_device_name(string const &device_name);

  /* Platform information. */
  static bool get_num_devices(uint32_t *num_platforms);
  static uint32_t get_num_devices();

  static bool get_device_name(id<MTLDevice> device_id, string *device_name);
  static string get_device_name(id<MTLDevice> device_id);
};

/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
class MetalBufferPool {
  struct MetalBufferListEntry {
    MetalBufferListEntry(id<MTLBuffer> buffer, id<MTLCommandBuffer> command_buffer)
        : buffer(buffer), command_buffer(command_buffer)
    {
    }

    MetalBufferListEntry() = delete;

    id<MTLBuffer> buffer;
    id<MTLCommandBuffer> command_buffer;
  };
  std::vector<MetalBufferListEntry> buffer_free_list;
  std::vector<MetalBufferListEntry> buffer_in_use_list;
  thread_mutex buffer_mutex;
  size_t total_temp_mem_size = 0;

 public:
  MetalBufferPool() = default;
  ~MetalBufferPool();

  id<MTLBuffer> get_buffer(id<MTLDevice> device,
                           id<MTLCommandBuffer> command_buffer,
                           NSUInteger length,
                           MTLResourceOptions options,
                           const void *pointer,
                           Stats &stats);
  void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
};

CCL_NAMESPACE_END

#endif /* WITH_METAL */
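
Editor's note: the buffer pool keys reuse off length, storage mode and CPU-cache mode, and ties each allocation to the command buffer currently being encoded. A hedged sketch of the intended call pattern follows; the device, queue and stats objects are assumed to come from the surrounding MetalDevice/MetalDeviceQueue, and none of this is code from the patch.

#import <Metal/Metal.h>

/* Usage sketch only: pair get_buffer() with process_command_buffer_completion(). */
static void example_temp_upload(MetalBufferPool *pool,
                                id<MTLDevice> device,
                                id<MTLCommandQueue> queue,
                                Stats &stats)
{
  id<MTLCommandBuffer> cmd_buf = [queue commandBuffer];

  /* Transient upload buffer whose lifetime is tied to `cmd_buf`. */
  const float params[4] = {0.0f, 1.0f, 2.0f, 3.0f};
  id<MTLBuffer> temp = pool->get_buffer(
      device, cmd_buf, sizeof(params), MTLResourceStorageModeShared, params, stats);
  (void)temp; /* ... encode work that reads `temp` ... */

  [cmd_buf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
    /* Recycle every pooled buffer that was associated with this command buffer. */
    pool->process_command_buffer_completion(cb);
  }];
  [cmd_buf commit];
}
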

intern/cycles/device/metal/util.mm (new file, 241 lines)
@@ -0,0 +1,241 @@
/*
 * Copyright 2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifdef WITH_METAL

# include "device/metal/util.h"
# include "device/metal/device_impl.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/string.h"
# include "util/time.h"

# include <pwd.h>
# include <sys/shm.h>
# include <time.h>

CCL_NAMESPACE_BEGIN

MetalGPUVendor MetalInfo::get_vendor_from_device_name(string const &device_name)
{
  if (device_name.find("Intel") != string::npos) {
    return METAL_GPU_INTEL;
  }
  else if (device_name.find("AMD") != string::npos) {
    return METAL_GPU_AMD;
  }
  else if (device_name.find("Apple") != string::npos) {
    return METAL_GPU_APPLE;
  }
  return METAL_GPU_UNKNOWN;
}

bool MetalInfo::device_version_check(id<MTLDevice> device)
{
  if (@available(macos 12.0, *)) {
    MetalGPUVendor vendor = get_vendor_from_device_name([[device name] UTF8String]);

    static const char *forceIntelStr = getenv("CYCLES_METAL_FORCE_INTEL");
    bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;

    if (forceIntel) {
      /* Return false for non-Intel GPUs to force selection of Intel. */
      if (vendor == METAL_GPU_INTEL) {
        return true;
      }
    }
    else {
      switch (vendor) {
        case METAL_GPU_INTEL:
          /* isLowPower only returns true on machines that also have an AMD GPU.
           * On Intel-only machines, isLowPower returns false. */
          if (getenv("CYCLES_METAL_ALLOW_LOW_POWER_GPUS") || !device.isLowPower) {
            return true;
          }
          return false;
        case METAL_GPU_APPLE:
        case METAL_GPU_AMD:
          return true;
        default:
          return false;
      }
    }
  }

  return false;
}

void MetalInfo::get_usable_devices(vector<MetalPlatformDevice> *usable_devices)
{
  static bool first_time = true;
# define FIRST_VLOG(severity) \
    if (first_time) \
    VLOG(severity)

  usable_devices->clear();

  NSArray<id<MTLDevice>> *allDevices = MTLCopyAllDevices();
  for (id<MTLDevice> device in allDevices) {
    string device_name;
    if (!get_device_name(device, &device_name)) {
      FIRST_VLOG(2) << "Failed to get device name, ignoring.";
      continue;
    }

    static const char *forceIntelStr = getenv("CYCLES_METAL_FORCE_INTEL");
    bool forceIntel = forceIntelStr ? (atoi(forceIntelStr) != 0) : false;
    if (forceIntel && device_name.find("Intel") == string::npos) {
      FIRST_VLOG(2) << "CYCLES_METAL_FORCE_INTEL causing non-Intel device " << device_name
                    << " to be ignored.";
      continue;
    }

    if (!device_version_check(device)) {
      FIRST_VLOG(2) << "Ignoring device " << device_name << " due to too old compiler version.";
      continue;
    }
    FIRST_VLOG(2) << "Adding new device " << device_name << ".";
    string hardware_id;
    usable_devices->push_back(MetalPlatformDevice(device, device_name));
  }
  first_time = false;
}

bool MetalInfo::get_num_devices(uint32_t *num_devices)
{
  *num_devices = MTLCopyAllDevices().count;
  return true;
}

uint32_t MetalInfo::get_num_devices()
{
  uint32_t num_devices;
  if (!get_num_devices(&num_devices)) {
    return 0;
  }
  return num_devices;
}

bool MetalInfo::get_device_name(id<MTLDevice> device, string *platform_name)
{
  *platform_name = [device.name UTF8String];
  return true;
}

string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string platform_name;
  if (!get_device_name(device, &platform_name)) {
    return "";
  }
  return platform_name;
}

id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          MTLResourceOptions options,
                                          const void *pointer,
                                          Stats &stats)
{
  id<MTLBuffer> buffer;

  MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
                                              MTLResourceStorageModeShift);
  MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
                                                 MTLResourceCPUCacheModeShift);

  buffer_mutex.lock();
  for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
    MetalBufferListEntry bufferEntry = *entry;

    /* Check if the buffer matches the requested size, storage mode and CPU cache mode. */
    if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
        cpuCacheMode == bufferEntry.buffer.cpuCacheMode) {
      buffer = bufferEntry.buffer;
      buffer_free_list.erase(entry);
      bufferEntry.command_buffer = command_buffer;
      buffer_in_use_list.push_back(bufferEntry);
      buffer_mutex.unlock();

      /* Copy over data. */
      if (pointer) {
        memcpy(buffer.contents, pointer, length);
        if (bufferEntry.buffer.storageMode == MTLStorageModeManaged) {
          [buffer didModifyRange:NSMakeRange(0, length)];
        }
      }

      return buffer;
    }
  }
  // NSLog(@"Creating buffer of length %lu (%lu)", length, frameCount);
  if (pointer) {
    buffer = [device newBufferWithBytes:pointer length:length options:options];
  }
  else {
    buffer = [device newBufferWithLength:length options:options];
  }

  MetalBufferListEntry buffer_entry(buffer, command_buffer);

  stats.mem_alloc(buffer.allocatedSize);

  total_temp_mem_size += buffer.allocatedSize;
  buffer_in_use_list.push_back(buffer_entry);
  buffer_mutex.unlock();

  return buffer;
}

void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);
  /* Move all buffers that were used with this command buffer back into the free pool. */
  for (auto entry = buffer_in_use_list.begin(); entry != buffer_in_use_list.end();) {
    MetalBufferListEntry buffer_entry = *entry;
    if (buffer_entry.command_buffer == command_buffer) {
      entry = buffer_in_use_list.erase(entry);
      buffer_entry.command_buffer = nil;
      buffer_free_list.push_back(buffer_entry);
    }
    else {
      entry++;
    }
  }
}

MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);
  /* Release all buffers remaining in the free pool. */
  for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end();) {
    MetalBufferListEntry buffer_entry = *entry;

    id<MTLBuffer> buffer = buffer_entry.buffer;
    // NSLog(@"Releasing buffer of length %lu (%lu) (%lu outstanding)", buffer.length, frameCount,
    //       bufferFreeList.size());
    total_temp_mem_size -= buffer.allocatedSize;
    [buffer release];
    entry = buffer_free_list.erase(entry);
  }
}

CCL_NAMESPACE_END

#endif /* WITH_METAL */

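Editor's note: a minimal sketch (not from the patch) of how the enumeration helpers above can be consumed; it only uses the MetalInfo/MetalPlatformDevice API declared in util.h, and the output format is illustrative.

/* List usable Metal devices by name and vendor. */
static void example_list_metal_devices()
{
  vector<MetalPlatformDevice> usable_devices;
  MetalInfo::get_usable_devices(&usable_devices);

  int index = 0;
  for (const MetalPlatformDevice &d : usable_devices) {
    /* get_vendor_from_device_name() lets callers special-case Apple/AMD/Intel GPUs. */
    const MetalGPUVendor vendor = MetalInfo::get_vendor_from_device_name(d.device_name);
    printf("METAL_%d: %s (vendor %d)\n", index++, d.device_name.c_str(), (int)vendor);
  }
}
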
@@ -182,6 +182,9 @@ class MultiDevice : public Device {
    else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
      params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
                                                                   BVH_LAYOUT_EMBREE;
    else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
      params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
                                                                   BVH_LAYOUT_EMBREE;

    /* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
     * (since they are put into the top level directly, see bvh_embree.cpp) */

@@ -19,7 +19,6 @@
#include "kernel/device/gpu/parallel_active_index.h"
#include "kernel/device/gpu/parallel_prefix_sum.h"
#include "kernel/device/gpu/parallel_sorted_index.h"
#include "kernel/device/gpu/work_stealing.h"

#include "kernel/sample/lcg.h"

@@ -30,6 +29,8 @@
# include "kernel/device/metal/context_begin.h"
#endif

#include "kernel/device/gpu/work_stealing.h"

#include "kernel/integrator/state.h"
#include "kernel/integrator/state_flow.h"
#include "kernel/integrator/state_util.h"

@@ -96,7 +97,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  const int state = tile->path_index_offset + tile_work_index;

  uint x, y, sample;
  get_work_pixel(tile, tile_work_index, &x, &y, &sample);
  ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample));

  ccl_gpu_kernel_call(
      integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample));

@@ -127,7 +128,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
  const int state = tile->path_index_offset + tile_work_index;

  uint x, y, sample;
  get_work_pixel(tile, tile_work_index, &x, &y, &sample);
  ccl_gpu_kernel_call(get_work_pixel(tile, tile_work_index, &x, &y, &sample));

  ccl_gpu_kernel_call(
      integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample));

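Editor's note: the two hunks above route get_work_pixel() through ccl_gpu_kernel_call because, on Metal, the kernel sources are compiled inside a context class (see the note about work_stealing.h in the commit description), so free-function calls have to be forwarded to context members. The shape such a macro takes is illustrated below; this is a hedged sketch, not the actual definition from the patch.

/* Illustrative only, the real definition lives in the GPU/Metal compat headers. */
#ifdef __KERNEL_METAL__
/* Kernel code is a member of the Metal kernel context, so forward through it. */
#  define ccl_gpu_kernel_call(x) context.x
#else
/* On CUDA/HIP/OptiX the call stays a plain free-function call. */
#  define ccl_gpu_kernel_call(x) x
#endif
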
@@ -117,7 +117,7 @@ struct kernel_gpu_##name \
                  uint simd_group_index, \
                  uint num_simd_groups) ccl_global const; \
  }; \
  kernel void kernel_metal_##name(device const kernel_gpu_##name *params_struct, \
  kernel void cycles_metal_##name(device const kernel_gpu_##name *params_struct, \
                                  constant KernelParamsMetal &ccl_restrict _launch_params_metal, \
                                  constant MetalAncillaries *_metal_ancillaries, \
                                  threadgroup int *simdgroup_offset[[ threadgroup(0) ]], \

@@ -126,7 +126,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,

[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
                                       ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
                                       uint instance_id [[user_instance_id]],
                                       uint primitive_id [[primitive_id]],

@@ -139,7 +139,7 @@ __anyhit__kernel_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params

[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
{
  /* unused function */
  BoundingBoxIntersectionResult result;

@@ -274,7 +274,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,

[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
                                            ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
                                            unsigned int object [[user_instance_id]],
                                            unsigned int primitive_id [[primitive_id]],

@@ -292,7 +292,7 @@ __anyhit__kernel_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_p

[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
{
  /* unused function */
  BoundingBoxIntersectionResult result;

@@ -345,7 +345,7 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa

[[intersection(triangle, triangle_data, METALRT_TAGS)]]
TriangleIntersectionResult
__anyhit__kernel_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
                                             ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
                                             unsigned int object [[user_instance_id]],
                                             unsigned int primitive_id [[primitive_id]])

@@ -362,7 +362,7 @@ __anyhit__kernel_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_

[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
BoundingBoxIntersectionResult
__anyhit__kernel_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
{
  /* Unused function */
  BoundingBoxIntersectionResult result;

@@ -719,6 +719,20 @@ ccl_device_inline float pow22(float a)
  return sqr(a * sqr(sqr(sqr(a)) * a));
}

#ifdef __KERNEL_METAL__
ccl_device_inline float lgammaf(float x)
{
  /* Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function",
   * Archiv der Mathematik. */
  const float _1_180 = 1.0f / 180.0f;
  const float log2pi = 1.83787706641f;
  const float logx = log(x);
  return (log2pi - logx +
          x * (logx * 2.0f + log(x * sinh(1.0f / x) + (_1_180 / pow(x, 6.0f))) - 2.0f)) *
         0.5f;
}
#endif

ccl_device_inline float beta(float x, float y)
{
  return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));

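Editor's note: MSL has no lgammaf, so the kernel supplies the Nemes asymptotic expansion above. A small host-side harness (not part of the patch) can be used to gauge how the same formula compares against the C library for the argument range beta() typically sees.

#include <cmath>
#include <cstdio>

/* Host-side check: same formula as the Metal kernel's lgammaf, compared to libm. */
static float lgammaf_nemes(float x)
{
  const float _1_180 = 1.0f / 180.0f;
  const float log2pi = 1.83787706641f; /* log(2*pi) */
  const float logx = logf(x);
  return (log2pi - logx +
          x * (logx * 2.0f + logf(x * sinhf(1.0f / x) + (_1_180 / powf(x, 6.0f))) - 2.0f)) *
         0.5f;
}

int main()
{
  for (float x : {0.5f, 1.0f, 2.5f, 5.0f, 10.0f}) {
    printf("x=%5.2f  nemes=%+.6f  libm=%+.6f  diff=%.2e\n",
           x, lgammaf_nemes(x), lgammaf(x), fabsf(lgammaf_nemes(x) - lgammaf(x)));
  }
  return 0;
}
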
@@ -750,6 +750,170 @@ bool path_remove(const string &path)
  return remove(path.c_str()) == 0;
}

struct SourceReplaceState {
  typedef map<string, string> ProcessedMapping;
  /* Base directory for all relative include headers. */
  string base;
  /* Result of processed files. */
  ProcessedMapping processed_files;
  /* Set of files containing #pragma once which have been included. */
  set<string> pragma_onced;
};

static string path_source_replace_includes_recursive(const string &source,
                                                      const string &source_filepath,
                                                      SourceReplaceState *state);

static string path_source_handle_preprocessor(const string &preprocessor_line,
                                              const string &source_filepath,
                                              const size_t line_number,
                                              SourceReplaceState *state)
{
  string result = preprocessor_line;

  string rest_of_line = string_strip(preprocessor_line.substr(1));

  if (0 == strncmp(rest_of_line.c_str(), "include", 7)) {
    rest_of_line = string_strip(rest_of_line.substr(8));
    if (rest_of_line[0] == '"') {
      const size_t n_start = 1;
      const size_t n_end = rest_of_line.find("\"", n_start);
      const string filename = rest_of_line.substr(n_start, n_end - n_start);

      string filepath = path_join(state->base, filename);
      if (!path_exists(filepath)) {
        filepath = path_join(path_dirname(source_filepath), filename);
      }
      string text;
      if (path_read_text(filepath, text)) {
        text = path_source_replace_includes_recursive(text, filepath, state);
        /* Use line directives for better error messages. */
        return "\n" + text + "\n";
      }
    }
  }

  return result;
}

/* Our own little C preprocessor that replaces #includes with the file
 * contents, to work around the issue of OpenCL drivers not supporting
 * include paths with spaces in them.
 */
static string path_source_replace_includes_recursive(const string &_source,
                                                      const string &source_filepath,
                                                      SourceReplaceState *state)
{
  const string *psource = &_source;
  string source_new;

  auto pragma_once = _source.find("#pragma once");
  if (pragma_once != string::npos) {
    if (state->pragma_onced.find(source_filepath) != state->pragma_onced.end()) {
      return "";
    }
    state->pragma_onced.insert(source_filepath);

    // "#pragma once"
    // "//prgma once"
    source_new = _source;
    memcpy(source_new.data() + pragma_once, "//pr", 4);
    psource = &source_new;
  }

  /* Try to re-use processed file without spending time on replacing all
   * include directives again.
   */
  SourceReplaceState::ProcessedMapping::iterator replaced_file = state->processed_files.find(
      source_filepath);
  if (replaced_file != state->processed_files.end()) {
    return replaced_file->second;
  }

  const string &source = *psource;

  /* Perform full file processing. */
  string result = "";
  const size_t source_length = source.length();
  size_t index = 0;
  /* Information about where we are in the source. */
  size_t line_number = 0, column_number = 1;
  /* Currently gathered non-preprocessor token.
   * Store as start/length rather than token itself to avoid overhead of
   * memory re-allocations on each character concatenation.
   */
  size_t token_start = 0, token_length = 0;
  /* Denotes whether we're inside of preprocessor line, together with
   * preprocessor line itself.
   *
   * TODO(sergey): Investigate whether using token start/end position
   * gives measurable speedup.
   */
  bool inside_preprocessor = false;
  string preprocessor_line = "";
  /* Actual loop over the whole source. */
  while (index < source_length) {
    char ch = source[index];

    if (ch == '\n') {
      if (inside_preprocessor) {
        string block = path_source_handle_preprocessor(
            preprocessor_line, source_filepath, line_number, state);

        if (!block.empty()) {
          result += block;
        }

        /* Start gathering next part of the token. */
        token_start = index;
        token_length = 0;
        inside_preprocessor = false;
        preprocessor_line = "";
      }
      column_number = 0;
      ++line_number;
    }
    else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
      /* Append all possible non-preprocessor token to the result. */
      if (token_length != 0) {
        result.append(source, token_start, token_length);
        token_start = index;
        token_length = 0;
      }
      inside_preprocessor = true;
    }

    if (inside_preprocessor) {
      preprocessor_line += ch;
    }
    else {
      ++token_length;
    }
    ++index;
    ++column_number;
  }
  /* Append possible tokens which happened before special events handled
   * above.
   */
  if (token_length != 0) {
    result.append(source, token_start, token_length);
  }
  if (inside_preprocessor) {
    result += path_source_handle_preprocessor(
        preprocessor_line, source_filepath, line_number, state);
  }
  /* Store result for further reuse. */
  state->processed_files[source_filepath] = result;
  return result;
}

string path_source_replace_includes(const string &source, const string &path)
{
  SourceReplaceState state;
  state.base = path;
  return path_source_replace_includes_recursive(source, path, &state);
}
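
Editor's note: the Metal backend relies on this helper to flatten the kernel sources into a single string before handing them to the MSL compiler. A hedged usage sketch follows; the file name and wrapper function are illustrative, not the backend's actual compile path, and only path_read_text, path_join and path_source_replace_includes from util/path are assumed.

#include <string>

#include "util/path.h"

/* Flatten a kernel entry point so the resulting string has no quoted
 * #include directives left (illustrative wrapper, not code from the patch). */
static std::string flatten_kernel_source(const std::string &kernel_root)
{
  std::string source;
  if (!path_read_text(path_join(kernel_root, "device/metal/kernel.metal"), source)) {
    return "";
  }
  /* Every quoted #include is replaced by the referenced file's contents;
   * files guarded by #pragma once are expanded only on first inclusion. */
  return path_source_replace_includes(source, kernel_root);
}
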

FILE *path_fopen(const string &path, const string &mode)
{
#ifdef _WIN32

@@ -66,6 +66,9 @@ bool path_read_text(const string &path, string &text);
/* File manipulation. */
bool path_remove(const string &path);

/* source code utility */
string path_source_replace_includes(const string &source, const string &path);

/* cache utility */
void path_cache_clear_except(const string &name, const set<string> &except);