WIP: Vulkan: Clearing Storage Buffers #105299

Closed
Jeroen Bakker wants to merge 73 commits from Jeroen-Bakker:gpu-storage-buffer-clear into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
11 changed files with 8145 additions and 58 deletions
Showing only changes of commit ec31a09e20 - Show all commits

View File

@ -44,13 +44,21 @@ set(OPENVDB_EXTRA_ARGS
# -DLLVM_DIR=${LIBDIR}/llvm/lib/cmake/llvm
)
set(OPENVDB_PATCH ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff)
if(APPLE)
set(OPENVDB_PATCH
${OPENVDB_PATCH} &&
${PATCH_CMD} -p 0 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb_metal.diff
)
endif()
ExternalProject_Add(openvdb
URL file://${PACKAGE_DIR}/${OPENVDB_FILE}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
URL_HASH ${OPENVDB_HASH_TYPE}=${OPENVDB_HASH}
CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR}
PREFIX ${BUILD_DIR}/openvdb
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openvdb/src/openvdb < ${PATCH_DIR}/openvdb.diff
PATCH_COMMAND ${OPENVDB_PATCH}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/openvdb ${DEFAULT_CMAKE_FLAGS} ${OPENVDB_EXTRA_ARGS}
INSTALL_DIR ${LIBDIR}/openvdb
)

File diff suppressed because it is too large Load Diff

View File

@ -55,9 +55,8 @@ void device_metal_info(vector<DeviceInfo> &devices)
info.denoisers = DENOISER_NONE;
info.id = id;
if (MetalInfo::get_device_vendor(device) == METAL_GPU_AMD) {
info.has_light_tree = false;
}
info.has_nanovdb = MetalInfo::get_device_vendor(device) == METAL_GPU_APPLE;
info.has_light_tree = MetalInfo::get_device_vendor(device) != METAL_GPU_AMD;
devices.push_back(info);
device_index++;

View File

@ -67,9 +67,12 @@ class MetalDevice : public Device {
std::recursive_mutex metal_mem_map_mutex;
/* Bindless Textures */
bool is_texture(const TextureInfo &tex);
device_vector<TextureInfo> texture_info;
bool need_texture_info;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
id<MTLBuffer> buffer_bindings_1d = nil;
id<MTLBuffer> texture_bindings_2d = nil;
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;

View File

@ -91,11 +91,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
}
texture_bindings_2d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:4096 options:default_storage_mode];
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
switch (device_vendor) {
default:
break;
@ -156,6 +151,16 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
arg_desc_texture.dataType = MTLDataTypeTexture;
arg_desc_texture.access = MTLArgumentAccessReadOnly;
mtlTextureArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_texture ]];
MTLArgumentDescriptor *arg_desc_buffer = [[MTLArgumentDescriptor alloc] init];
arg_desc_buffer.dataType = MTLDataTypePointer;
arg_desc_buffer.access = MTLArgumentAccessReadOnly;
mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
/* command queue for non-tracing work on the GPU */
mtlGeneralCommandQueue = [mtlDevice newCommandQueue];
@ -180,6 +185,8 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
arg_desc_tex.dataType = MTLDataTypePointer;
arg_desc_tex.access = MTLArgumentAccessReadOnly;
arg_desc_tex.index = index++;
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_buf_1d */
arg_desc_tex.index = index++;
[ancillary_desc addObject:[arg_desc_tex copy]]; /* metal_tex_2d */
arg_desc_tex.index = index++;
@ -253,22 +260,26 @@ MetalDevice::~MetalDevice()
* existing_devices_mutex). */
thread_scoped_lock lock(existing_devices_mutex);
for (auto &tex : texture_slot_map) {
if (tex) {
[tex release];
tex = nil;
int num_resources = texture_info.size();
for (int res = 0; res < num_resources; res++) {
if (is_texture(texture_info[res])) {
[texture_slot_map[res] release];
texture_slot_map[res] = nil;
}
}
flush_delayed_free_list();
if (texture_bindings_2d) {
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
[buffer_bindings_1d release];
[texture_bindings_2d release];
[texture_bindings_3d release];
}
[mtlTextureArgEncoder release];
[mtlBufferKernelParamsEncoder release];
[mtlBufferArgEncoder release];
[mtlASArgEncoder release];
[mtlAncillaryArgEncoder release];
[mtlGeneralCommandQueue release];
@ -332,6 +343,9 @@ void MetalDevice::make_source(MetalPipelineType pso_type, const uint kernel_feat
break;
case METAL_GPU_APPLE:
global_defines += "#define __KERNEL_METAL_APPLE__\n";
# ifdef WITH_NANOVDB
global_defines += "#define WITH_NANOVDB\n";
# endif
break;
}
@ -546,6 +560,11 @@ void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
}
}
bool MetalDevice::is_texture(const TextureInfo &tex)
{
return (tex.depth > 0 || tex.height > 0);
}
void MetalDevice::load_texture_info()
{
if (need_texture_info) {
@ -557,21 +576,20 @@ void MetalDevice::load_texture_info()
for (int tex = 0; tex < num_textures; tex++) {
uint64_t offset = tex * sizeof(void *);
id<MTLTexture> metal_texture = texture_slot_map[tex];
if (!metal_texture) {
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
}
else {
if (is_texture(texture_info[tex]) && texture_slot_map[tex]) {
id<MTLTexture> metal_texture = texture_slot_map[tex];
MTLTextureType type = metal_texture.textureType;
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:type == MTLTextureType2D ? metal_texture : nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:type == MTLTextureType3D ? metal_texture : nil atIndex:0];
}
else {
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_2d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
[mtlTextureArgEncoder setArgumentBuffer:texture_bindings_3d offset:offset];
[mtlTextureArgEncoder setTexture:nil atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
@ -744,7 +762,6 @@ void MetalDevice::generic_free(device_memory &mem)
mem.shared_pointer = 0;
/* Free device memory. */
delayed_free_list.push_back(mmem.mtlBuffer);
mmem.mtlBuffer = nil;
}
@ -979,7 +996,7 @@ void MetalDevice::global_free(device_memory &mem)
void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
{
generic_alloc(mem);
MetalDevice::MetalMem *mmem = generic_alloc(mem);
generic_copy_to(mem);
/* Resize once */
@ -988,27 +1005,32 @@ void MetalDevice::tex_alloc_as_buffer(device_texture &mem)
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
texture_info.resize(round_up(slot + 1, 128));
texture_slot_map.resize(round_up(slot + 1, 128));
}
mem.info.data = (uint64_t)mem.device_pointer;
/* Set Mapping and tag that we need to (re-)upload to device */
texture_info[slot] = mem.info;
uint64_t offset = slot * sizeof(void *);
[mtlBufferArgEncoder setArgumentBuffer:buffer_bindings_1d offset:offset];
[mtlBufferArgEncoder setBuffer:mmem->mtlBuffer offset:0 atIndex:0];
texture_info[slot].data = *(uint64_t *)((uint64_t)buffer_bindings_1d.contents + offset);
texture_slot_map[slot] = nil;
need_texture_info = true;
}
void MetalDevice::tex_alloc(device_texture &mem)
{
/* Check that dimensions fit within maximum allowable size.
* If 1D texture is allocated, use 1D buffer.
* See: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
if (mem.data_width > 16384 || mem.data_height > 16384) {
set_error(string_printf(
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
mem.data_width,
mem.data_height));
return;
if (mem.data_height > 0) {
if (mem.data_width > 16384 || mem.data_height > 16384) {
set_error(string_printf(
"Texture exceeds maximum allowed size of 16384 x 16384 (requested: %zu x %zu)",
mem.data_width,
mem.data_height));
return;
}
}
MTLStorageMode storage_mode = MTLStorageModeManaged;
if (@available(macos 10.15, *)) {
if ([mtlDevice hasUnifiedMemory] &&
@ -1148,8 +1170,9 @@ void MetalDevice::tex_alloc(device_texture &mem)
bytesPerRow:src_pitch];
}
else {
assert(0);
/* 1D texture, using linear memory. */
tex_alloc_as_buffer(mem);
return;
}
mem.device_pointer = (device_ptr)mtlTexture;
@ -1173,17 +1196,22 @@ void MetalDevice::tex_alloc(device_texture &mem)
ssize_t min_buffer_length = sizeof(void *) * texture_info.size();
if (!texture_bindings_2d || (texture_bindings_2d.length < min_buffer_length)) {
if (texture_bindings_2d) {
delayed_free_list.push_back(buffer_bindings_1d);
delayed_free_list.push_back(texture_bindings_2d);
delayed_free_list.push_back(texture_bindings_3d);
stats.mem_free(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
stats.mem_free(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
}
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
options:default_storage_mode];
stats.mem_alloc(texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize);
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
texture_bindings_3d.allocatedSize);
}
}
@ -1210,12 +1238,18 @@ void MetalDevice::tex_alloc(device_texture &mem)
void MetalDevice::tex_free(device_texture &mem)
{
if (mem.data_depth == 0 && mem.data_height == 0) {
generic_free(mem);
return;
}
if (metal_mem_map.count(&mem)) {
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
MetalMem &mmem = *metal_mem_map.at(&mem);
assert(texture_slot_map[mem.slot] == mmem.mtlTexture);
texture_slot_map[mem.slot] = nil;
if (texture_slot_map[mem.slot] == mmem.mtlTexture)
texture_slot_map[mem.slot] = nil;
if (mmem.mtlTexture) {
/* Free bindless texture. */

View File

@ -477,17 +477,21 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
offset:0
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->buffer_bindings_1d
offset:0
atIndex:2];
if (@available(macos 12.0, *)) {
if (metal_device_->use_metalrt) {
if (metal_device_->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
offset:0
atIndex:7];
atIndex:8];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
offset:0
atIndex:8];
atIndex:9];
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@ -497,13 +501,13 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
atIndex:3 + table];
atIndex:4 + table];
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
[metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
atIndex:3 + table];
atIndex:4 + table];
}
}
}
@ -874,6 +878,7 @@ void MetalDeviceQueue::prepare_resources(DeviceKernel kernel)
/* ancillaries */
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
[mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
[mtlComputeEncoder_ useResource:metal_device_->buffer_bindings_1d usage:MTLResourceUsageRead];
}
id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(DeviceKernel kernel)

View File

@ -5,13 +5,14 @@
CCL_NAMESPACE_BEGIN
#ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
#if !defined __KERNEL_METAL__
# ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
# endif
#endif
/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
ccl_device float cubic_w0(float a)
{
@ -126,7 +127,7 @@ kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, fl
#ifdef WITH_NANOVDB
template<typename T, typename S>
ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tricubic_nanovdb(
S &s, float x, float y, float z)
ccl_private S &s, float x, float y, float z)
{
float px = floorf(x);
float py = floorf(y);
@ -157,13 +158,19 @@ ccl_device typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_tric
g1y * (g0x * s(Vec3f(x0, y1, z1)) + g1x * s(Vec3f(x1, y1, z1))));
}
# if defined(__KERNEL_METAL__)
template<typename T>
__attribute__((noinline)) typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# else
template<typename T>
ccl_device_noinline typename nanovdb::NanoGrid<T>::ValueType kernel_tex_image_interp_nanovdb(
ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
# endif
{
using namespace nanovdb;
NanoGrid<T> *const grid = (NanoGrid<T> *)info.data;
ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
typedef typename nanovdb::NanoGrid<T>::AccessorType AccessorType;
AccessorType acc = grid->getAccessor();

View File

@ -290,6 +290,10 @@ typedef metal::raytracing::intersector<triangle_data> metalrt_blas_intersector_t
/* texture bindings and sampler setup */
struct Buffer1DParamsMetal {
device float *buf;
};
struct Texture2DParamsMetal {
texture2d<float, access::sample> tex;
};
@ -306,6 +310,7 @@ struct MetalRTBlasWrapper {
struct MetalAncillaries {
device Texture2DParamsMetal *textures_2d;
device Texture3DParamsMetal *textures_3d;
device Buffer1DParamsMetal *buffers;
#ifdef __METALRT__
metalrt_as_type accel_struct;

View File

@ -3,6 +3,13 @@
// clang-format off
#ifdef WITH_NANOVDB
# define NDEBUG /* Disable "assert" in device code */
# define NANOVDB_USE_INTRINSICS
# include "nanovdb/NanoVDB.h"
# include "nanovdb/util/SampleFromVoxels.h"
#endif
/* Open the Metal kernel context class
* Necessary to access resource bindings */
class MetalKernelContext {

View File

@ -214,6 +214,9 @@ std::string AssetLibraryService::root_path_from_library_ref(
if (ELEM(library_reference.type, ASSET_LIBRARY_ALL, ASSET_LIBRARY_LOCAL)) {
return "";
}
if (ELEM(library_reference.type, ASSET_LIBRARY_ESSENTIALS)) {
return essentials_directory_path();
}
if (ELEM(library_reference.type, ASSET_LIBRARY_ESSENTIALS)) {
return essentials_directory_path();

View File

@ -253,11 +253,20 @@ static void gather_search_link_ops_for_all_assets(const bContext &C,
C, node_tree, socket, library_ref, true, search_link_ops);
}
AssetLibraryReference library_ref{};
library_ref.custom_library_index = -1;
library_ref.type = ASSET_LIBRARY_LOCAL;
gather_search_link_ops_for_asset_library(
C, node_tree, socket, library_ref, false, search_link_ops);
{
AssetLibraryReference library_ref{};
library_ref.custom_library_index = -1;
library_ref.type = ASSET_LIBRARY_ESSENTIALS;
gather_search_link_ops_for_asset_library(
C, node_tree, socket, library_ref, true, search_link_ops);
}
{
AssetLibraryReference library_ref{};
library_ref.custom_library_index = -1;
library_ref.type = ASSET_LIBRARY_LOCAL;
gather_search_link_ops_for_asset_library(
C, node_tree, socket, library_ref, false, search_link_ops);
}
}
/**