Fix #124297: GPv3: Build modifier natural drawing speed fix #124350

Merged
Falk David merged 6 commits from ChengduLittleA/blender:fix-124297 into main 2024-07-16 10:26:50 +02:00
600 changed files with 48118 additions and 11026 deletions
Showing only changes of commit f6b7fc7664

View File

@ -109,12 +109,12 @@ set(OPENIMAGEIO_EXTRA_ARGS
)
if(WIN32)
# We don't want the SOABI tags in the final filename since it gets the debug
# tags wrong and the final .pyd won't be found by python, pybind11 will try to
# get the tags and dump them into PYTHON_MODULE_EXTENSION every time the current
# python interpreter doesn't match the old one, overwriting our preference.
# To side step this behavior we set PYBIND11_PYTHON_EXECUTABLE_LAST so it'll
# leave the PYTHON_MODULE_EXTENSION value we set alone.
LIST(APPEND OPENIMAGEIO_EXTRA_ARGS -DPYBIND11_PYTHON_EXECUTABLE_LAST=${PYTHON_BINARY})
if(BUILD_MODE STREQUAL Release)
LIST(APPEND OPENIMAGEIO_EXTRA_ARGS -DPYTHON_MODULE_EXTENSION=.pyd)

View File

@ -165,7 +165,8 @@ m_audio_unit(nullptr)
m_specs = specs;
open();
close();
// NOTE: Keep the device open until #121911 is investigated/resolved from Apple side.
// close();
create();
}

View File

@ -71,8 +71,9 @@ void OpenCloseDevice::playing(bool playing)
if(m_delayed_close_thread.joinable())
m_delayed_close_thread.join();
m_delayed_close_running = true;
m_delayed_close_thread = std::thread(&OpenCloseDevice::closeAfterDelay, this);
// NOTE: Disabled until #121911 is investigated/resolved from Apple side.
// m_delayed_close_running = true;
// m_delayed_close_thread = std::thread(&OpenCloseDevice::closeAfterDelay, this);
}
}
}
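
Both audaspace hunks above belong to the same workaround: the CoreAudio device is now kept open for the lifetime of the application, and the delayed-close thread that used to tear it down is left disabled until #121911 is resolved on Apple's side. A minimal C++ sketch of the delayed-close mechanism being bypassed, with illustrative names rather than audaspace's actual API:

#include <atomic>
#include <chrono>
#include <thread>

class OpenCloseDeviceSketch {
  /* Mirrors the commented-out code above: when true, the device is never
   * scheduled for closing. */
  static constexpr bool keep_device_open = true;

  std::atomic<bool> m_playing{false};
  std::atomic<bool> m_delayed_close_running{false};
  std::thread m_delayed_close_thread;

  void close_after_delay()
  {
    std::this_thread::sleep_for(std::chrono::seconds(10));
    if (!m_playing) {
      /* Real device teardown would happen here. */
    }
    m_delayed_close_running = false;
  }

 public:
  void playing(bool playing)
  {
    m_playing = playing;
    if (playing || keep_device_open) {
      return; /* Workaround path: never schedule the close. */
    }
    if (m_delayed_close_thread.joinable()) {
      m_delayed_close_thread.join();
    }
    m_delayed_close_running = true;
    m_delayed_close_thread = std::thread(&OpenCloseDeviceSketch::close_after_delay, this);
  }

  ~OpenCloseDeviceSketch()
  {
    if (m_delayed_close_thread.joinable()) {
      m_delayed_close_thread.join();
    }
  }
};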

View File

@ -57,6 +57,9 @@ class BVHMetal : public BVH {
Geometry *const geom,
bool refit);
bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
API_AVAILABLE(macos(11.0))
void set_accel_struct(id<MTLAccelerationStructure> new_accel_struct);
};
CCL_NAMESPACE_END
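
The new set_accel_struct() centralizes bookkeeping that was previously repeated at every assignment site: free the old structure (with a matching stats.mem_free) before adopting the new one (with a matching stats.mem_alloc). A plain C++ sketch of the pattern, with Stats and AccelHandle as stand-ins for the Cycles/Metal types:

#include <cstdint>

struct Stats {
  int64_t used = 0;
  void mem_alloc(int64_t n) { used += n; }
  void mem_free(int64_t n) { used -= n; }
};

struct AccelHandle {
  int64_t allocated_size = 0;
};

struct BVHSketch {
  Stats &stats;
  AccelHandle *accel_struct = nullptr;

  /* Single owner of the alloc/free bookkeeping; passing nullptr just frees. */
  void set_accel_struct(AccelHandle *new_accel_struct)
  {
    if (accel_struct) {
      stats.mem_free(accel_struct->allocated_size);
      delete accel_struct; /* The real code releases an Objective-C object. */
      accel_struct = nullptr;
    }
    if (new_accel_struct) {
      accel_struct = new_accel_struct;
      stats.mem_alloc(accel_struct->allocated_size);
    }
  }
};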

View File

@ -119,17 +119,27 @@ BVHMetal::BVHMetal(const BVHParams &params_,
BVHMetal::~BVHMetal()
{
/* Clear pointer used by enqueueing. */
device->release_bvh(this);
if (@available(macos 12.0, *)) {
set_accel_struct(nil);
if (null_BLAS) {
[null_BLAS release];
}
}
}
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
if (@available(macos 12.0, *)) {
if (accel_struct) {
device->stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
accel_struct = nil;
}
if (new_accel_struct) {
accel_struct = new_accel_struct;
device->stats.mem_alloc(accel_struct.allocatedSize);
}
}
}
@ -325,9 +335,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@ -338,10 +346,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@ -663,9 +668,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@ -676,10 +679,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@ -910,9 +910,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@ -923,10 +921,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@ -1036,10 +1031,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
for (Object *ob : objects) {
num_instances++;
/* Skip motion for non-traceable objects */
if (!ob->is_traceable())
continue;
if (ob->use_motion()) {
num_motion_transforms += max((size_t)1, ob->get_motion().size());
}
@ -1115,8 +1106,8 @@ bool BVHMetal::build_TLAS(Progress &progress,
/* Skip non-traceable objects */
Geometry const *geom = ob->get_geometry();
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
if (!blas || !blas->accel_struct) {
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_mtl_device_index()
if (!blas || !blas->accel_struct || !ob->is_traceable()) {
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
* in our intersection functions */
blas = nullptr;
@ -1299,11 +1290,8 @@ bool BVHMetal::build_TLAS(Progress &progress,
[instanceBuf release];
[scratchBuf release];
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
/* Cache top and bottom-level acceleration structs */
accel_struct = accel;
set_accel_struct(accel);
unique_blas_array.clear();
unique_blas_array.reserve(all_blas.count);
@ -1322,16 +1310,18 @@ bool BVHMetal::build(Progress &progress,
bool refit)
{
if (@available(macos 12.0, *)) {
if (refit && params.bvh_type != BVH_TYPE_STATIC) {
assert(accel_struct);
}
else {
if (accel_struct) {
device->stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
accel_struct = nil;
if (refit) {
/* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
* In such cases, assert in development but try to recover in the wild. */
if (params.bvh_type != BVH_TYPE_DYNAMIC || !accel_struct) {
assert(false);
refit = false;
}
}
if (!refit) {
set_accel_struct(nil);
}
}
@autoreleasepool {
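
The reworked BVHMetal::build() entry separates validation from teardown: a refit request is only honored for an existing BVH built as dynamic, and anything else degrades to a full rebuild (assert in development, recover in release). A compact C++ sketch of that decision, assuming a simplified BVHType enum:

#include <cassert>

enum BVHType { BVH_TYPE_STATIC, BVH_TYPE_DYNAMIC };

bool validate_refit(bool refit, BVHType bvh_type, bool has_accel_struct)
{
  if (refit && (bvh_type != BVH_TYPE_DYNAMIC || !has_accel_struct)) {
    assert(false); /* Invalid request: catch it during development. */
    refit = false; /* In the wild, fall back to a full rebuild. */
  }
  return refit;
}

When validate_refit() would return false, the caller clears the previous structure via set_accel_struct(nil) and rebuilds from scratch, matching the hunk above.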

View File

@ -39,10 +39,19 @@ class MetalDevice : public Device {
KernelParamsMetal launch_params = {0};
/* MetalRT members ----------------------------------*/
BVHMetal *bvhMetalRT = nullptr;
bool use_metalrt = false;
bool motion_blur = false;
id<MTLArgumentEncoder> mtlASArgEncoder =
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> unique_blas_array;
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> accel_struct = nil;
/*---------------------------------------------------*/
uint kernel_features;
@ -79,11 +88,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
/* BLAS encoding & lookup */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
int device_id = 0;
@ -138,8 +142,6 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual void release_bvh(BVH *bvh) override;
virtual void optimize_for_scene(Scene *scene) override;
static void compile_and_load(int device_id, MetalPipelineType pso_type);
@ -184,6 +186,10 @@ class MetalDevice : public Device {
void tex_free(device_texture &mem);
void flush_delayed_free_list();
void free_bvh();
void update_bvh(BVHMetal *bvh_metal);
};
CCL_NAMESPACE_END

View File

@ -267,6 +267,7 @@ MetalDevice::~MetalDevice()
}
}
free_bvh();
flush_delayed_free_list();
if (texture_bindings_2d) {
@ -1372,24 +1373,7 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) {
if (bvh->params.top_level) {
bvhMetalRT = bvh_metal;
// allocate required buffers for BLAS array
uint64_t count = bvhMetalRT->blas_array.size();
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
if (bvhMetalRT->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
}
update_bvh(bvh_metal);
}
}
@ -1399,10 +1383,54 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
void MetalDevice::release_bvh(BVH *bvh)
void MetalDevice::free_bvh()
{
if (bvhMetalRT == bvh) {
bvhMetalRT = nullptr;
for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
[blas release];
}
unique_blas_array.clear();
if (blas_buffer) {
[blas_buffer release];
blas_buffer = nil;
}
if (accel_struct) {
[accel_struct release];
accel_struct = nil;
}
}
void MetalDevice::update_bvh(BVHMetal *bvh_metal)
{
free_bvh();
if (!bvh_metal) {
return;
}
accel_struct = bvh_metal->accel_struct;
unique_blas_array = bvh_metal->unique_blas_array;
[accel_struct retain];
for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
[blas retain];
}
// Allocate required buffers for BLAS array.
uint64_t count = bvh_metal->blas_array.size();
uint64_t buffer_size = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
if (bvh_metal->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvh_metal->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
}
}
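
update_bvh() makes the device a co-owner of the TLAS and the unique BLAS set: it takes explicit retains so the BVHMetal wrapper can be destroyed or rebuilt without invalidating what the device has bound. In C++ terms the retain calls are what copying a shared_ptr does implicitly; a rough analogue with placeholder types:

#include <memory>
#include <vector>

struct AccelStruct { /* opaque acceleration structure */ };

struct DeviceStateSketch {
  std::shared_ptr<AccelStruct> accel_struct;
  std::vector<std::shared_ptr<AccelStruct>> unique_blas_array;

  void update(const std::shared_ptr<AccelStruct> &tlas,
              const std::vector<std::shared_ptr<AccelStruct>> &blas)
  {
    /* free_bvh() analogue: the overwritten refs are released automatically. */
    accel_struct = tlas;      /* [accel_struct retain]    */
    unique_blas_array = blas; /* the per-BLAS retain loop */
  }
};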

View File

@ -54,10 +54,12 @@ enum MetalPipelineType {
const char *kernel_type_as_string(MetalPipelineType pso_type);
struct MetalKernelPipeline {
/* A pipeline object that can be shared between multiple instances of MetalDeviceQueue. */
class MetalKernelPipeline {
public:
void compile();
int pipeline_id;
int originating_device_id;
id<MTLLibrary> mtlLibrary = nil;
@ -83,6 +85,28 @@ struct MetalKernelPipeline {
string error_str;
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
};
/* An actively instanced pipeline that can only be used by a single instance of MetalDeviceQueue.
*/
class MetalDispatchPipeline {
public:
~MetalDispatchPipeline();
bool update(MetalDevice *metal_device, DeviceKernel kernel);
void free_intersection_function_tables();
private:
friend class MetalDeviceQueue;
friend struct ShaderCache;
int pipeline_id = -1;
MetalPipelineType pso_type;
id<MTLComputePipelineState> pipeline = nil;
int num_threads_per_block = 0;
API_AVAILABLE(macos(11.0))
id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
};
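
The split introduced here keeps expensive, immutable compiled state (MetalKernelPipeline) shareable across queues, while mutable per-dispatch state (the intersection function tables) moves into a per-queue MetalDispatchPipeline. A C++ sketch of the shape of that split, with illustrative names:

#include <memory>

struct CompiledPipeline { /* MetalKernelPipeline analogue: immutable, shared. */
  int pipeline_id;
};

struct DispatchPipeline { /* MetalDispatchPipeline analogue: one per queue. */
  int pipeline_id = -1;
  std::shared_ptr<const CompiledPipeline> pipeline;

  bool update(std::shared_ptr<const CompiledPipeline> best)
  {
    if (!best) {
      return false;
    }
    if (pipeline_id == best->pipeline_id) {
      return true; /* Already active: nothing to rebuild. */
    }
    pipeline_id = best->pipeline_id;
    pipeline = std::move(best);
    /* Per-queue intersection function tables would be (re)created here. */
    return true;
  }
};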

View File

@ -133,6 +133,9 @@ using DeviceShaderCache = std::pair<id<MTLDevice>, unique_ptr<ShaderCache>>;
int g_shaderCacheCount = 0;
DeviceShaderCache g_shaderCache[MAX_POSSIBLE_GPUS_ON_SYSTEM];
/* Next UID for associating a MetalDispatchPipeline with an originating MetalKernelPipeline. */
static std::atomic_int g_next_pipeline_id = 0;
ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
{
for (int i = 0; i < g_shaderCacheCount; i++) {
@ -325,6 +328,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* Keep track of the originating device's ID so that we can cancel requests if the device ceases
* to be active. */
pipeline->pipeline_id = g_next_pipeline_id.fetch_add(1);
pipeline->originating_device_id = device->device_id;
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
pipeline->pso_type = pso_type;
@ -450,6 +454,64 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
return constant_values;
}
void MetalDispatchPipeline::free_intersection_function_tables()
{
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (intersection_func_table[table]) {
[intersection_func_table[table] release];
intersection_func_table[table] = nil;
}
}
}
MetalDispatchPipeline::~MetalDispatchPipeline()
{
free_intersection_function_tables();
}
bool MetalDispatchPipeline::update(MetalDevice *metal_device, DeviceKernel kernel)
{
const MetalKernelPipeline *best_pipeline = MetalDeviceKernels::get_best_pipeline(metal_device,
kernel);
if (!best_pipeline) {
return false;
}
if (pipeline_id == best_pipeline->pipeline_id) {
/* The best pipeline is already active - nothing to do. */
return true;
}
pipeline_id = best_pipeline->pipeline_id;
pipeline = best_pipeline->pipeline;
pso_type = best_pipeline->pso_type;
num_threads_per_block = best_pipeline->num_threads_per_block;
/* Create the MTLIntersectionFunctionTables if needed. */
if (best_pipeline->use_metalrt && device_kernel_has_intersection(best_pipeline->device_kernel)) {
free_intersection_function_tables();
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@autoreleasepool {
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = best_pipeline->table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = int([best_pipeline->table_functions[table] count]);
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:best_pipeline->table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
}
return true;
}
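
The pipeline_id comparison in update() is a cheap change detector: every freshly compiled pipeline takes a unique id from the atomic g_next_pipeline_id counter, so a queue can tell "same pipeline as last dispatch" without comparing object pointers that may be reused after deallocation. Minimal sketch of the id scheme:

#include <atomic>

static std::atomic<int> g_next_id{0};

struct PipelineSketch {
  /* Unique for the process lifetime; two pipelines never share an id, so an
   * unchanged id reliably means "same compiled pipeline as before". */
  const int id = g_next_id.fetch_add(1);
};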
id<MTLFunction> MetalKernelPipeline::make_intersection_function(const char *function_name)
{
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
@ -507,7 +569,6 @@ void MetalKernelPipeline::compile()
function.label = [@(function_name.c_str()) copy];
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
if (use_metalrt && device_kernel_has_intersection(device_kernel)) {
@ -754,24 +815,6 @@ void MetalKernelPipeline::compile()
[computePipelineStateDescriptor release];
computePipelineStateDescriptor = nil;
if (use_metalrt && linked_functions) {
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = (int)[table_functions[table] count];
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
if (!use_binary_archive) {
metal_printf("%16s | %2d | %-55s | %7.2fs\n",
kernel_type_as_string(pso_type),

View File

@ -66,6 +66,7 @@ class MetalDeviceQueue : public DeviceQueue {
id<MTLSharedEvent> shared_event_ = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
MTLSharedEventListener *shared_event_listener_ = nil;
MetalDispatchPipeline active_pipelines_[DEVICE_KERNEL_NUM];
dispatch_queue_t event_queue_;
dispatch_semaphore_t wait_semaphore_;

View File

@ -465,13 +465,12 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(
metal_device_, kernel);
if (!metal_kernel_pso) {
if (!active_pipelines_[kernel].update(metal_device_, kernel)) {
metal_device_->set_error(
string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel)));
string_printf("Could not activate pipeline for %s\n", device_kernel_as_string(kernel)));
return false;
}
MetalDispatchPipeline &active_pipeline = active_pipelines_[kernel];
/* Encode ancillaries */
[metal_device_->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
@ -487,8 +486,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (@available(macos 12.0, *)) {
if (metal_device_->use_metalrt && device_kernel_has_intersection(kernel)) {
if (metal_device_->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
offset:0
@ -496,14 +494,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (metal_kernel_pso->intersection_func_table[table]) {
[metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer
offset:globals_offsets
atIndex:1];
if (active_pipeline.intersection_func_table[table]) {
[active_pipeline.intersection_func_table[table] setBuffer:arg_buffer
offset:globals_offsets
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
setIntersectionFunctionTable:active_pipeline.intersection_func_table[table]
atIndex:4 + table];
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
[mtlComputeCommandEncoder useResource:active_pipeline.intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
@ -526,24 +524,22 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (metal_device_->use_metalrt && device_kernel_has_intersection(kernel)) {
if (@available(macos 12.0, *)) {
BVHMetal *bvhMetalRT = metal_device_->bvhMetalRT;
if (bvhMetalRT && bvhMetalRT->accel_struct) {
if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
/* Mark all Accelerations resources as used */
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:accel_struct usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResources:bvhMetalRT->unique_blas_array.data()
count:bvhMetalRT->unique_blas_array.size()
[mtlComputeCommandEncoder useResources:metal_device_->unique_blas_array.data()
count:metal_device_->unique_blas_array.size()
usage:MTLResourceUsageRead];
}
}
}
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline];
[mtlComputeCommandEncoder setComputePipelineState:active_pipeline.pipeline];
/* Compute kernel launch parameters. */
const int num_threads_per_block = metal_kernel_pso->num_threads_per_block;
const int num_threads_per_block = active_pipeline.num_threads_per_block;
int shared_mem_bytes = 0;
@ -594,7 +590,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
UTF8String];
str += string_printf("(%s.%s):\n%s\n",
kernel_type_as_string(metal_kernel_pso->pso_type),
kernel_type_as_string(active_pipeline.pso_type),
device_kernel_as_string(kernel),
errCStr);
}
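
MetalDeviceQueue now owns an active_pipelines_ array indexed by DeviceKernel, so enqueue() refreshes per-queue state instead of fetching a raw MetalKernelPipeline from the shared cache on every call. A C++ sketch of that flow, with stand-in types:

enum DeviceKernel { KERNEL_A, KERNEL_B, DEVICE_KERNEL_NUM };

struct DispatchState {
  int pipeline_id = -1;
  /* See the MetalDispatchPipeline::update() sketch earlier: returns false
   * when no compiled pipeline is available yet. */
  bool update(DeviceKernel /*kernel*/) { return true; }
};

struct QueueSketch {
  DispatchState active_pipelines_[DEVICE_KERNEL_NUM];

  bool enqueue(DeviceKernel kernel)
  {
    if (!active_pipelines_[kernel].update(kernel)) {
      return false; /* The "Could not activate pipeline" error path above. */
    }
    DispatchState &active_pipeline = active_pipelines_[kernel];
    /* ... encode ancillaries and dispatch using active_pipeline ... */
    (void)active_pipeline;
    return true;
  }
};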

View File

@ -41,23 +41,14 @@ struct MetalInfo {
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
class MetalBufferPool {
struct MetalBufferListEntry {
MetalBufferListEntry(id<MTLBuffer> buffer, id<MTLCommandBuffer> command_buffer)
: buffer(buffer), command_buffer(command_buffer)
{
}
MetalBufferListEntry() = delete;
id<MTLBuffer> buffer;
id<MTLCommandBuffer> command_buffer;
};
std::vector<MetalBufferListEntry> buffer_free_list;
std::vector<MetalBufferListEntry> buffer_in_use_list;
std::vector<MetalBufferListEntry> temp_buffers;
thread_mutex buffer_mutex;
size_t total_temp_mem_size = 0;
public:
MetalBufferPool() = default;
~MetalBufferPool();
id<MTLBuffer> get_buffer(id<MTLDevice> device,

View File

@ -123,53 +123,42 @@ id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
const void *pointer,
Stats &stats)
{
id<MTLBuffer> buffer;
id<MTLBuffer> buffer = nil;
MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
MTLResourceStorageModeShift);
MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
MTLResourceCPUCacheModeShift);
buffer_mutex.lock();
for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
MetalBufferListEntry bufferEntry = *entry;
/* Check if buffer matches size and storage mode and is old enough to reuse */
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
cpuCacheMode == bufferEntry.buffer.cpuCacheMode)
{
buffer = bufferEntry.buffer;
buffer_free_list.erase(entry);
bufferEntry.command_buffer = command_buffer;
buffer_in_use_list.push_back(bufferEntry);
buffer_mutex.unlock();
/* Copy over data */
if (pointer) {
memcpy(buffer.contents, pointer, length);
if (bufferEntry.buffer.storageMode == MTLStorageModeManaged) {
[buffer didModifyRange:NSMakeRange(0, length)];
}
{
thread_scoped_lock lock(buffer_mutex);
/* Find an unused buffer with matching size and storage mode. */
for (MetalBufferListEntry &bufferEntry : temp_buffers) {
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
cpuCacheMode == bufferEntry.buffer.cpuCacheMode && bufferEntry.command_buffer == nil)
{
buffer = bufferEntry.buffer;
bufferEntry.command_buffer = command_buffer;
break;
}
return buffer;
}
if (!buffer) {
/* Create a new buffer and add it to the pool. Typically this pool will only grow to a
* handful of entries. */
buffer = [device newBufferWithLength:length options:options];
stats.mem_alloc(buffer.allocatedSize);
total_temp_mem_size += buffer.allocatedSize;
temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
}
}
// NSLog(@"Creating buffer of length %lu (%lu)", length, frameCount);
/* Copy over data */
if (pointer) {
buffer = [device newBufferWithBytes:pointer length:length options:options];
memcpy(buffer.contents, pointer, length);
if (buffer.storageMode == MTLStorageModeManaged) {
[buffer didModifyRange:NSMakeRange(0, length)];
}
}
else {
buffer = [device newBufferWithLength:length options:options];
}
MetalBufferListEntry buffer_entry(buffer, command_buffer);
stats.mem_alloc(buffer.allocatedSize);
total_temp_mem_size += buffer.allocatedSize;
buffer_in_use_list.push_back(buffer_entry);
buffer_mutex.unlock();
return buffer;
}
@ -178,16 +167,10 @@ void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> com
{
assert(command_buffer);
thread_scoped_lock lock(buffer_mutex);
/* Release all buffers that have not been recently reused back into the free pool */
for (auto entry = buffer_in_use_list.begin(); entry != buffer_in_use_list.end();) {
MetalBufferListEntry buffer_entry = *entry;
/* Mark any temp buffers associated with command_buffer as unused. */
for (MetalBufferListEntry &buffer_entry : temp_buffers) {
if (buffer_entry.command_buffer == command_buffer) {
entry = buffer_in_use_list.erase(entry);
buffer_entry.command_buffer = nil;
buffer_free_list.push_back(buffer_entry);
}
else {
entry++;
}
}
}
@ -196,16 +179,12 @@ MetalBufferPool::~MetalBufferPool()
{
thread_scoped_lock lock(buffer_mutex);
/* Release all buffers that have not been recently reused */
for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end();) {
MetalBufferListEntry buffer_entry = *entry;
id<MTLBuffer> buffer = buffer_entry.buffer;
// NSLog(@"Releasing buffer of length %lu (%lu) (%lu outstanding)", buffer.length, frameCount,
// bufferFreeList.size());
total_temp_mem_size -= buffer.allocatedSize;
[buffer release];
entry = buffer_free_list.erase(entry);
for (MetalBufferListEntry &buffer_entry : temp_buffers) {
total_temp_mem_size -= buffer_entry.buffer.allocatedSize;
[buffer_entry.buffer release];
buffer_entry.buffer = nil;
}
temp_buffers.clear();
}
CCL_NAMESPACE_END
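
The pool rewrite collapses buffer_free_list/buffer_in_use_list into a single temp_buffers vector where command_buffer == nil marks a reusable entry; completion handlers flip that field instead of shuffling entries between lists. A self-contained C++ sketch of the same scheme with placeholder Buffer/CommandBuffer types:

#include <cstddef>
#include <mutex>
#include <vector>

struct Buffer { size_t length = 0; };
struct CommandBuffer {};

class BufferPoolSketch {
  struct Entry {
    Buffer *buffer;
    CommandBuffer *command_buffer; /* nullptr: free for reuse. */
  };
  std::vector<Entry> temp_buffers;
  std::mutex mutex;

 public:
  Buffer *get_buffer(size_t length, CommandBuffer *cb)
  {
    std::lock_guard<std::mutex> lock(mutex);
    /* Find an unused buffer of matching size. */
    for (Entry &entry : temp_buffers) {
      if (entry.command_buffer == nullptr && entry.buffer->length == length) {
        entry.command_buffer = cb;
        return entry.buffer;
      }
    }
    /* Grow the pool; typically it only reaches a handful of entries. */
    Buffer *buffer = new Buffer{length};
    temp_buffers.push_back({buffer, cb});
    return buffer;
  }

  void process_command_buffer_completion(CommandBuffer *cb)
  {
    std::lock_guard<std::mutex> lock(mutex);
    for (Entry &entry : temp_buffers) {
      if (entry.command_buffer == cb) {
        entry.command_buffer = nullptr; /* Mark unused; keep for reuse. */
      }
    }
  }

  ~BufferPoolSketch()
  {
    for (Entry &entry : temp_buffers) {
      delete entry.buffer;
    }
  }
};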

View File

@ -182,6 +182,7 @@ KERNEL_STRUCT_MEMBER(integrator, int, caustics_reflective)
KERNEL_STRUCT_MEMBER(integrator, int, caustics_refractive)
KERNEL_STRUCT_MEMBER(integrator, float, filter_glossy)
/* Seed. */
KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE
KERNEL_STRUCT_MEMBER(integrator, int, seed)
/* Clamp. */
KERNEL_STRUCT_MEMBER(integrator, float, sample_clamp_direct)
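
Marking the seed with KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE keeps a per-render value out of the set of constants baked into specialized pipelines, so changing the seed does not invalidate (and recompile) the PSO. Conceptually the split looks like this (illustrative types, not the Cycles macros):

struct SpecializationConstants {
  float filter_glossy; /* stable across renders: safe to bake into the PSO */
};

struct LaunchParams {
  int seed; /* changes per render/sample: read from memory at launch time */
};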

View File

@ -2,7 +2,7 @@
*
* SPDX-License-Identifier: Apache-2.0
*
* Adapted from Embree with with modifications. */
* Adapted from Embree with modifications. */
#pragma once

View File

@ -859,7 +859,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
probe_ray.self.prim = v.prim;
probe_ray.P = v.p;
/* Set view looking dir. */
/* Set view looking direction. */
wi = -wo;
wi_len = wo_len;
@ -888,7 +888,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
/* Set light looking dir. */
/* Set light looking direction. */
wo = (vi == vertex_count - 1) ? (light_fixed_direction ? ls->D : ls->P - v.p) :
vertices[vi + 1].p - v.p;
wo = normalize_len(wo, &wo_len);

View File

@ -989,11 +989,12 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
IntegratorState state,
ccl_private Ray *ccl_restrict ray,
const int object,
ccl_global float *ccl_restrict render_buffer)
{
ShaderData sd;
shader_setup_from_volume(kg, &sd, ray, object);
/* FIXME: `object` is used for light linking. We read the bottom of the stack for simplicity, but
* this does not work for overlapping volumes. */
shader_setup_from_volume(kg, &sd, ray, INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, object));
/* Load random number state. */
RNGState rng_state;
@ -1186,8 +1187,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
volume_stack_clean(kg, state);
}
const VolumeIntegrateEvent event = volume_integrate(
kg, state, &ray, isect.object, render_buffer);
const VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer);
if (event == VOLUME_PATH_MISSED) {
/* End path. */
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
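
volume_integrate() no longer receives the intersected object; it derives the object for light linking from the bottom entry of the volume stack, which the FIXME above notes is only an approximation when volumes overlap. A C++ sketch of that lookup, with VolumeStackEntry standing in for the Cycles state array:

#include <vector>

struct VolumeStackEntry {
  int object;
  int shader;
};

int bottom_stack_object(const std::vector<VolumeStackEntry> &volume_stack)
{
  /* Index 0 is the bottom of the stack; with overlapping volumes this picks
   * an arbitrary member, hence the FIXME. -1 is a placeholder for "none". */
  return volume_stack.empty() ? -1 : volume_stack[0].object;
}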

View File

@ -198,7 +198,7 @@ float compute_3d_gabor_standard_deviation(float frequency)
return sqrt(IMPULSES_COUNT * second_moment * integral_of_gabor_squared);
}
/* Computes the orientation of the Gabor kernel such that it is is constant for anisotropic
/* Computes the orientation of the Gabor kernel such that it is constant for anisotropic
* noise while it is random for isotropic noise. We randomize in spherical coordinates for a
* uniform distribution. */
vector3 compute_3d_orientation(vector3 orientation, float isotropy, vector4 seed)

View File

@ -195,7 +195,7 @@ ccl_device float compute_3d_gabor_standard_deviation(float frequency)
return sqrtf(IMPULSES_COUNT * second_moment * integral_of_gabor_squared);
}
/* Computes the orientation of the Gabor kernel such that it is is constant for anisotropic
/* Computes the orientation of the Gabor kernel such that it is constant for anisotropic
* noise while it is random for isotropic noise. We randomize in spherical coordinates for a
* uniform distribution. */
ccl_device float3 compute_3d_orientation(float3 orientation, float isotropy, float4 seed)

View File

@ -427,7 +427,7 @@ LightTreeNode *LightTree::build(Scene *scene, DeviceScene *dscene)
root_->light_link = root_->get_inner().children[left]->light_link +
root_->get_inner().children[right]->light_link;
/* Root nodes are never meant to be be shared, even if the local and distant lights are from the
/* Root nodes are never meant to be shared, even if the local and distant lights are from the
* same light linking set. Attempting to share it will make the specialized tree
* try to use the same root as the default tree. */
root_->light_link.shareable = false;

View File

@ -1063,6 +1063,24 @@ void GHOST_XrGraphicsContextBindFuncs(GHOST_XrContextHandle xr_context,
*/
void GHOST_XrDrawViewFunc(GHOST_XrContextHandle xr_context, GHOST_XrDrawViewFn draw_view_fn);
/**
* Set the callback to check if passthrough is enabled.
* If enabled, the passthrough composition layer is added in GHOST_XrSession::draw().
*
* \param passthrough_enabled_fn: The callback to check if passthrough is enabled.
*/
void GHOST_XrPassthroughEnabledFunc(GHOST_XrContextHandle xr_context,
GHOST_XrPassthroughEnabledFn passthrough_enabled_fn);
/**
* Set the callback to force-disable passthrough in case it is not supported.
* Called in GHOST_XrSession::draw().
*
* \param disable_passthrough_fn: The callback to disable passthrough.
*/
void GHOST_XrDisablePassthroughFunc(GHOST_XrContextHandle xr_context,
GHOST_XrDisablePassthroughFn disable_passthrough_fn);
/* sessions */
/**
* Create internal session data for \a xr_context and ask the OpenXR runtime to invoke a session.

View File

@ -30,6 +30,8 @@ class GHOST_IXrContext {
virtual void setGraphicsContextBindFuncs(GHOST_XrGraphicsContextBindFn bind_fn,
GHOST_XrGraphicsContextUnbindFn unbind_fn) = 0;
virtual void setDrawViewFunc(GHOST_XrDrawViewFn draw_view_fn) = 0;
virtual void setPassthroughEnabledFunc(GHOST_XrPassthroughEnabledFn passthrough_enabled_fn) = 0;
virtual void setDisablePassthroughFunc(GHOST_XrDisablePassthroughFn disable_passthrough_fn) = 0;
virtual bool needsUpsideDownDrawing() const = 0;
};

View File

@ -25,9 +25,10 @@ extern GHOST_TSuccess GHOST_CreateSystemPaths();
extern GHOST_TSuccess GHOST_DisposeSystemPaths();
/**
* Determine the base dir in which shared resources are located. It will first try to use
* "unpack and run" path, then look for properly installed path, including versioning.
* \return Unsigned char string pointing to system dir (eg `/usr/share/blender/`).
* Determine the base directory in which shared resources are located.
* It will first try to use "unpack and run" path, then look for properly
* installed path, including versioning.
* \return Unsigned char string pointing to system directory (eg `/usr/share/blender/`).
*
* \note typically: `BKE_appdir_resource_path_id(BLENDER_RESOURCE_PATH_SYSTEM, false)` should be
* used instead of this function directly as it ensures environment variable overrides are used.
@ -35,8 +36,8 @@ extern GHOST_TSuccess GHOST_DisposeSystemPaths();
extern const char *GHOST_getSystemDir(int version, const char *versionstr);
/**
* Determine the base dir in which user configuration is stored, including versioning.
* \return Unsigned char string pointing to user dir (eg ~).
* Determine the base directory in which user configuration is stored, including versioning.
* \return Unsigned char string pointing to user directory (eg ~).
*
* \note typically: `BKE_appdir_resource_path_id(BLENDER_RESOURCE_PATH_USER, false)` should be
* used instead of this function directly as it ensures environment variable overrides are used.
@ -45,13 +46,13 @@ extern const char *GHOST_getUserDir(int version, const char *versionstr);
/**
* Determine a special ("well known") and easy to reach user directory.
* \return Unsigned char string pointing to user dir (eg `~/Documents/`).
* \return Unsigned char string pointing to user directory (eg `~/Documents/`).
*/
extern const char *GHOST_getUserSpecialDir(GHOST_TUserSpecialDirTypes type);
/**
* Determine the dir in which the binary file is found.
* \return Unsigned char string pointing to binary dir (eg ~/usr/local/bin/).
* Determine the directory in which the binary file is found.
* \return Unsigned char string pointing to binary directory (eg ~/usr/local/bin/).
*/
extern const char *GHOST_getBinaryDir();

View File

@ -785,6 +785,8 @@ typedef void (*GHOST_XrCustomdataFreeFn)(void *customdata);
typedef void *(*GHOST_XrGraphicsContextBindFn)(void);
typedef void (*GHOST_XrGraphicsContextUnbindFn)(GHOST_ContextHandle graphics_context);
typedef void (*GHOST_XrDrawViewFn)(const struct GHOST_XrDrawViewInfo *draw_view, void *customdata);
typedef bool (*GHOST_XrPassthroughEnabledFn)(void *customdata);
typedef void (*GHOST_XrDisablePassthroughFn)(void *customdata);
/**
* An array of #GHOST_TXrGraphicsBinding items defining the candidate bindings to use.

View File

@ -1046,6 +1046,20 @@ void GHOST_XrDrawViewFunc(GHOST_XrContextHandle xr_contexthandle, GHOST_XrDrawVi
GHOST_XR_CAPI_CALL(xr_context->setDrawViewFunc(draw_view_fn), xr_context);
}
void GHOST_XrPassthroughEnabledFunc(GHOST_XrContextHandle xr_contexthandle,
GHOST_XrPassthroughEnabledFn passthrough_enabled_fn)
{
GHOST_IXrContext *xr_context = (GHOST_IXrContext *)xr_contexthandle;
GHOST_XR_CAPI_CALL(xr_context->setPassthroughEnabledFunc(passthrough_enabled_fn), xr_context);
}
void GHOST_XrDisablePassthroughFunc(GHOST_XrContextHandle xr_contexthandle,
GHOST_XrDisablePassthroughFn disable_passthrough_fn)
{
GHOST_IXrContext *xr_context = (GHOST_IXrContext *)xr_contexthandle;
GHOST_XR_CAPI_CALL(xr_context->setDisablePassthroughFunc(disable_passthrough_fn), xr_context);
}
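
Caller-side registration of the two new hooks follows the same pattern as the existing GHOST_XrDrawViewFunc. A hypothetical example (the wm_xr_* callback names are made up; only the GHOST entry points and typedefs come from the headers above):

#include "GHOST_C-api.h"

static bool wm_xr_passthrough_enabled(void *customdata)
{
  return *static_cast<bool *>(customdata);
}

static void wm_xr_disable_passthrough(void *customdata)
{
  *static_cast<bool *>(customdata) = false;
}

static void register_passthrough_callbacks(GHOST_XrContextHandle xr_context)
{
  GHOST_XrPassthroughEnabledFunc(xr_context, wm_xr_passthrough_enabled);
  GHOST_XrDisablePassthroughFunc(xr_context, wm_xr_disable_passthrough);
}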
int GHOST_XrSessionNeedsUpsideDownDrawing(const GHOST_XrContextHandle xr_contexthandle)
{
const GHOST_IXrContext *xr_context = (const GHOST_IXrContext *)xr_contexthandle;

View File

@ -472,7 +472,27 @@ GHOST_TSuccess GHOST_ContextEGL::initializeDrawingContext()
}
if (m_nativeWindow != 0) {
m_surface = ::eglCreateWindowSurface(m_display, m_config, m_nativeWindow, nullptr);
std::vector<EGLint> surface_attrib_list;
surface_attrib_list.reserve(3);
#ifdef WITH_GHOST_WAYLAND
/* Fix transparency issue on: `Wayland + Nouveau/Zink+NVK`. Due to unsupported texture formats
* drivers can hit transparency code-paths resulting in showing the desktop in viewports.
*
* See #102994. */
/* EGL_EXT_present_opaque isn't added to the latest release of epoxy, but is part of the latest
* EGL https://github.com/KhronosGroup/EGL-Registry/blob/main/api/egl.xml */
if (epoxy_has_egl_extension(m_display, "EGL_EXT_present_opaque")) {
# ifndef EGL_PRESENT_OPAQUE_EXT
# define EGL_PRESENT_OPAQUE_EXT 0x31DF
# endif
surface_attrib_list.push_back(EGL_PRESENT_OPAQUE_EXT);
surface_attrib_list.push_back(EGL_TRUE);
}
#endif
surface_attrib_list.push_back(EGL_NONE);
m_surface = ::eglCreateWindowSurface(
m_display, m_config, m_nativeWindow, surface_attrib_list.data());
m_surface_from_native_window = true;
}
else {

View File

@ -3394,7 +3394,7 @@ static void data_device_handle_drop(void *data, wl_data_device * /*wl_data_devic
std::lock_guard lock{seat->data_offer_dnd_mutex};
/* No need to check this for null (as other callbacks do).
* because the the data-offer has not been accepted (actions set... etc). */
* because the data-offer has not been accepted (actions set... etc). */
GWL_DataOffer *data_offer = seat->data_offer_dnd;
/* Use a blank string for `mime_receive` to prevent crashes, although could also be `nullptr`.
@ -4871,7 +4871,7 @@ static void keyboard_handle_keymap(void *data,
CLOG_INFO(LOG, 2, "keymap");
/* Reset in case there was a previous non-zero active layout for the the last key-map.
/* Reset in case there was a previous non-zero active layout for the last key-map.
* Note that this is set later by `wl_keyboard_listener::modifiers`, it's possible that handling
* the first modifier will run #xkb_state_update_mask again (if the active layout is non-zero)
* however as this is only done when the layout changed, it's harmless.

View File

@ -2605,7 +2605,7 @@ bool GHOST_WindowWayland::outputs_changed_update_scale()
* each with different fractional scale, see: #109194.
*
* Note that the window will show larger, then resize to be smaller soon
* after opening. This would be nice to avoid but but would require DPI
* after opening. This would be nice to avoid but would require DPI
* to be stored in the window (as noted above). */
int size_next[2] = {0, 0};
int size_orig[2] = {0, 0};

View File

@ -430,6 +430,9 @@ void GHOST_XrContext::getExtensionsToEnable(
/* Varjo foveated extension. */
try_ext.push_back(XR_VARJO_FOVEATED_RENDERING_EXTENSION_NAME);
/* Meta/Facebook passthrough extension. */
try_ext.push_back(XR_FB_PASSTHROUGH_EXTENSION_NAME);
r_ext_names.reserve(try_ext.size() + graphics_binding_types.size());
/* Add graphics binding extensions (may be multiple ones, we'll settle for one to use later, once
@ -593,6 +596,18 @@ void GHOST_XrContext::setDrawViewFunc(GHOST_XrDrawViewFn draw_view_fn)
m_custom_funcs.draw_view_fn = draw_view_fn;
}
void GHOST_XrContext::setPassthroughEnabledFunc(
GHOST_XrPassthroughEnabledFn passthrough_enabled_fn)
{