MTLCommandBufferState for coordinating GPU workload submission and render pass management.

MTLFrameBuffer has been implemented to support creation of RenderCommandEncoders, along with the associated functionality in the Metal Context.

Optimisation stubs for GPU_framebuffer_bind_ex have been added, which enable explicit assignment of attachment load-store ops at bind time, rather than on a framebuffer object as a whole.

Begin and end frame markers are used to encapsulate frame boundaries for explicit workload submission. This is required for explicit APIs where implicit flushing of work does not occur.
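
For illustration, a minimal sketch of how a frame boundary could be expressed with the new API (the rendering callback is a hypothetical placeholder; GPU_context_begin_frame/GPU_context_end_frame are the functions added in this change):

    /* Sketch: wrap background rendering in explicit frame markers so the Metal
     * backend submits its command buffers even when no present call occurs. */
    GPUContext *ctx = GPU_context_active_get();
    GPU_context_begin_frame(ctx);
    render_scene_offscreen(); /* Hypothetical rendering callback. */
    GPU_context_end_frame(ctx); /* Flushes pending GPU work at the frame boundary. */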

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D15027
This commit is contained in:
Jason Fielder
2022-06-27 11:41:04 +02:00
committed by Clément Foucault
parent 7b6b740ace
commit 9130a60d3d
28 changed files with 3869 additions and 732 deletions

View File

@@ -188,7 +188,9 @@ set(OPENGL_SRC
set(METAL_SRC
metal/mtl_backend.mm
metal/mtl_context.mm
metal/mtl_command_buffer.mm
metal/mtl_debug.mm
metal/mtl_framebuffer.mm
metal/mtl_state.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
@@ -198,6 +200,7 @@ set(METAL_SRC
metal/mtl_common.hh
metal/mtl_context.hh
metal/mtl_debug.hh
metal/mtl_framebuffer.hh
metal/mtl_state.hh
metal/mtl_texture.hh
)

View File

@@ -8,6 +8,14 @@
extern "C" {
#endif
typedef enum eGPULoadOp {
GPU_LOADACTION_CLEAR = 0,
GPU_LOADACTION_LOAD,
GPU_LOADACTION_DONT_CARE
} eGPULoadOp;
typedef enum eGPUStoreOp { GPU_STOREACTION_STORE = 0, GPU_STOREACTION_DONT_CARE } eGPUStoreOp;
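
For context, a Metal backend has to translate these ops into MTLLoadAction/MTLStoreAction when baking a render pass descriptor. A minimal sketch of such a mapping (helper names are illustrative, not the committed implementation):

    static MTLLoadAction mtl_load_action_from_gpu(eGPULoadOp op)
    {
      switch (op) {
        case GPU_LOADACTION_CLEAR:
          return MTLLoadActionClear;
        case GPU_LOADACTION_LOAD:
          return MTLLoadActionLoad;
        default:
          return MTLLoadActionDontCare;
      }
    }
    static MTLStoreAction mtl_store_action_from_gpu(eGPUStoreOp op)
    {
      return (op == GPU_STOREACTION_STORE) ? MTLStoreActionStore : MTLStoreActionDontCare;
    }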
typedef enum eGPUFrontFace {
GPU_CLOCKWISE,
GPU_COUNTERCLOCKWISE,

View File

@@ -38,6 +38,13 @@ void GPU_context_discard(GPUContext *);
void GPU_context_active_set(GPUContext *);
GPUContext *GPU_context_active_get(void);
/* Begin and end frame are used to mark the singular boundary representing the lifetime of a whole
* frame. This also acts as a divisor for ensuring workload submission and flushing, especially for
* background rendering when there is no call to present.
* This is required by explicit-API's where there is no implicit workload flushing. */
void GPU_context_begin_frame(GPUContext *ctx);
void GPU_context_end_frame(GPUContext *ctx);
/* Legacy GPU (Intel HD4000 series) do not support sharing GPU objects between GPU
* contexts. EEVEE/Workbench can create different contexts for image/preview rendering, baking or
* compiling. When a legacy GPU is detected (`GPU_use_main_context_workaround()`) any worker

View File

@@ -14,6 +14,7 @@
#pragma once
#include "GPU_common_types.h"
#include "GPU_texture.h"
typedef enum eGPUFrameBufferBits {
@@ -52,6 +53,44 @@ void GPU_framebuffer_bind(GPUFrameBuffer *fb);
void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *fb);
void GPU_framebuffer_restore(void);
/* Advanced binding control. */
typedef struct GPULoadStore {
eGPULoadOp load_action;
eGPUStoreOp store_action;
} GPULoadStore;
#define NULL_LOAD_STORE \
{ \
GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_DONT_CARE \
}
/* Load store config array (load_store_actions) matches attachment structure of
* GPU_framebuffer_config_array. This allows us to explicitly specify whether attachment data needs
* to be loaded and stored on a per-attachment basis. This enables a number of bandwidth
* optimisations:
* - No need to load contents if subsequent work is over-writing every pixel.
* - No need to store attachments whose contents are not used beyond this pass e.g. depth buffer.
* - State can be customised at bind-time rather than applying to the framebuffer object as a
* whole.
*
* Example:
* \code{.c}
* GPU_framebuffer_bind_loadstore(&fb, {
* {GPU_LOADACTION_LOAD, GPU_STOREACTION_DONT_CARE}, // must be depth buffer
* {GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE}, // Colour attachment 0
* {GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE}, // Colour attachment 1
* {GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE} // Colour attachment 2
* })
* \endcode
*/
void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *fb,
const GPULoadStore *load_store_actions,
uint actions_len);
#define GPU_framebuffer_bind_ex(_fb, ...) \
{ \
GPULoadStore actions[] = __VA_ARGS__; \
GPU_framebuffer_bind_loadstore(_fb, actions, (sizeof(actions) / sizeof(GPULoadStore))); \
}
bool GPU_framebuffer_bound(GPUFrameBuffer *fb);
bool GPU_framebuffer_check_valid(GPUFrameBuffer *fb, char err_out[256]);
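
Since GPU_framebuffer_bind_ex simply wraps GPU_framebuffer_bind_loadstore with an inline array, a call site looks like the following sketch (the framebuffer handle `fb` is hypothetical; entry 0 always describes the depth attachment, subsequent entries the colour attachments):

    /* Sketch: load existing depth but discard it after the pass; skip loading
     * colour attachment 0 since every pixel will be overwritten. */
    GPU_framebuffer_bind_ex(fb,
                            {
                                {GPU_LOADACTION_LOAD, GPU_STOREACTION_DONT_CARE}, /* Depth. */
                                {GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE}, /* Colour 0. */
                            });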

View File

@@ -123,6 +123,22 @@ GPUContext *GPU_context_active_get()
return wrap(Context::get());
}
void GPU_context_begin_frame(GPUContext *ctx)
{
blender::gpu::Context *_ctx = unwrap(ctx);
if (_ctx) {
_ctx->begin_frame();
}
}
void GPU_context_end_frame(GPUContext *ctx)
{
blender::gpu::Context *_ctx = unwrap(ctx);
if (_ctx) {
_ctx->end_frame();
}
}
/* -------------------------------------------------------------------- */
/** \name Main context global mutex
*

View File

@@ -63,6 +63,8 @@ class Context {
virtual void activate() = 0;
virtual void deactivate() = 0;
virtual void begin_frame() = 0;
virtual void end_frame() = 0;
/* Will push all pending commands to the GPU. */
virtual void flush() = 0;

View File

@@ -124,6 +124,43 @@ void FrameBuffer::attachment_remove(GPUAttachmentType type)
dirty_attachments_ = true;
}
void FrameBuffer::load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len)
{
/* Follows attachment structure of GPU_framebuffer_config_array/GPU_framebuffer_ensure_config */
const GPULoadStore &depth_action = load_store_actions[0];
Span<GPULoadStore> color_attachments(load_store_actions + 1, actions_len - 1);
if (this->attachments_[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex) {
this->attachment_set_loadstore_op(
GPU_FB_DEPTH_STENCIL_ATTACHMENT, depth_action.load_action, depth_action.store_action);
}
if (this->attachments_[GPU_FB_DEPTH_ATTACHMENT].tex) {
this->attachment_set_loadstore_op(
GPU_FB_DEPTH_ATTACHMENT, depth_action.load_action, depth_action.store_action);
}
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0;
for (const GPULoadStore &actions : color_attachments) {
if (this->attachments_[type].tex) {
this->attachment_set_loadstore_op(type, actions.load_action, actions.store_action);
}
++type;
}
}
unsigned int FrameBuffer::get_bits_per_pixel(void)
{
unsigned int total_bits = 0;
for (GPUAttachment &attachment : attachments_) {
Texture *tex = reinterpret_cast<Texture *>(attachment.tex);
if (tex != nullptr) {
int bits = to_bytesize(tex->format_get()) * to_component_len(tex->format_get());
total_bits += bits;
}
}
return total_bits;
}
void FrameBuffer::recursive_downsample(int max_lvl,
void (*callback)(void *userData, int level),
void *userData)
@@ -149,10 +186,21 @@ void FrameBuffer::recursive_downsample(int max_lvl,
attachment.mip = mip_lvl;
}
}
/* Update the internal attachments and viewport size. */
dirty_attachments_ = true;
this->bind(true);
/* Optimise load-store state. */
GPUAttachmentType type = GPU_FB_DEPTH_ATTACHMENT;
for (GPUAttachment &attachment : attachments_) {
Texture *tex = reinterpret_cast<Texture *>(attachment.tex);
if (tex != nullptr) {
this->attachment_set_loadstore_op(type, GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE);
}
++type;
}
callback(userData, mip_lvl);
}
@@ -198,6 +246,18 @@ void GPU_framebuffer_bind(GPUFrameBuffer *gpu_fb)
unwrap(gpu_fb)->bind(enable_srgb);
}
void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *gpu_fb,
const GPULoadStore *load_store_actions,
uint actions_len)
{
/* Bind */
GPU_framebuffer_bind(gpu_fb);
/* Update load store */
FrameBuffer *fb = unwrap(gpu_fb);
fb->load_store_config_array(load_store_actions, actions_len);
}
void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *gpu_fb)
{
const bool enable_srgb = false;

View File

@@ -114,6 +114,10 @@ class FrameBuffer {
eGPUDataFormat data_format,
const void *clear_value) = 0;
virtual void attachment_set_loadstore_op(GPUAttachmentType type,
eGPULoadOp load_action,
eGPUStoreOp store_action) = 0;
virtual void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,
const int area[4],
@@ -128,12 +132,15 @@ class FrameBuffer {
int dst_offset_x,
int dst_offset_y) = 0;
void load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len);
void attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment);
void attachment_remove(GPUAttachmentType type);
void recursive_downsample(int max_lvl,
void (*callback)(void *userData, int level),
void *userData);
uint get_bits_per_pixel();
inline void size_set(int width, int height)
{

View File

@@ -35,19 +35,19 @@ class MTLBackend : public GPUBackend {
return MTLBackend::capabilities;
}
inline ~MTLBackend()
~MTLBackend()
{
MTLBackend::platform_exit();
}
static bool metal_is_supported();
inline static MTLBackend *get()
static MTLBackend *get()
{
return static_cast<MTLBackend *>(GPUBackend::get());
}
void samplers_update() override;
inline void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override
{
/* Placeholder */
}

View File

@@ -9,6 +9,7 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_framebuffer.hh"
#include "gpu_capabilities_private.hh"
#include "gpu_platform_private.hh"
@@ -50,8 +51,9 @@ DrawList *MTLBackend::drawlist_alloc(int list_length)
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
{
/* TODO(Metal): Implement MTLFrameBuffer. */
return nullptr;
MTLContext *mtl_context = static_cast<MTLContext *>(
reinterpret_cast<Context *>(GPU_context_active_get()));
return new MTLFrameBuffer(mtl_context, name);
};
IndexBuf *MTLBackend::indexbuf_alloc()
@@ -380,11 +382,10 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
/* In Metal, total_thread_count is 512 or 1024, such that
* threadgroup `width*height*depth <= total_thread_count` */
unsigned int max_threads_per_threadgroup_per_dim =
([device supportsFamily:MTLGPUFamilyApple4] ||
MTLBackend::capabilities.supports_family_mac1) ?
1024 :
512;
uint max_threads_per_threadgroup_per_dim = ([device supportsFamily:MTLGPUFamilyApple4] ||
MTLBackend::capabilities.supports_family_mac1) ?
1024 :
512;
GCaps.max_work_group_size[0] = max_threads_per_threadgroup_per_dim;
GCaps.max_work_group_size[1] = max_threads_per_threadgroup_per_dim;
GCaps.max_work_group_size[2] = max_threads_per_threadgroup_per_dim;

View File

@@ -19,7 +19,7 @@ namespace gpu {
#define MTL_MAX_UNIFORMS_PER_BLOCK 64
/* Context-specific limits -- populated in 'MTLBackend::platform_init' */
typedef struct MTLCapabilities {
struct MTLCapabilities {
/* Variable Limits & feature sets. */
int max_color_render_targets = 4; /* Minimum = 4 */
@@ -40,8 +40,7 @@ typedef struct MTLCapabilities {
bool supports_family_mac2 = false;
bool supports_family_mac_catalyst1 = false;
bool supports_family_mac_catalyst2 = false;
} MTLCapabilities;
};
} // namespace gpu
} // namespace blender

View File

@@ -0,0 +1,635 @@
#include "DNA_userdef_types.h"
#include "mtl_backend.hh"
#include "mtl_common.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_framebuffer.hh"
#include <fstream>
using namespace blender;
using namespace blender::gpu;
namespace blender::gpu {
/* Global sync event used across MTLContext's.
* This resolves flickering artifacts from command buffer
* dependencies not being honoured for work submitted between
* different GPUContext's. */
id<MTLEvent> MTLCommandBufferManager::sync_event = nil;
unsigned long long MTLCommandBufferManager::event_signal_val = 0;
/* Counter for active command buffers. */
int MTLCommandBufferManager::num_active_cmd_bufs = 0;
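
The ordering pattern these statics implement is standard Metal event usage: each command buffer signals a monotonically increasing value on submission, and the next buffer waits on that value before executing. A standalone sketch using only the Metal framework (no Blender types):

    /* Sketch: strict execution ordering across two command buffers via MTLEvent. */
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    id<MTLCommandQueue> queue = [device newCommandQueue];
    id<MTLEvent> sync_event = [device newEvent];
    uint64_t signal_val = 1;

    id<MTLCommandBuffer> cmd_a = [queue commandBuffer];
    /* ... encode work from context A ... */
    [cmd_a encodeSignalEvent:sync_event value:signal_val];
    [cmd_a commit];

    id<MTLCommandBuffer> cmd_b = [queue commandBuffer];
    [cmd_b encodeWaitForEvent:sync_event value:signal_val]; /* Runs only after cmd_a signals. */
    /* ... encode work from context B ... */
    [cmd_b commit];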
/* -------------------------------------------------------------------- */
/** \name MTLCommandBuffer initialisation and render coordination.
* \{ */
void MTLCommandBufferManager::prepare(MTLContext *ctx, bool supports_render)
{
context_ = ctx;
render_pass_state_.prepare(this, ctx);
}
void MTLCommandBufferManager::register_encoder_counters()
{
encoder_count_++;
empty_ = false;
}
id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
{
if (active_command_buffer_ == nil) {
/* Verify number of active command buffers is below limit.
* Exceeding this limit will mean we either have a leak/GPU hang
* or we should increase the command buffer limit during MTLQueue creation. */
BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs < MTL_MAX_COMMAND_BUFFERS);
if (G.debug & G_DEBUG_GPU) {
/* Debug: Enable Advanced Errors for GPU work execution. */
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
active_command_buffer_ = [context_->queue commandBufferWithDescriptor:desc];
}
else {
active_command_buffer_ = [context_->queue commandBuffer];
}
[active_command_buffer_ retain];
MTLCommandBufferManager::num_active_cmd_bufs++;
/* Ensure command buffers execute in submission order across multiple MTLContext's. */
if (this->sync_event != nil) {
[active_command_buffer_ encodeWaitForEvent:this->sync_event value:this->event_signal_val];
}
/* Reset Command buffer heuristics. */
this->reset_counters();
}
BLI_assert(active_command_buffer_ != nil);
return active_command_buffer_;
}
/* If wait is true, CPU will stall until GPU work has completed. */
bool MTLCommandBufferManager::submit(bool wait)
{
/* Skip submission if command buffer is empty. */
if (empty_ || active_command_buffer_ == nil) {
return false;
}
/* Ensure current encoders are finished. */
this->end_active_command_encoder();
BLI_assert(active_command_encoder_type_ == MTL_NO_COMMAND_ENCODER);
/*** Submit Command Buffer. ***/
/* Strict ordering ensures command buffers are guaranteed to execute after a previous
* one has completed. Resolves flickering when command buffers are submitted from
* different MTLContext's. */
if (MTLCommandBufferManager::sync_event == nil) {
MTLCommandBufferManager::sync_event = [context_->device newEvent];
BLI_assert(MTLCommandBufferManager::sync_event);
[MTLCommandBufferManager::sync_event retain];
}
BLI_assert(MTLCommandBufferManager::sync_event != nil);
MTLCommandBufferManager::event_signal_val++;
[active_command_buffer_ encodeSignalEvent:MTLCommandBufferManager::sync_event
value:MTLCommandBufferManager::event_signal_val];
/* Command buffer lifetime tracking. */
/* TODO(Metal): This routine will later be used to track released memory allocations within the
* lifetime of a command buffer such that memory is only released once no longer in use. */
id<MTLCommandBuffer> cmd_buffer_ref = [active_command_buffer_ retain];
[cmd_buffer_ref addCompletedHandler:^(id<MTLCommandBuffer> cb) {
/* Release command buffer after completion callback handled. */
[cmd_buffer_ref release];
/* Decrement active cmd buffer count. */
MTLCommandBufferManager::num_active_cmd_bufs--;
}];
/* Submit command buffer to GPU. */
[active_command_buffer_ commit];
if (wait || (G.debug & G_DEBUG_GPU)) {
/* Wait until current GPU work has finished executing. */
[active_command_buffer_ waitUntilCompleted];
/* Command buffer execution debugging can return an error message if
* execution has failed or encountered GPU-side errors. */
if (G.debug & G_DEBUG_GPU) {
NSError *error = [active_command_buffer_ error];
if (error != nil) {
NSLog(@"%@", error);
BLI_assert(false);
@autoreleasepool {
const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
std::ofstream outfile;
outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
outfile << stringAsChar;
outfile.close();
}
}
}
}
/* Release previous frames command buffer and reset active cmd buffer. */
if (last_submitted_command_buffer_ != nil) {
BLI_assert(MTLBackend::get()->is_inside_render_boundary());
[last_submitted_command_buffer_ autorelease];
last_submitted_command_buffer_ = nil;
}
last_submitted_command_buffer_ = active_command_buffer_;
active_command_buffer_ = nil;
return true;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Render Command Encoder Utility and management functions.
* \{ */
/* Fetch/query current encoder. */
bool MTLCommandBufferManager::is_inside_render_pass()
{
return (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER);
}
bool MTLCommandBufferManager::is_inside_blit()
{
return (active_command_encoder_type_ == MTL_BLIT_COMMAND_ENCODER);
}
bool MTLCommandBufferManager::is_inside_compute()
{
return (active_command_encoder_type_ == MTL_COMPUTE_COMMAND_ENCODER);
}
id<MTLRenderCommandEncoder> MTLCommandBufferManager::get_active_render_command_encoder()
{
/* Calling code should check if inside render pass. Otherwise nil. */
return active_render_command_encoder_;
}
id<MTLBlitCommandEncoder> MTLCommandBufferManager::get_active_blit_command_encoder()
{
/* Calling code should check if inside a blit pass. Otherwise nil. */
return active_blit_command_encoder_;
}
id<MTLComputeCommandEncoder> MTLCommandBufferManager::get_active_compute_command_encoder()
{
/* Calling code should check if inside a compute pass. Otherwise nil. */
return active_compute_command_encoder_;
}
MTLFrameBuffer *MTLCommandBufferManager::get_active_framebuffer()
{
/* If outside of RenderPass, nullptr will be returned. */
if (this->is_inside_render_pass()) {
return active_frame_buffer_;
}
return nullptr;
}
/* Encoder and Pass management. */
/* End currently active MTLCommandEncoder. */
bool MTLCommandBufferManager::end_active_command_encoder()
{
/* End active encoder if one is active. */
if (active_command_encoder_type_ != MTL_NO_COMMAND_ENCODER) {
switch (active_command_encoder_type_) {
case MTL_RENDER_COMMAND_ENCODER: {
/* Verify a RenderCommandEncoder is active and end. */
BLI_assert(active_render_command_encoder_ != nil);
/* Complete Encoding. */
[active_render_command_encoder_ endEncoding];
[active_render_command_encoder_ release];
active_render_command_encoder_ = nil;
active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
/* Reset associated framebuffer flag. */
active_frame_buffer_ = nullptr;
active_pass_descriptor_ = nullptr;
return true;
}
case MTL_BLIT_COMMAND_ENCODER: {
/* Verify a BlitCommandEncoder is active and end. */
BLI_assert(active_blit_command_encoder_ != nil);
[active_blit_command_encoder_ endEncoding];
[active_blit_command_encoder_ release];
active_blit_command_encoder_ = nil;
active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
return true;
}
case MTL_COMPUTE_COMMAND_ENCODER: {
/* Verify a ComputeCommandEncoder is active and end. */
BLI_assert(active_compute_command_encoder_ != nil);
[active_compute_command_encoder_ endEncoding];
[active_compute_command_encoder_ release];
active_compute_command_encoder_ = nil;
active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
return true;
}
default: {
BLI_assert(false && "Invalid command encoder type");
return false;
}
};
}
else {
/* MTL_NO_COMMAND_ENCODER. */
BLI_assert(active_render_command_encoder_ == nil);
BLI_assert(active_blit_command_encoder_ == nil);
BLI_assert(active_compute_command_encoder_ == nil);
return false;
}
}
id<MTLRenderCommandEncoder> MTLCommandBufferManager::ensure_begin_render_command_encoder(
MTLFrameBuffer *ctx_framebuffer, bool force_begin, bool *new_pass)
{
/* Ensure valid framebuffer. */
BLI_assert(ctx_framebuffer != nullptr);
/* Ensure active command buffer. */
id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
BLI_assert(cmd_buf);
/* Begin new command encoder if the currently active one is
* incompatible or requires updating. */
if (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER ||
active_frame_buffer_ != ctx_framebuffer || force_begin) {
this->end_active_command_encoder();
/* Determine if this is a re-bind of the same framebuffer. */
bool is_rebind = (active_frame_buffer_ == ctx_framebuffer);
/* Generate RenderPassDescriptor from bound framebuffer. */
BLI_assert(ctx_framebuffer);
active_frame_buffer_ = ctx_framebuffer;
active_pass_descriptor_ = active_frame_buffer_->bake_render_pass_descriptor(
is_rebind && (!active_frame_buffer_->get_pending_clear()));
/* Ensure we have already cleaned up our previous render command encoder. */
BLI_assert(active_render_command_encoder_ == nil);
/* Create new RenderCommandEncoder based on descriptor (and begin encoding). */
active_render_command_encoder_ = [cmd_buf
renderCommandEncoderWithDescriptor:active_pass_descriptor_];
[active_render_command_encoder_ retain];
active_command_encoder_type_ = MTL_RENDER_COMMAND_ENCODER;
/* Update command buffer encoder heuristics. */
this->register_encoder_counters();
/* Apply initial state. */
/* Update Viewport and Scissor State */
active_frame_buffer_->apply_state();
/* Flag framebuffer as cleared -- a clear only lasts for the pass in which it was specified.
* After this, attachments reset to Load, matching GL behaviour. */
active_frame_buffer_->mark_cleared();
/* Reset RenderPassState to ensure resource bindings are re-applied. */
render_pass_state_.reset_state();
/* Return true as new pass started. */
*new_pass = true;
}
else {
/* No new pass. */
*new_pass = false;
}
BLI_assert(active_render_command_encoder_ != nil);
return active_render_command_encoder_;
}
id<MTLBlitCommandEncoder> MTLCommandBufferManager::ensure_begin_blit_encoder()
{
/* Ensure active command buffer. */
id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
BLI_assert(cmd_buf);
/* Ensure no existing command encoder of a different type is active. */
if (active_command_encoder_type_ != MTL_BLIT_COMMAND_ENCODER) {
this->end_active_command_encoder();
}
/* Begin new Blit Encoder. */
if (active_blit_command_encoder_ == nil) {
active_blit_command_encoder_ = [cmd_buf blitCommandEncoder];
BLI_assert(active_blit_command_encoder_ != nil);
[active_blit_command_encoder_ retain];
active_command_encoder_type_ = MTL_BLIT_COMMAND_ENCODER;
/* Update command buffer encoder heuristics. */
this->register_encoder_counters();
}
BLI_assert(active_blit_command_encoder_ != nil);
return active_blit_command_encoder_;
}
id<MTLComputeCommandEncoder> MTLCommandBufferManager::ensure_begin_compute_encoder()
{
/* Ensure active command buffer. */
id<MTLCommandBuffer> cmd_buf = this->ensure_begin();
BLI_assert(cmd_buf);
/* Ensure no existing command encoder of a different type is active. */
if (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER) {
this->end_active_command_encoder();
}
/* Begin new Compute Encoder. */
if (active_compute_command_encoder_ == nil) {
active_compute_command_encoder_ = [cmd_buf computeCommandEncoder];
BLI_assert(active_compute_command_encoder_ != nil);
[active_compute_command_encoder_ retain];
active_command_encoder_type_ = MTL_COMPUTE_COMMAND_ENCODER;
/* Update command buffer encoder heuristics. */
this->register_encoder_counters();
}
BLI_assert(active_compute_command_encoder_ != nil);
return active_compute_command_encoder_;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Command buffer heuristics.
* \{ */
/* Rendering Heuristics. */
void MTLCommandBufferManager::register_draw_counters(int vertex_submission)
{
current_draw_call_count_++;
vertex_submitted_count_ += vertex_submission;
empty_ = false;
}
/* Reset workload counters. */
void MTLCommandBufferManager::reset_counters()
{
empty_ = true;
current_draw_call_count_ = 0;
encoder_count_ = 0;
vertex_submitted_count_ = 0;
}
/* Workload evaluation. */
bool MTLCommandBufferManager::do_break_submission()
{
/* Skip if no active command buffer. */
if (active_command_buffer_ == nil) {
return false;
}
/* Use optimised heuristic to split heavy command buffer submissions to better saturate the
* hardware and also reduce stalling from individual large submissions. */
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY) ||
GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) {
return ((current_draw_call_count_ > 30000) || (vertex_submitted_count_ > 100000000) ||
(encoder_count_ > 25));
}
else {
/* Apple Silicon is less efficient if splitting submissions. */
return false;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Command buffer debugging.
* \{ */
/* Debug. */
void MTLCommandBufferManager::push_debug_group(const char *name, int index)
{
id<MTLCommandBuffer> cmd = this->ensure_begin();
if (cmd != nil) {
[cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]];
}
}
void MTLCommandBufferManager::pop_debug_group()
{
id<MTLCommandBuffer> cmd = this->ensure_begin();
if (cmd != nil) {
[cmd popDebugGroup];
}
}
/* Workload Synchronisation. */
bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits,
eGPUStageBarrierBits before_stages,
eGPUStageBarrierBits after_stages)
{
/* Only supporting Metal on 10.14 onwards anyway - Check required for warnings. */
if (@available(macOS 10.14, *)) {
/* Resolve scope. */
MTLBarrierScope scope = 0;
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS ||
barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
scope = scope | MTLBarrierScopeTextures | MTLBarrierScopeRenderTargets;
}
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY ||
barrier_bits & GPU_BARRIER_ELEMENT_ARRAY) {
scope = scope | MTLBarrierScopeBuffers;
}
if (scope != 0) {
/* Issue barrier based on encoder. */
switch (active_command_encoder_type_) {
case MTL_NO_COMMAND_ENCODER:
case MTL_BLIT_COMMAND_ENCODER: {
/* No barrier to be inserted. */
return false;
}
/* Rendering. */
case MTL_RENDER_COMMAND_ENCODER: {
/* Currently flagging both stages -- can use bits above to filter on stage type --
* though a full barrier is safe for now. */
MTLRenderStages before_stage_flags = 0;
MTLRenderStages after_stage_flags = 0;
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT)) {
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
}
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
}
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
}
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
after_stage_flags = MTLRenderStageFragment;
}
id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope
afterStages:after_stage_flags
beforeStages:before_stage_flags];
return true;
}
/* Compute. */
case MTL_COMPUTE_COMMAND_ENCODER: {
id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder();
BLI_assert(rec != nil);
[rec memoryBarrierWithScope:scope];
return true;
}
}
}
}
/* No barrier support. */
return false;
}
/** \} */
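
From client code, the barrier path above is reached through the GPU module's barrier entry point; a hedged example (assuming the existing GPU_memory_barrier() API, which is not part of this diff):

    /* Sketch: make shader image writes visible to subsequent texture fetches. */
    GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);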
/* -------------------------------------------------------------------- */
/** \name Render Pass State for active RenderCommandEncoder
* \{ */
/* Metal Render Pass State. */
void MTLRenderPassState::prepare(MTLCommandBufferManager *cmd, MTLContext *mtl_ctx)
{
this->cmd = cmd;
this->ctx = mtl_ctx;
this->reset_state();
}
/* Reset binding state when a new RenderCommandEncoder is bound, to ensure
* pipeline resources are re-applied to the new Encoder.
* Note: In Metal, state is only persistent within an MTLCommandEncoder,
* not globally. */
void MTLRenderPassState::reset_state()
{
/* Reset Cached pipeline state. */
this->bound_pso = nil;
this->bound_ds_state = nil;
/* Clear shader binding. */
this->last_bound_shader_state.set(nullptr, 0);
/* Other states. */
MTLFrameBuffer *fb = this->cmd->get_active_framebuffer();
this->last_used_stencil_ref_value = 0;
this->last_scissor_rect = {0,
0,
(unsigned long)((fb != nullptr) ? fb->get_width() : 0),
(unsigned long)((fb != nullptr) ? fb->get_height() : 0)};
/* Reset cached resource binding state */
for (int ubo = 0; ubo < MTL_MAX_UNIFORM_BUFFER_BINDINGS; ubo++) {
this->cached_vertex_buffer_bindings[ubo].is_bytes = false;
this->cached_vertex_buffer_bindings[ubo].metal_buffer = nil;
this->cached_vertex_buffer_bindings[ubo].offset = -1;
this->cached_fragment_buffer_bindings[ubo].is_bytes = false;
this->cached_fragment_buffer_bindings[ubo].metal_buffer = nil;
this->cached_fragment_buffer_bindings[ubo].offset = -1;
}
/* Reset cached texture and sampler state binding state. */
for (int tex = 0; tex < MTL_MAX_TEXTURE_SLOTS; tex++) {
this->cached_vertex_texture_bindings[tex].metal_texture = nil;
this->cached_vertex_sampler_state_bindings[tex].sampler_state = nil;
this->cached_vertex_sampler_state_bindings[tex].is_arg_buffer_binding = false;
this->cached_fragment_texture_bindings[tex].metal_texture = nil;
this->cached_fragment_sampler_state_bindings[tex].sampler_state = nil;
this->cached_fragment_sampler_state_bindings[tex].is_arg_buffer_binding = false;
}
}
/* Bind Texture to current RenderCommandEncoder. */
void MTLRenderPassState::bind_vertex_texture(id<MTLTexture> tex, uint slot)
{
if (this->cached_vertex_texture_bindings[slot].metal_texture != tex) {
id<MTLRenderCommandEncoder> rec = this->cmd->get_active_render_command_encoder();
BLI_assert(rec != nil);
[rec setVertexTexture:tex atIndex:slot];
this->cached_vertex_texture_bindings[slot].metal_texture = tex;
}
}
void MTLRenderPassState::bind_fragment_texture(id<MTLTexture> tex, uint slot)
{
if (this->cached_fragment_texture_bindings[slot].metal_texture != tex) {
id<MTLRenderCommandEncoder> rec = this->cmd->get_active_render_command_encoder();
BLI_assert(rec != nil);
[rec setFragmentTexture:tex atIndex:slot];
this->cached_fragment_texture_bindings[slot].metal_texture = tex;
}
}
void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
/* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
* implemented alongside MTLShader. */
}
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
/* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
* implemented alongside MTLShader. */
}
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
/* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
* implemented alongside the full MTLMemoryManager. */
}
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
/* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
* implemented alongside the full MTLMemoryManager. */
}
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
{
/* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
* implemented alongside the full MTLMemoryManager. */
}
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
{
/* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
* implemented alongside the full MTLMemoryManager. */
}
/** \} */
} // namespace blender::gpu

View File

@@ -6,5 +6,6 @@
// -- Renderer Options --
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
#define MTL_MAX_COMMAND_BUFFERS 64
#endif

View File

@@ -10,7 +10,9 @@
#include "GPU_common_types.h"
#include "GPU_context.h"
#include "mtl_backend.hh"
#include "mtl_capabilities.hh"
#include "mtl_framebuffer.hh"
#include "mtl_texture.hh"
#include <Cocoa/Cocoa.h>
@@ -23,12 +25,118 @@
namespace blender::gpu {
/* Forward Declarations */
class MTLContext;
class MTLCommandBufferManager;
class MTLShader;
class MTLUniformBuf;
class MTLBuffer;
/* Structs containing information on current binding state for textures and samplers. */
struct MTLTextureBinding {
bool used;
/* Same value as index in bindings array. */
uint texture_slot_index;
gpu::MTLTexture *texture_resource;
};
struct MTLSamplerBinding {
bool used;
MTLSamplerState state;
bool operator==(MTLSamplerBinding const &other) const
{
return (used == other.used && state == other.state);
}
};
/* Metal Context Render Pass State -- Used to track active RenderCommandEncoder state based on
* bound MTLFrameBuffer's. Owned by MTLContext. */
struct MTLRenderPassState {
friend class MTLContext;
/* Given a RenderPassState is associated with a live RenderCommandEncoder,
* this state sits within the MTLCommandBufferManager. */
MTLCommandBufferManager *cmd;
MTLContext *ctx;
/* Caching of resource bindings for active MTLRenderCommandEncoder.
* In Metal, resource bindings are local to the MTLCommandEncoder,
* not globally to the whole pipeline/cmd buffer. */
struct MTLBoundShaderState {
MTLShader *shader_ = nullptr;
uint pso_index_;
void set(MTLShader *shader, uint pso_index)
{
shader_ = shader;
pso_index_ = pso_index;
}
};
MTLBoundShaderState last_bound_shader_state;
id<MTLRenderPipelineState> bound_pso = nil;
id<MTLDepthStencilState> bound_ds_state = nil;
uint last_used_stencil_ref_value = 0;
MTLScissorRect last_scissor_rect;
/* Caching of CommandEncoder Vertex/Fragment buffer bindings. */
struct BufferBindingCached {
/* Whether the given binding slot uses byte data (Push Constant equivalent)
* or an MTLBuffer. */
bool is_bytes;
id<MTLBuffer> metal_buffer;
int offset;
};
BufferBindingCached cached_vertex_buffer_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
BufferBindingCached cached_fragment_buffer_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
/* Caching of CommandEncoder textures bindings. */
struct TextureBindingCached {
id<MTLTexture> metal_texture;
};
TextureBindingCached cached_vertex_texture_bindings[MTL_MAX_TEXTURE_SLOTS];
TextureBindingCached cached_fragment_texture_bindings[MTL_MAX_TEXTURE_SLOTS];
/* Caching of CommandEncoder sampler states. */
struct SamplerStateBindingCached {
MTLSamplerState binding_state;
id<MTLSamplerState> sampler_state;
bool is_arg_buffer_binding;
};
SamplerStateBindingCached cached_vertex_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS];
SamplerStateBindingCached cached_fragment_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS];
/* Prepare. */
void prepare(MTLCommandBufferManager *cmd, MTLContext *ctx);
/* Reset RenderCommandEncoder binding state. */
void reset_state();
/* Texture Binding (RenderCommandEncoder). */
void bind_vertex_texture(id<MTLTexture> tex, uint slot);
void bind_fragment_texture(id<MTLTexture> tex, uint slot);
/* Sampler Binding (RenderCommandEncoder). */
void bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot);
void bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot);
/* Buffer binding (RenderCommandEncoder). */
void bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index);
void bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index);
void bind_vertex_bytes(void *bytes, uint length, uint index);
void bind_fragment_bytes(void *bytes, uint length, uint index);
};
/* Depth Stencil State */
typedef struct MTLContextDepthStencilState {
struct MTLContextDepthStencilState {
/* Depth State. */
bool depth_write_enable;
@@ -44,9 +152,9 @@ typedef struct MTLContextDepthStencilState {
/* Stencil State. */
bool stencil_test_enabled;
unsigned int stencil_read_mask;
unsigned int stencil_write_mask;
unsigned int stencil_ref;
uint stencil_read_mask;
uint stencil_write_mask;
uint stencil_ref;
MTLCompareFunction stencil_func;
MTLStencilOperation stencil_op_front_stencil_fail;
@@ -65,7 +173,7 @@ typedef struct MTLContextDepthStencilState {
/* TODO(Metal): Consider optimizing this function using memcmp.
* Un-used, but differing, stencil state leads to over-generation
* of state objects when doing trivial compare. */
inline bool operator==(const MTLContextDepthStencilState &other) const
bool operator==(const MTLContextDepthStencilState &other) const
{
bool depth_state_equality = (has_depth_target == other.has_depth_target &&
depth_write_enable == other.depth_write_enable &&
@@ -98,7 +206,7 @@ typedef struct MTLContextDepthStencilState {
* - setStencilReferenceValue:
* - setDepthBias:slopeScale:clamp:
*/
inline std::size_t hash() const
std::size_t hash() const
{
std::size_t boolean_bitmask = (this->depth_write_enable ? 1 : 0) |
((this->depth_test_enabled ? 1 : 0) << 1) |
@@ -127,9 +235,9 @@ typedef struct MTLContextDepthStencilState {
std::size_t final_hash = (main_hash << 8) | boolean_bitmask;
return final_hash;
}
} MTLContextDepthStencilState;
};
typedef struct MTLContextTextureUtils {
struct MTLContextTextureUtils {
/* Depth Update Utilities */
/* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
@@ -174,8 +282,7 @@ typedef struct MTLContextTextureUtils {
blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>>
texture_buffer_update_compute_psos;
template<typename T>
inline void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map)
template<typename T> void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map)
{
for (typename blender::Map<T, id<MTLComputePipelineState>>::MutableItem item : map.items()) {
[item.value release];
@@ -183,12 +290,12 @@ typedef struct MTLContextTextureUtils {
map.clear();
}
inline void init()
void init()
{
fullscreen_blit_shader = nullptr;
}
inline void cleanup()
void cleanup()
{
if (fullscreen_blit_shader) {
GPU_shader_free(fullscreen_blit_shader);
@@ -213,37 +320,16 @@ typedef struct MTLContextTextureUtils {
free_cached_pso_map(texture_cube_array_update_compute_psos);
free_cached_pso_map(texture_buffer_update_compute_psos);
}
} MTLContextTextureUtils;
/* Structs containing information on current binding state for textures and samplers. */
typedef struct MTLTextureBinding {
bool used;
/* Same value as index in bindings array. */
unsigned int texture_slot_index;
gpu::MTLTexture *texture_resource;
} MTLTextureBinding;
typedef struct MTLSamplerBinding {
bool used;
MTLSamplerState state;
bool operator==(MTLSamplerBinding const &other) const
{
return (used == other.used && state == other.state);
}
} MTLSamplerBinding;
};
/* Combined sampler state configuration for Argument Buffer caching. */
struct MTLSamplerArray {
unsigned int num_samplers;
uint num_samplers;
/* MTLSamplerState permutations between 0..256 - slightly more than a byte. */
MTLSamplerState mtl_sampler_flags[MTL_MAX_TEXTURE_SLOTS];
id<MTLSamplerState> mtl_sampler[MTL_MAX_TEXTURE_SLOTS];
inline bool operator==(const MTLSamplerArray &other) const
bool operator==(const MTLSamplerArray &other) const
{
if (this->num_samplers != other.num_samplers) {
return false;
@@ -253,7 +339,7 @@ struct MTLSamplerArray {
sizeof(MTLSamplerState) * this->num_samplers) == 0);
}
inline uint32_t hash() const
uint32_t hash() const
{
uint32_t hash = this->num_samplers;
for (int i = 0; i < this->num_samplers; i++) {
@@ -287,12 +373,12 @@ typedef enum MTLPipelineStateDirtyFlag {
/* Ignore full flag bit-mask `MTL_PIPELINE_STATE_ALL_FLAG`. */
ENUM_OPERATORS(MTLPipelineStateDirtyFlag, MTL_PIPELINE_STATE_CULLMODE_FLAG);
typedef struct MTLUniformBufferBinding {
struct MTLUniformBufferBinding {
bool bound;
MTLUniformBuf *ubo;
} MTLUniformBufferBinding;
};
typedef struct MTLContextGlobalShaderPipelineState {
struct MTLContextGlobalShaderPipelineState {
bool initialised;
/* Whether the pipeline state has been modified since application.
@@ -358,11 +444,10 @@ typedef struct MTLContextGlobalShaderPipelineState {
/* Render parameters. */
float point_size = 1.0f;
float line_width = 1.0f;
} MTLContextGlobalShaderPipelineState;
};
/* Metal Buffer */
typedef struct MTLTemporaryBufferRange {
struct MTLTemporaryBufferRange {
id<MTLBuffer> metal_buffer;
void *host_ptr;
unsigned long long buffer_offset;
@@ -371,7 +456,118 @@ typedef struct MTLTemporaryBufferRange {
void flush();
bool requires_flush();
} MTLTemporaryBufferRange;
};
/* Command Buffer Manager - Owned by MTLContext.
* The MTLCommandBufferManager represents all work associated with
* a command buffer of a given identity. This manager is a fixed-state
* on the context, which coordinates the lifetime of command buffers
* for particular categories of work.
*
* This ensures operations on command buffers, and the state associated,
* is correctly tracked and managed. Workload submission and MTLCommandEncoder
* coordination is managed from here.
*
* There is currently only one MTLCommandBufferManager for managing submission
* of the "main" rendering commands. A secondary upload command buffer track,
* or asynchronous compute command buffer track may be added in the future. */
class MTLCommandBufferManager {
friend class MTLContext;
public:
/* Event to coordinate sequential execution across all "main" command buffers. */
static id<MTLEvent> sync_event;
static unsigned long long event_signal_val;
/* Counter for active command buffers. */
static int num_active_cmd_bufs;
private:
/* Associated Context and properties. */
MTLContext *context_ = nullptr;
bool supports_render_ = false;
/* CommandBuffer tracking. */
id<MTLCommandBuffer> active_command_buffer_ = nil;
id<MTLCommandBuffer> last_submitted_command_buffer_ = nil;
/* Active MTLCommandEncoders. */
enum {
MTL_NO_COMMAND_ENCODER = 0,
MTL_RENDER_COMMAND_ENCODER = 1,
MTL_BLIT_COMMAND_ENCODER = 2,
MTL_COMPUTE_COMMAND_ENCODER = 3
} active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER;
id<MTLRenderCommandEncoder> active_render_command_encoder_ = nil;
id<MTLBlitCommandEncoder> active_blit_command_encoder_ = nil;
id<MTLComputeCommandEncoder> active_compute_command_encoder_ = nil;
/* State associated with active RenderCommandEncoder. */
MTLRenderPassState render_pass_state_;
MTLFrameBuffer *active_frame_buffer_ = nullptr;
MTLRenderPassDescriptor *active_pass_descriptor_ = nullptr;
/* Workload heuristics - We may need to split command buffers to optimise workload and balancing.
*/
int current_draw_call_count_ = 0;
int encoder_count_ = 0;
int vertex_submitted_count_ = 0;
bool empty_ = true;
public:
void prepare(MTLContext *ctx, bool supports_render = true);
/* If wait is true, CPU will stall until GPU work has completed. */
bool submit(bool wait);
/* Fetch/query current encoder. */
bool is_inside_render_pass();
bool is_inside_blit();
bool is_inside_compute();
id<MTLRenderCommandEncoder> get_active_render_command_encoder();
id<MTLBlitCommandEncoder> get_active_blit_command_encoder();
id<MTLComputeCommandEncoder> get_active_compute_command_encoder();
MTLFrameBuffer *get_active_framebuffer();
/* RenderPassState for RenderCommandEncoder. */
MTLRenderPassState &get_render_pass_state()
{
/* Render pass state should only be valid if we are inside a render pass. */
BLI_assert(this->is_inside_render_pass());
return render_pass_state_;
}
/* Rendering Heuristics. */
void register_draw_counters(int vertex_submission);
void reset_counters();
bool do_break_submission();
/* Encoder and Pass management. */
/* End currently active MTLCommandEncoder. */
bool end_active_command_encoder();
id<MTLRenderCommandEncoder> ensure_begin_render_command_encoder(MTLFrameBuffer *ctx_framebuffer,
bool force_begin,
bool *new_pass);
id<MTLBlitCommandEncoder> ensure_begin_blit_encoder();
id<MTLComputeCommandEncoder> ensure_begin_compute_encoder();
/* Workload Synchronisation. */
bool insert_memory_barrier(eGPUBarrier barrier_bits,
eGPUStageBarrierBits before_stages,
eGPUStageBarrierBits after_stages);
/* TODO(Metal): Support fences in command buffer class. */
/* Debug. */
void push_debug_group(const char *name, int index);
void pop_debug_group();
private:
/* Begin new command buffer. */
id<MTLCommandBuffer> ensure_begin();
void register_encoder_counters();
};
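
Putting the interface together, the intended per-draw flow (mirroring MTLContext::ensure_begin_render_pass elsewhere in this change; the encoding step is a placeholder) is sketched below:

    /* Sketch: how a draw interacts with the command buffer manager,
     * assuming an active MTLContext `ctx`. */
    static void draw_example(MTLContext *ctx, int vertex_count)
    {
      /* Lazily begins the command buffer, render pass and encoder as needed. */
      id<MTLRenderCommandEncoder> enc = ctx->ensure_begin_render_pass();
      (void)enc; /* ... encode pipeline state and the draw here ... */
      /* Feed the submission-splitting heuristics. */
      ctx->main_command_buffer.register_draw_counters(vertex_count);
      if (ctx->main_command_buffer.do_break_submission()) {
        ctx->flush(); /* Split heavy workloads across multiple command buffers. */
      }
    }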
/** MTLContext -- Core render loop and state management. **/
/* NOTE(Metal): Partial MTLContext stub to provide wrapper functionality
@@ -397,6 +593,9 @@ class MTLContext : public Context {
MTLSamplerArray samplers_;
blender::Map<MTLSamplerArray, gpu::MTLBuffer *> cached_sampler_buffers_;
/* Frame. */
bool is_inside_frame_ = false;
public:
/* Shaders and Pipeline state. */
MTLContextGlobalShaderPipelineState pipeline_state;
@@ -405,17 +604,22 @@ class MTLContext : public Context {
id<MTLCommandQueue> queue = nil;
id<MTLDevice> device = nil;
/* CommandBuffer managers. */
MTLCommandBufferManager main_command_buffer;
/* GPUContext interface. */
MTLContext(void *ghost_window);
~MTLContext();
static void check_error(const char *info);
void activate(void) override;
void deactivate(void) override;
void activate() override;
void deactivate() override;
void begin_frame() override;
void end_frame() override;
void flush(void) override;
void finish(void) override;
void flush() override;
void finish() override;
void memory_statistics_get(int *total_mem, int *free_mem) override;
@@ -428,27 +632,32 @@ class MTLContext : public Context {
* rendering, binding resources, setting global state, resource management etc;
*/
/* Metal Context Core functions. */
/* Command Buffer Management. */
id<MTLCommandBuffer> get_active_command_buffer();
/** Metal Context Core functions. **/
/* Render Pass State and Management. */
void begin_render_pass();
void end_render_pass();
bool is_render_pass_active();
/* Bind framebuffer to context. */
void framebuffer_bind(MTLFrameBuffer *framebuffer);
/* Texture Binding. */
void texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture_unit);
void sampler_bind(MTLSamplerState, unsigned int sampler_unit);
/* Restore framebuffer used by active context to default backbuffer. */
void framebuffer_restore();
/* Ensure a render-pass using the Context framebuffer (active_fb_) is in progress. */
id<MTLRenderCommandEncoder> ensure_begin_render_pass();
MTLFrameBuffer *get_current_framebuffer();
MTLFrameBuffer *get_default_framebuffer();
/* Context Global-State Texture Binding. */
void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit);
void sampler_bind(MTLSamplerState, uint sampler_unit);
void texture_unbind(gpu::MTLTexture *mtl_texture);
void texture_unbind_all(void);
void texture_unbind_all();
id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state);
id<MTLSamplerState> generate_sampler_from_state(MTLSamplerState state);
id<MTLSamplerState> get_default_sampler_state();
/* Metal Context pipeline state. */
void pipeline_state_init(void);
MTLShader *get_active_shader(void);
void pipeline_state_init();
MTLShader *get_active_shader();
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);
@@ -458,7 +667,17 @@ class MTLContext : public Context {
/* Texture utilities. */
MTLContextTextureUtils &get_texture_utils()
{
return this->texture_utils_;
return texture_utils_;
}
bool get_active()
{
return is_active_;
}
bool get_inside_frame()
{
return is_inside_frame_;
}
};

View File

@@ -22,7 +22,7 @@ namespace blender::gpu {
bool MTLTemporaryBufferRange::requires_flush()
{
/* We do not need to flush shared memory */
/* We do not need to flush shared memory. */
return this->options & MTLResourceStorageModeManaged;
}
@@ -49,15 +49,86 @@ MTLContext::MTLContext(void *ghost_window)
/* Init debug. */
debug::mtl_debug_init();
/* Initialise command buffer state. */
this->main_command_buffer.prepare(this);
/* Frame management. */
is_inside_frame_ = false;
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
this->front_left = mtl_front_left;
this->back_left = mtl_back_left;
this->active_fb = this->back_left;
/* Prepare platform and capabilities. (Note: With METAL, this needs to be done after CTX
* initialisation). */
MTLBackend::platform_init(this);
MTLBackend::capabilities_init(this);
/* Initialize Metal modules. */
this->state_manager = new MTLStateManager(this);
/* TODO(Metal): Implement. */
/* Initialise texture read/update structures. */
this->get_texture_utils().init();
/* Bound Samplers struct. */
for (int i = 0; i < MTL_MAX_TEXTURE_SLOTS; i++) {
samplers_.mtl_sampler[i] = nil;
samplers_.mtl_sampler_flags[i] = DEFAULT_SAMPLER_STATE;
}
/* Initialise samplers. */
for (uint i = 0; i < GPU_SAMPLER_MAX; i++) {
MTLSamplerState state;
state.state = static_cast<eGPUSamplerState>(i);
sampler_state_cache_[i] = this->generate_sampler_from_state(state);
}
}
MTLContext::~MTLContext()
{
/* TODO(Metal): Implement. */
BLI_assert(this == reinterpret_cast<MTLContext *>(GPU_context_active_get()));
/* Ensure rendering is complete and command encoders/command buffers are freed. */
if (MTLBackend::get()->is_inside_render_boundary()) {
this->finish();
/* End frame. */
if (is_inside_frame_) {
this->end_frame();
}
}
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
/* Release Sampler States. */
for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
if (sampler_state_cache_[i] != nil) {
[sampler_state_cache_[i] release];
sampler_state_cache_[i] = nil;
}
}
}
void MTLContext::begin_frame()
{
BLI_assert(MTLBackend::get()->is_inside_render_boundary());
if (is_inside_frame_) {
return;
}
/* Begin Command buffer for next frame. */
is_inside_frame_ = true;
}
void MTLContext::end_frame()
{
BLI_assert(is_inside_frame_);
/* Ensure pre-present work is committed. */
this->flush();
/* Increment frame counter. */
is_inside_frame_ = false;
}
void MTLContext::check_error(const char *info)
@@ -90,26 +161,83 @@ void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
*free_mem = 0;
}
id<MTLCommandBuffer> MTLContext::get_active_command_buffer()
void MTLContext::framebuffer_bind(MTLFrameBuffer *framebuffer)
{
/* TODO(Metal): Implement. */
return nil;
/* We do not yet begin the pass -- We defer beginning the pass until a draw is requested. */
BLI_assert(framebuffer);
this->active_fb = framebuffer;
}
/* Render Pass State and Management */
void MTLContext::begin_render_pass()
void MTLContext::framebuffer_restore()
{
/* TODO(Metal): Implement. */
}
void MTLContext::end_render_pass()
{
/* TODO(Metal): Implement. */
/* Bind default framebuffer from context --
* We defer beginning the pass until a draw is requested. */
this->active_fb = this->back_left;
}
bool MTLContext::is_render_pass_active()
id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
{
/* TODO(Metal): Implement. */
return false;
BLI_assert(this);
/* Ensure the rendering frame has started. */
if (!is_inside_frame_) {
this->begin_frame();
}
/* Check whether a framebuffer is bound. */
if (!this->active_fb) {
BLI_assert(false && "No framebuffer is bound!");
return this->main_command_buffer.get_active_render_command_encoder();
}
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
/* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
if (this->main_command_buffer.do_break_submission()/*&&
!((MTLImmediate *)(this->imm))->imm_is_recording()*/) {
this->flush();
}
/* Begin pass or perform a pass switch if the active framebuffer has been changed, or if the
* framebuffer state has been modified (is_dirty). */
if (!this->main_command_buffer.is_inside_render_pass() ||
this->active_fb != this->main_command_buffer.get_active_framebuffer() ||
this->main_command_buffer.get_active_framebuffer()->get_dirty()) {
/* Validate bound framebuffer before beginning render pass. */
if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) {
MTL_LOG_WARNING("Framebuffer validation failed, falling back to default framebuffer\n");
this->framebuffer_restore();
if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) {
MTL_LOG_ERROR("CRITICAL: DEFAULT FRAMEBUFFER FAIL VALIDATION!!\n");
}
}
/* Begin RenderCommandEncoder on main CommandBuffer. */
bool new_render_pass = false;
id<MTLRenderCommandEncoder> new_enc =
this->main_command_buffer.ensure_begin_render_command_encoder(
static_cast<MTLFrameBuffer *>(this->active_fb), true, &new_render_pass);
if (new_render_pass) {
/* Flag context pipeline state as dirty - dynamic pipeline state need re-applying. */
this->pipeline_state.dirty_flags = MTL_PIPELINE_STATE_ALL_FLAG;
}
return new_enc;
}
BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
return this->main_command_buffer.get_active_render_command_encoder();
}
MTLFrameBuffer *MTLContext::get_current_framebuffer()
{
MTLFrameBuffer *last_bound = static_cast<MTLFrameBuffer *>(this->active_fb);
return last_bound ? last_bound : this->get_default_framebuffer();
}
MTLFrameBuffer *MTLContext::get_default_framebuffer()
{
return static_cast<MTLFrameBuffer *>(this->back_left);
}
/** \} */
@@ -200,13 +328,68 @@ void MTLContext::pipeline_state_init()
MTLStencilOperationKeep;
}
void MTLContext::set_viewport(int origin_x, int origin_y, int width, int height)
{
BLI_assert(this);
BLI_assert(width > 0);
BLI_assert(height > 0);
BLI_assert(origin_x >= 0);
BLI_assert(origin_y >= 0);
bool changed = (this->pipeline_state.viewport_offset_x != origin_x) ||
(this->pipeline_state.viewport_offset_y != origin_y) ||
(this->pipeline_state.viewport_width != width) ||
(this->pipeline_state.viewport_height != height);
this->pipeline_state.viewport_offset_x = origin_x;
this->pipeline_state.viewport_offset_y = origin_y;
this->pipeline_state.viewport_width = width;
this->pipeline_state.viewport_height = height;
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_VIEWPORT_FLAG);
}
}
void MTLContext::set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height)
{
BLI_assert(this);
bool changed = (this->pipeline_state.scissor_x != scissor_x) ||
(this->pipeline_state.scissor_y != scissor_y) ||
(this->pipeline_state.scissor_width != scissor_width) ||
(this->pipeline_state.scissor_height != scissor_height) ||
(this->pipeline_state.scissor_enabled != true);
this->pipeline_state.scissor_x = scissor_x;
this->pipeline_state.scissor_y = scissor_y;
this->pipeline_state.scissor_width = scissor_width;
this->pipeline_state.scissor_height = scissor_height;
this->pipeline_state.scissor_enabled = (scissor_width > 0 && scissor_height > 0);
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_SCISSOR_FLAG);
}
}
void MTLContext::set_scissor_enabled(bool scissor_enabled)
{
/* Only turn on Scissor if requested scissor region is valid */
scissor_enabled = scissor_enabled && (this->pipeline_state.scissor_width > 0 &&
this->pipeline_state.scissor_height > 0);
bool changed = (this->pipeline_state.scissor_enabled != scissor_enabled);
this->pipeline_state.scissor_enabled = scissor_enabled;
if (changed) {
this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags |
MTL_PIPELINE_STATE_SCISSOR_FLAG);
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Texture State Management
* \{ */
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture_unit)
void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit)
{
BLI_assert(this);
BLI_assert(mtl_texture);
@@ -226,7 +409,7 @@ void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture
mtl_texture->is_bound_ = true;
}
void MTLContext::sampler_bind(MTLSamplerState sampler_state, unsigned int sampler_unit)
void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit)
{
BLI_assert(this);
if (sampler_unit < 0 || sampler_unit >= GPU_max_textures() ||
@@ -271,14 +454,14 @@ void MTLContext::texture_unbind_all()
id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_state)
{
BLI_assert((unsigned int)sampler_state >= 0 && ((unsigned int)sampler_state) < GPU_SAMPLER_MAX);
return this->sampler_state_cache_[(unsigned int)sampler_state];
BLI_assert((uint)sampler_state >= 0 && ((uint)sampler_state) < GPU_SAMPLER_MAX);
return sampler_state_cache_[(uint)sampler_state];
}
id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state)
{
/* Check if sampler already exists for given state. */
id<MTLSamplerState> st = this->sampler_state_cache_[(unsigned int)sampler_state];
id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state];
if (st != nil) {
return st;
}
@@ -318,7 +501,7 @@ id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState samp
descriptor.supportArgumentBuffers = true;
id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor];
this->sampler_state_cache_[(unsigned int)sampler_state] = state;
sampler_state_cache_[(uint)sampler_state] = state;
BLI_assert(state != nil);
[descriptor autorelease];
@@ -328,10 +511,10 @@ id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState samp
id<MTLSamplerState> MTLContext::get_default_sampler_state()
{
if (this->default_sampler_state_ == nil) {
this->default_sampler_state_ = this->get_sampler_from_state(DEFAULT_SAMPLER_STATE);
if (default_sampler_state_ == nil) {
default_sampler_state_ = this->get_sampler_from_state(DEFAULT_SAMPLER_STATE);
}
return this->default_sampler_state_;
return default_sampler_state_;
}
/** \} */

View File

@@ -46,20 +46,14 @@ namespace blender::gpu {
void MTLContext::debug_group_begin(const char *name, int index)
{
if (G.debug & G_DEBUG_GPU) {
id<MTLCommandBuffer> cmd = this->get_active_command_buffer();
if (cmd != nil) {
[cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]];
}
this->main_command_buffer.push_debug_group(name, index);
}
}
void MTLContext::debug_group_end()
{
if (G.debug & G_DEBUG_GPU) {
id<MTLCommandBuffer> cmd = this->get_active_command_buffer();
if (cmd != nil) {
[cmd popDebugGroup];
}
this->main_command_buffer.pop_debug_group();
}
}
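/* Usage sketch: these hooks back the generic GPU debug API, so call sites
 * label regions via GPU_debug_group_begin()/GPU_debug_group_end() and the
 * groups then show up in Xcode's GPU frame capture. `draw_shadow_casters()`
 * is a hypothetical helper; groups are only recorded under G_DEBUG_GPU. */
GPU_debug_group_begin("ShadowPass");
draw_shadow_casters();
GPU_debug_group_end();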


@@ -0,0 +1,231 @@
/** \file
* \ingroup gpu
*
* Encapsulation of Framebuffer states (attached textures, viewport, scissors).
*/
#pragma once
#include "GPU_common_types.h"
#include "MEM_guardedalloc.h"
#include "gpu_framebuffer_private.hh"
#include "mtl_texture.hh"
#include <Metal/Metal.h>
namespace blender::gpu {
class MTLContext;
struct MTLAttachment {
bool used;
gpu::MTLTexture *texture;
union {
float color[4];
float depth;
uint stencil;
} clear_value;
eGPULoadOp load_action;
eGPUStoreOp store_action;
uint mip;
uint slice;
uint depth_plane;
  /* If array length is larger than zero, multilayered rendering is used. */
uint render_target_array_length;
};
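/* Illustrative sketch, not part of this commit: describing a color attachment
 * that is cleared on load and kept after the pass. `metal_tex` is an assumed
 * render-target texture. */
static inline MTLAttachment example_color_attachment(gpu::MTLTexture *metal_tex)
{
  MTLAttachment att = {};
  att.used = true;
  att.texture = metal_tex;
  att.clear_value.color[3] = 1.0f;          /* Opaque black (RGB zero-initialized). */
  att.load_action = GPU_LOADACTION_CLEAR;   /* Clear contents on load. */
  att.store_action = GPU_STOREACTION_STORE; /* Keep results after the pass. */
  att.render_target_array_length = 0;       /* Zero: no multilayered rendering. */
  return att;
}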
/**
* Implementation of FrameBuffer object using Metal.
**/
class MTLFrameBuffer : public FrameBuffer {
private:
/* Context Handle. */
MTLContext *context_;
/* Metal Attachment properties. */
uint colour_attachment_count_;
MTLAttachment mtl_color_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT];
MTLAttachment mtl_depth_attachment_;
MTLAttachment mtl_stencil_attachment_;
bool use_multilayered_rendering_ = false;
/* State. */
/* Whether global framebuffer properties have changed and require
* re-generation of MTLRenderPassDescriptor/RenderCommandEncoders. */
bool is_dirty_;
  /* Whether load-store properties have changed (only affects certain cached configs). */
bool is_loadstore_dirty_;
/* Context that the latest modified state was last applied to.
* If this does not match current ctx, re-apply state. */
MTLContext *dirty_state_ctx_;
  /* Whether a clear is pending. Used to toggle between the clear and load framebuffer
   * configurations without dirtying the state. The load config is used if no GPU_clear_*
   * command was issued after binding the FrameBuffer. */
bool has_pending_clear_;
/* Render Pass Descriptors:
* There are 3 MTLRenderPassDescriptors for different ways in which a framebuffer
* can be configured:
* [0] = CLEAR CONFIG -- Used when a GPU_framebuffer_clear_* command has been issued.
* [1] = LOAD CONFIG -- Used if bound, but no clear is required.
* [2] = CUSTOM CONFIG -- When using GPU_framebuffer_bind_ex to manually specify
* load-store configuration for optimal bandwidth utilisation.
* -- We cache these different configs to avoid re-generation --
*/
typedef enum {
MTL_FB_CONFIG_CLEAR = 0,
MTL_FB_CONFIG_LOAD = 1,
MTL_FB_CONFIG_CUSTOM = 2
} MTL_FB_CONFIG;
#define MTL_FB_CONFIG_MAX (MTL_FB_CONFIG_CUSTOM + 1)
MTLRenderPassDescriptor *framebuffer_descriptor_[MTL_FB_CONFIG_MAX];
MTLRenderPassColorAttachmentDescriptor
*colour_attachment_descriptors_[GPU_FB_MAX_COLOR_ATTACHMENT];
/* Whether MTLRenderPassDescriptor[N] requires updating with latest state. */
bool descriptor_dirty_[MTL_FB_CONFIG_MAX];
/* Whether SRGB is enabled for this framebuffer configuration. */
bool srgb_enabled_;
/* Whether the primary Framebuffer attachment is an SRGB target or not. */
bool is_srgb_;
public:
/**
   * Create a conventional framebuffer to attach textures to.
**/
MTLFrameBuffer(MTLContext *ctx, const char *name);
~MTLFrameBuffer();
void bind(bool enabled_srgb) override;
bool check(char err_out[256]) override;
void clear(eGPUFrameBufferBits buffers,
const float clear_col[4],
float clear_depth,
uint clear_stencil) override;
void clear_multi(const float (*clear_cols)[4]) override;
void clear_attachment(GPUAttachmentType type,
eGPUDataFormat data_format,
const void *clear_value) override;
void attachment_set_loadstore_op(GPUAttachmentType type,
eGPULoadOp load_action,
eGPUStoreOp store_action) override;
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,
const int area[4],
int channel_len,
int slot,
void *r_data) override;
void blit_to(eGPUFrameBufferBits planes,
int src_slot,
FrameBuffer *dst,
int dst_slot,
int dst_offset_x,
int dst_offset_y) override;
void apply_state();
/* State. */
/* Flag MTLFramebuffer configuration as having changed. */
void mark_dirty();
void mark_loadstore_dirty();
/* Mark that a pending clear has been performed. */
void mark_cleared();
/* Mark that we have a pending clear. */
void mark_do_clear();
/* Attachment management. */
/* When dirty_attachments_ is true, we need to reprocess attachments to extract Metal
* information. */
void update_attachments(bool update_viewport);
bool add_color_attachment(gpu::MTLTexture *texture, uint slot, int miplevel, int layer);
bool add_depth_attachment(gpu::MTLTexture *texture, int miplevel, int layer);
bool add_stencil_attachment(gpu::MTLTexture *texture, int miplevel, int layer);
bool remove_color_attachment(uint slot);
bool remove_depth_attachment();
bool remove_stencil_attachment();
void remove_all_attachments();
void ensure_render_target_size();
/* Clear values -> Load/store actions. */
bool set_color_attachment_clear_color(uint slot, const float clear_color[4]);
bool set_depth_attachment_clear_value(float depth_clear);
bool set_stencil_attachment_clear_value(uint stencil_clear);
bool set_color_loadstore_op(uint slot, eGPULoadOp load_action, eGPUStoreOp store_action);
bool set_depth_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action);
bool set_stencil_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action);
  /* Remove any pending clears. Ensures the "load" configuration is used. */
bool reset_clear_state();
/* Fetch values */
bool has_attachment_at_slot(uint slot);
bool has_color_attachment_with_texture(gpu::MTLTexture *texture);
bool has_depth_attachment();
bool has_stencil_attachment();
int get_color_attachment_slot_from_texture(gpu::MTLTexture *texture);
uint get_attachment_count();
uint get_attachment_limit()
{
return GPU_FB_MAX_COLOR_ATTACHMENT;
};
MTLAttachment get_color_attachment(uint slot);
MTLAttachment get_depth_attachment();
MTLAttachment get_stencil_attachment();
/* Metal API resources and validation. */
bool validate_render_pass();
MTLRenderPassDescriptor *bake_render_pass_descriptor(bool load_contents);
/* Blitting. */
void blit(uint read_slot,
uint src_x_offset,
uint src_y_offset,
MTLFrameBuffer *metal_fb_write,
uint write_slot,
uint dst_x_offset,
uint dst_y_offset,
uint width,
uint height,
eGPUFrameBufferBits blit_buffers);
int get_width();
int get_height();
bool get_dirty()
{
return is_dirty_ || is_loadstore_dirty_;
}
bool get_pending_clear()
{
return has_pending_clear_;
}
bool get_srgb_enabled()
{
return srgb_enabled_;
}
bool get_is_srgb()
{
return is_srgb_;
}
private:
/* Clears a render target by force-opening a render pass. */
void force_clear();
MEM_CXX_CLASS_ALLOC_FUNCS("MTLFrameBuffer");
};
} // namespace blender::gpu
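/* Usage sketch for the CUSTOM config (assumption, not part of this commit):
 * per-attachment load-store overrides let a pass skip loading undefined
 * contents and discard transient data such as depth, saving tile load/store
 * bandwidth on tile-based GPUs. `fb` stands for a bound MTLFrameBuffer. */
fb->attachment_set_loadstore_op(
    GPU_FB_COLOR_ATTACHMENT0, GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE);
fb->attachment_set_loadstore_op(
    GPU_FB_DEPTH_ATTACHMENT, GPU_LOADACTION_CLEAR, GPU_STOREACTION_DONT_CARE);
/* The next render pass bakes these into the MTL_FB_CONFIG_CUSTOM descriptor
 * via bake_render_pass_descriptor(). */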

File diff suppressed because it is too large.


@@ -62,10 +62,10 @@ class MTLStateManager : public StateManager {
void set_mutable_state(const GPUStateMutable &state);
/* METAL State utility functions. */
void mtl_state_init(void);
void mtl_state_init();
void mtl_depth_range(float near, float far);
void mtl_stencil_mask(unsigned int mask);
void mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, unsigned int mask);
void mtl_stencil_mask(uint mask);
void mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, uint mask);
MEM_CXX_CLASS_ALLOC_FUNCS("MTLStateManager")
};

View File

@@ -8,6 +8,7 @@
#include "GPU_framebuffer.h"
#include "mtl_context.hh"
#include "mtl_framebuffer.hh"
#include "mtl_state.hh"
namespace blender::gpu {
@@ -18,14 +19,14 @@ namespace blender::gpu {
void MTLStateManager::mtl_state_init(void)
{
BLI_assert(this->context_);
this->context_->pipeline_state_init();
BLI_assert(context_);
context_->pipeline_state_init();
}
MTLStateManager::MTLStateManager(MTLContext *ctx) : StateManager()
{
/* Initialize State. */
this->context_ = ctx;
context_ = ctx;
mtl_state_init();
/* Force update using default state. */
@@ -39,8 +40,9 @@ void MTLStateManager::apply_state(void)
{
this->set_state(this->state);
this->set_mutable_state(this->mutable_state);
/* TODO(Metal): Enable after integration of MTLFrameBuffer. */
/* static_cast<MTLFrameBuffer *>(this->context_->active_fb)->apply_state(); */
/* Apply active FrameBuffer state. */
static_cast<MTLFrameBuffer *>(context_->active_fb)->apply_state();
};
void MTLStateManager::force_state(void)
@@ -103,10 +105,10 @@ void MTLStateManager::set_state(const GPUState &state)
void MTLStateManager::mtl_depth_range(float near, float far)
{
BLI_assert(this->context_);
BLI_assert(context_);
BLI_assert(near >= 0.0 && near < 1.0);
BLI_assert(far > 0.0 && far <= 1.0);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state;
ds_state.depth_range_near = near;
@@ -117,7 +119,7 @@ void MTLStateManager::mtl_depth_range(float near, float far)
void MTLStateManager::set_mutable_state(const GPUStateMutable &state)
{
GPUStateMutable changed = state ^ current_mutable_;
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
if (float_as_uint(changed.point_size) != 0) {
pipeline_state.point_size = state.point_size;
@@ -150,8 +152,8 @@ void MTLStateManager::set_mutable_state(const GPUStateMutable &state)
void MTLStateManager::set_write_mask(const eGPUWriteMask value)
{
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
pipeline_state.depth_stencil_state.depth_write_enable = ((value & GPU_WRITE_DEPTH) != 0);
pipeline_state.color_write_mask =
(((value & GPU_WRITE_RED) != 0) ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) |
@@ -205,8 +207,8 @@ static MTLCompareFunction gpu_stencil_func_to_metal(eGPUStencilTest stencil_func
void MTLStateManager::set_depth_test(const eGPUDepthTest value)
{
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state;
ds_state.depth_test_enabled = (value != GPU_DEPTH_NONE);
@@ -214,20 +216,18 @@ void MTLStateManager::set_depth_test(const eGPUDepthTest value)
pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG;
}
void MTLStateManager::mtl_stencil_mask(unsigned int mask)
void MTLStateManager::mtl_stencil_mask(uint mask)
{
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
pipeline_state.depth_stencil_state.stencil_write_mask = mask;
pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG;
}
void MTLStateManager::mtl_stencil_set_func(eGPUStencilTest stencil_func,
int ref,
unsigned int mask)
void MTLStateManager::mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, uint mask)
{
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state;
ds_state.stencil_func = gpu_stencil_func_to_metal(stencil_func);
@@ -275,19 +275,17 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte
{
switch (operation) {
case GPU_STENCIL_OP_REPLACE:
mtl_stencil_set_op(this->context_,
MTLStencilOperationKeep,
MTLStencilOperationKeep,
MTLStencilOperationReplace);
mtl_stencil_set_op(
context_, MTLStencilOperationKeep, MTLStencilOperationKeep, MTLStencilOperationReplace);
break;
case GPU_STENCIL_OP_COUNT_DEPTH_PASS:
      /* Winding is inverted due to the flipped Y coordinate system in Metal. */
mtl_stencil_set_op_separate(this->context_,
mtl_stencil_set_op_separate(context_,
GPU_CULL_FRONT,
MTLStencilOperationKeep,
MTLStencilOperationKeep,
MTLStencilOperationIncrementWrap);
mtl_stencil_set_op_separate(this->context_,
mtl_stencil_set_op_separate(context_,
GPU_CULL_BACK,
MTLStencilOperationKeep,
MTLStencilOperationKeep,
@@ -295,12 +293,12 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte
break;
case GPU_STENCIL_OP_COUNT_DEPTH_FAIL:
      /* Winding is inverted due to the flipped Y coordinate system in Metal. */
mtl_stencil_set_op_separate(this->context_,
mtl_stencil_set_op_separate(context_,
GPU_CULL_FRONT,
MTLStencilOperationKeep,
MTLStencilOperationDecrementWrap,
MTLStencilOperationKeep);
mtl_stencil_set_op_separate(this->context_,
mtl_stencil_set_op_separate(context_,
GPU_CULL_BACK,
MTLStencilOperationKeep,
MTLStencilOperationIncrementWrap,
@@ -308,14 +306,12 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte
break;
case GPU_STENCIL_OP_NONE:
default:
mtl_stencil_set_op(this->context_,
MTLStencilOperationKeep,
MTLStencilOperationKeep,
MTLStencilOperationKeep);
mtl_stencil_set_op(
context_, MTLStencilOperationKeep, MTLStencilOperationKeep, MTLStencilOperationKeep);
}
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
pipeline_state.depth_stencil_state.stencil_test_enabled = (test != GPU_STENCIL_NONE);
pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG;
}
@@ -347,8 +343,8 @@ void MTLStateManager::set_logic_op(const bool enable)
void MTLStateManager::set_facing(const bool invert)
{
/* Check Current Context. */
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
/* Apply State -- opposite of GL, as METAL default is GPU_CLOCKWISE, GL default is
* COUNTERCLOCKWISE. This needs to be the inverse of the default. */
@@ -362,8 +358,8 @@ void MTLStateManager::set_facing(const bool invert)
void MTLStateManager::set_backface_culling(const eGPUFaceCullTest test)
{
/* Check Current Context. */
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
/* Apply State. */
pipeline_state.culling_enabled = (test != GPU_CULL_NONE);
@@ -386,8 +382,8 @@ void MTLStateManager::set_provoking_vert(const eGPUProvokingVertex vert)
void MTLStateManager::set_shadow_bias(const bool enable)
{
/* Check Current Context. */
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state;
/* Apply State. */
@@ -500,8 +496,8 @@ void MTLStateManager::set_blend(const eGPUBlend value)
}
/* Check Current Context. */
BLI_assert(this->context_);
MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state;
BLI_assert(context_);
MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state;
if (value == GPU_BLEND_SUBTRACT) {
pipeline_state.rgb_blend_op = MTLBlendOperationReverseSubtract;
@@ -549,58 +545,18 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits)
MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
BLI_assert(ctx);
if (ctx->is_render_pass_active()) {
/* Apple Silicon does not support memory barriers.
* We do not currently need these due to implicit API guarantees.
* NOTE(Metal): MTLFence/MTLEvent may be required to synchronize work if
* untracked resources are ever used. */
if ([ctx->device hasUnifiedMemory]) {
return;
}
/* Issue barrier. */
/* TODO(Metal): To be completed pending implementation of RenderCommandEncoder management. */
id<MTLRenderCommandEncoder> rec = nil; // ctx->get_active_render_command_encoder();
BLI_assert(rec);
/* Only supporting Metal on 10.15 onward anyway - Check required for warnings. */
if (@available(macOS 10.14, *)) {
MTLBarrierScope scope = 0;
if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS ||
barrier_bits & GPU_BARRIER_TEXTURE_FETCH) {
scope = scope | MTLBarrierScopeTextures | MTLBarrierScopeRenderTargets;
}
if (barrier_bits & GPU_BARRIER_SHADER_STORAGE ||
barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY ||
barrier_bits & GPU_BARRIER_ELEMENT_ARRAY) {
scope = scope | MTLBarrierScopeBuffers;
}
MTLRenderStages before_stage_flags = 0;
MTLRenderStages after_stage_flags = 0;
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
!(before_stages & GPU_BARRIER_STAGE_FRAGMENT)) {
before_stage_flags = before_stage_flags | MTLRenderStageVertex;
}
if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) {
before_stage_flags = before_stage_flags | MTLRenderStageFragment;
}
if (after_stages & GPU_BARRIER_STAGE_VERTEX) {
after_stage_flags = after_stage_flags | MTLRenderStageVertex;
}
if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) {
after_stage_flags = MTLRenderStageFragment;
}
if (scope != 0) {
[rec memoryBarrierWithScope:scope
afterStages:after_stage_flags
beforeStages:before_stage_flags];
}
}
/* Apple Silicon does not support memory barriers.
* We do not currently need these due to implicit API guarantees.
   * NOTE(Metal): MTLFence/MTLEvent may be required to synchronize work if
* untracked resources are ever used. */
if ([ctx->device hasUnifiedMemory]) {
return;
}
ctx->main_command_buffer.insert_memory_barrier(barrier_bits, before_stages, after_stages);
}
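/* Conceptual sketch mirroring the deleted mapping above; the translation now
 * lives behind main_command_buffer.insert_memory_barrier(), whose internals
 * are not shown in this diff. `barrier_bits` is the function parameter. */
MTLBarrierScope scope = 0;
if (barrier_bits & (GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH)) {
  scope = scope | MTLBarrierScopeTextures | MTLBarrierScopeRenderTargets;
}
if (barrier_bits & (GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_VERTEX_ATTRIB_ARRAY |
                    GPU_BARRIER_ELEMENT_ARRAY)) {
  scope = scope | MTLBarrierScopeBuffers;
}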
/** \} */
/* -------------------------------------------------------------------- */


@@ -40,7 +40,7 @@ struct TextureUpdateRoutineSpecialisation {
/* Number of channels the destination texture has (min=1, max=4). */
int component_count_output;
inline bool operator==(const TextureUpdateRoutineSpecialisation &other) const
bool operator==(const TextureUpdateRoutineSpecialisation &other) const
{
return ((input_data_type == other.input_data_type) &&
(output_data_type == other.output_data_type) &&
@@ -48,7 +48,7 @@ struct TextureUpdateRoutineSpecialisation {
(component_count_output == other.component_count_output));
}
inline uint64_t hash() const
uint64_t hash() const
{
blender::DefaultHash<std::string> string_hasher;
return (uint64_t)string_hasher(
@@ -71,12 +71,12 @@ typedef enum {
struct DepthTextureUpdateRoutineSpecialisation {
DepthTextureUpdateMode data_mode;
inline bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const
bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const
{
return ((data_mode == other.data_mode));
}
inline uint64_t hash() const
uint64_t hash() const
{
return (uint64_t)(this->data_mode);
}
@@ -93,10 +93,10 @@ struct TextureReadRoutineSpecialisation {
* 0 = Not a Depth format,
* 1 = FLOAT DEPTH,
* 2 = 24Bit Integer Depth,
* 4 = 32bit unsigned Integer Depth. */
   * 4 = 32bit uint Depth. */
int depth_format_mode;
inline bool operator==(const TextureReadRoutineSpecialisation &other) const
bool operator==(const TextureReadRoutineSpecialisation &other) const
{
return ((input_data_type == other.input_data_type) &&
(output_data_type == other.output_data_type) &&
@@ -105,7 +105,7 @@ struct TextureReadRoutineSpecialisation {
(depth_format_mode == other.depth_format_mode));
}
inline uint64_t hash() const
uint64_t hash() const
{
blender::DefaultHash<std::string> string_hasher;
return (uint64_t)string_hasher(this->input_data_type + this->output_data_type +
@@ -125,28 +125,27 @@ static const int MTL_MAX_MIPMAP_COUNT = 15; /* Max: 16384x16384 */
static const int MTL_MAX_FBO_ATTACHED = 16;
/* Samplers */
typedef struct MTLSamplerState {
struct MTLSamplerState {
eGPUSamplerState state;
  /* Mip min and mip max on the sampler state are always the same.
   * The level range is now controlled with a texture view, to be consistent with GL's
   * baseLevel. */
inline bool operator==(const MTLSamplerState &other) const
bool operator==(const MTLSamplerState &other) const
{
/* Add other parameters as needed. */
return (this->state == other.state);
}
operator unsigned int() const
operator uint() const
{
return (unsigned int)state;
return (uint)state;
}
operator uint64_t() const
{
return (uint64_t)state;
}
} MTLSamplerState;
};
const MTLSamplerState DEFAULT_SAMPLER_STATE = {GPU_SAMPLER_DEFAULT /*, 0, 9999*/};
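/* Sketch: the conversion operators above let a sampler state act directly as
 * a cache key -- an array index into the fixed-size cache, or a 64-bit hash
 * key. */
MTLSamplerState s = DEFAULT_SAMPLER_STATE;
uint index = (uint)s;       /* operator uint: cache array indexing. */
uint64_t key = (uint64_t)s; /* operator uint64_t: hashing. */
BLI_assert(index < GPU_SAMPLER_MAX);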
@@ -174,12 +173,12 @@ class MTLTexture : public Texture {
/* Texture Storage. */
id<MTLBuffer> texture_buffer_;
unsigned int aligned_w_ = 0;
uint aligned_w_ = 0;
/* Blit Frame-buffer. */
GPUFrameBuffer *blit_fb_ = nullptr;
unsigned int blit_fb_slice_ = 0;
unsigned int blit_fb_mip_ = 0;
uint blit_fb_slice_ = 0;
uint blit_fb_mip_ = 0;
/* Texture view properties */
/* In Metal, we use texture views to either limit mipmap ranges,
@@ -252,7 +251,7 @@ class MTLTexture : public Texture {
uint gl_bindcode_get(void) const override;
bool texture_is_baked();
inline const char *get_name()
const char *get_name()
{
return name_;
}
@@ -280,7 +279,7 @@ class MTLTexture : public Texture {
void ensure_mipmaps(int miplvl);
/* Flags a given mip level as being used. */
void add_subresource(unsigned int level);
void add_subresource(uint level);
void read_internal(int mip,
int x_off,
@@ -299,31 +298,31 @@ class MTLTexture : public Texture {
id<MTLTexture> get_metal_handle_base();
MTLSamplerState get_sampler_state();
void blit(id<MTLBlitCommandEncoder> blit_encoder,
unsigned int src_x_offset,
unsigned int src_y_offset,
unsigned int src_z_offset,
unsigned int src_slice,
unsigned int src_mip,
uint src_x_offset,
uint src_y_offset,
uint src_z_offset,
uint src_slice,
uint src_mip,
gpu::MTLTexture *dest,
unsigned int dst_x_offset,
unsigned int dst_y_offset,
unsigned int dst_z_offset,
unsigned int dst_slice,
unsigned int dst_mip,
unsigned int width,
unsigned int height,
unsigned int depth);
uint dst_x_offset,
uint dst_y_offset,
uint dst_z_offset,
uint dst_slice,
uint dst_mip,
uint width,
uint height,
uint depth);
void blit(gpu::MTLTexture *dest,
unsigned int src_x_offset,
unsigned int src_y_offset,
unsigned int dst_x_offset,
unsigned int dst_y_offset,
unsigned int src_mip,
unsigned int dst_mip,
unsigned int dst_slice,
uint src_x_offset,
uint src_y_offset,
uint dst_x_offset,
uint dst_y_offset,
uint src_mip,
uint dst_mip,
uint dst_slice,
int width,
int height);
GPUFrameBuffer *get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip);
GPUFrameBuffer *get_blit_framebuffer(uint dst_slice, uint dst_mip);
MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture")

File diff suppressed because it is too large.


@@ -493,13 +493,13 @@ void gpu::MTLTexture::update_sub_depth_2d(
int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data)
{
/* Verify we are in a valid configuration. */
BLI_assert(ELEM(this->format_,
BLI_assert(ELEM(format_,
GPU_DEPTH_COMPONENT24,
GPU_DEPTH_COMPONENT32F,
GPU_DEPTH_COMPONENT16,
GPU_DEPTH24_STENCIL8,
GPU_DEPTH32F_STENCIL8));
BLI_assert(validate_data_format_mtl(this->format_, type));
BLI_assert(validate_data_format_mtl(format_, type));
BLI_assert(ELEM(type, GPU_DATA_FLOAT, GPU_DATA_UINT_24_8, GPU_DATA_UINT));
/* Determine whether we are in GPU_DATA_UINT_24_8 or GPU_DATA_FLOAT mode. */
@@ -528,7 +528,7 @@ void gpu::MTLTexture::update_sub_depth_2d(
/* Push contents into an r32_tex and render contents to depth using a shader. */
GPUTexture *r32_tex_tmp = GPU_texture_create_2d(
"depth_intermediate_copy_tex", this->w_, this->h_, 1, format, nullptr);
"depth_intermediate_copy_tex", w_, h_, 1, format, nullptr);
GPU_texture_filter_mode(r32_tex_tmp, false);
GPU_texture_wrap_mode(r32_tex_tmp, false, true);
gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(unwrap(r32_tex_tmp));
@@ -538,7 +538,7 @@ void gpu::MTLTexture::update_sub_depth_2d(
GPUFrameBuffer *depth_fb_temp = GPU_framebuffer_create("depth_intermediate_copy_fb");
GPU_framebuffer_texture_attach(depth_fb_temp, wrap(static_cast<Texture *>(this)), 0, mip);
GPU_framebuffer_bind(depth_fb_temp);
if (extent[0] == this->w_ && extent[1] == this->h_) {
if (extent[0] == w_ && extent[1] == h_) {
/* Skip load if the whole texture is being updated. */
GPU_framebuffer_clear_depth(depth_fb_temp, 0.0);
GPU_framebuffer_clear_stencil(depth_fb_temp, 0);
@@ -553,7 +553,7 @@ void gpu::MTLTexture::update_sub_depth_2d(
GPU_batch_uniform_1i(quad, "mip", mip);
GPU_batch_uniform_2f(quad, "extent", (float)extent[0], (float)extent[1]);
GPU_batch_uniform_2f(quad, "offset", (float)offset[0], (float)offset[1]);
GPU_batch_uniform_2f(quad, "size", (float)this->w_, (float)this->h_);
GPU_batch_uniform_2f(quad, "size", (float)w_, (float)h_);
bool depth_write_prev = GPU_depth_mask_get();
uint stencil_mask_prev = GPU_stencil_mask_get();
@@ -624,11 +624,11 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
depth_scale_factor = 1;
break;
case 2:
/* D24 unsigned int */
/* D24 uint */
depth_scale_factor = 0xFFFFFFu;
break;
case 4:
/* D32 unsigned int */
/* D32 uint */
depth_scale_factor = 0xFFFFFFFFu;
break;
default:

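/* Sketch (assumption, not from this diff): the scale factor converts between
 * normalized float depth and its fixed-point integer encoding. For the D24
 * case above, with `raw_texel` a hypothetical raw sample: */
uint depth_bits = raw_texel & 0xFFFFFFu;
float depth = (float)depth_bits / (float)0xFFFFFFu; /* depth_scale_factor == 0xFFFFFFu. */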

@@ -149,6 +149,16 @@ void GLContext::deactivate()
is_active_ = false;
}
void GLContext::begin_frame()
{
/* No-op. */
}
void GLContext::end_frame()
{
/* No-op. */
}
/** \} */
/* -------------------------------------------------------------------- */


@@ -106,6 +106,8 @@ class GLContext : public Context {
void activate() override;
void deactivate() override;
void begin_frame() override;
void end_frame() override;
void flush() override;
void finish() override;


@@ -77,6 +77,11 @@ class GLFrameBuffer : public FrameBuffer {
eGPUDataFormat data_format,
const void *clear_value) override;
  /* Attachment load-store ops are currently no-ops in OpenGL. */
void attachment_set_loadstore_op(GPUAttachmentType type,
eGPULoadOp load_action,
eGPUStoreOp store_action) override{};
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,
const int area[4],


@@ -1098,6 +1098,8 @@ static void wm_draw_window_onscreen(bContext *C, wmWindow *win, int view)
static void wm_draw_window(bContext *C, wmWindow *win)
{
GPU_context_begin_frame(win->gpuctx);
bScreen *screen = WM_window_get_active_screen(win);
bool stereo = WM_stereo3d_enabled(win, false);
@@ -1167,6 +1169,8 @@ static void wm_draw_window(bContext *C, wmWindow *win)
}
screen->do_draw = false;
GPU_context_end_frame(win->gpuctx);
}
/**
@@ -1177,8 +1181,12 @@ static void wm_draw_surface(bContext *C, wmSurface *surface)
wm_window_clear_drawable(CTX_wm_manager(C));
wm_surface_make_drawable(surface);
GPU_context_begin_frame(surface->gpu_ctx);
surface->draw(C);
GPU_context_end_frame(surface->gpu_ctx);
  /* Avoid interference with the window drawable. */
wm_surface_clear_drawable();
}
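/* Sketch of the same begin/end bracketing for any explicit submission path
 * that never presents (e.g. background rendering); `render_offscreen()` is a
 * hypothetical helper. */
GPUContext *ctx = GPU_context_active_get();
GPU_context_begin_frame(ctx);
render_offscreen();
GPU_context_end_frame(ctx);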


@@ -310,6 +310,7 @@ void WM_init(bContext *C, int argc, const char **argv)
IMB_thumb_clear_translations();
if (!G.background) {
GPU_render_begin();
#ifdef WITH_INPUT_NDOF
/* Sets 3D mouse dead-zone. */
@@ -322,7 +323,10 @@ void WM_init(bContext *C, int argc, const char **argv)
exit(-1);
}
GPU_context_begin_frame(GPU_context_active_get());
UI_init();
GPU_context_end_frame(GPU_context_active_get());
GPU_render_end();
}
BKE_subdiv_init();