Node: Gabor Noise Texture #110802

Open
Charlie Jolly wants to merge 68 commits from CharlieJolly/blender:gabor into main

40 changed files with 746 additions and 190 deletions
Showing only changes of commit a5ef4e832f

View File

@ -1891,6 +1891,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes
C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical
C_WARN_NO_IMPLICIT_CONST_INT_FLOAT_CONVERSION -Wno-implicit-const-int-float-conversion
C_WARN_NO_SINGLE_BIT_BITFIELD_CONSTANT_CONVERSION -Wno-single-bit-bitfield-constant-conversion
)
add_check_cxx_compiler_flags(

View File

@ -4,6 +4,8 @@
#ifdef WITH_ONEAPI
/* <algorithm> is needed until it is included upstream in sycl/detail/property_list_base.hpp */
# include <algorithm>
# include <sycl/sycl.hpp>
# include "device/device.h"

View File

@ -30,6 +30,10 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
sd->P = P;
sd->dP = differential_make_compact(dP);
/* Save the full differential; the compact form isn't enough for svm_node_set_bump. */
stack_store_float3(stack, offset + 4, dP.dx);
stack_store_float3(stack, offset + 7, dP.dy);
}
}
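For orientation, the stores above imply the following bump-state stack layout. A minimal sketch, assuming the first four floats hold the state that was already saved before this change (the old SVM_BUMP_EVAL_STATE_SIZE was 4; see the constant change below); offsets are in floats relative to the state's base offset:

/* Hypothetical layout constants; a float3 occupies 3 stack slots. */
enum BumpStateLayout {
  BUMP_STATE_SAVED = 0, /* Assumed: previously saved state (4 floats, the old state size). */
  BUMP_STATE_DPDX = 4,  /* Full differential dP.dx (float3), stored above. */
  BUMP_STATE_DPDY = 7,  /* Full differential dP.dy (float3), stored above. */
  BUMP_STATE_SIZE = 10, /* Matches the new SVM_BUMP_EVAL_STATE_SIZE. */
};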

View File

@ -15,6 +15,9 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
ccl_private float *stack,
uint4 node)
{
uint out_offset, bump_state_offset, dummy;
svm_unpack_node_uchar4(node.w, &out_offset, &bump_state_offset, &dummy, &dummy);
#ifdef __RAY_DIFFERENTIALS__
IF_KERNEL_NODES_FEATURE(BUMP)
{
@ -25,7 +28,16 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) :
sd->N;
differential3 dP = differential_from_compact(sd->Ng, sd->dP);
/* If we have saved bump state, read the full differential from there.
* Just using the compact form in those cases leads to incorrect normals (see #111588). */
differential3 dP;
if (bump_state_offset == SVM_STACK_INVALID) {
dP = differential_from_compact(sd->Ng, sd->dP);
}
else {
dP.dx = stack_load_float3(stack, bump_state_offset + 4);
dP.dy = stack_load_float3(stack, bump_state_offset + 7);
}
if (use_object_space) {
object_inverse_normal_transform(kg, sd, &normal_in);
@ -72,10 +84,10 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
object_normal_transform(kg, sd, &normal_out);
}
stack_store_float3(stack, node.w, normal_out);
stack_store_float3(stack, out_offset, normal_out);
}
else {
stack_store_float3(stack, node.w, zero_float3());
stack_store_float3(stack, out_offset, zero_float3());
}
#endif
}

View File

@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
/* SVM stack offsets with this value indicate that it's not on the stack */
#define SVM_STACK_INVALID 255
#define SVM_BUMP_EVAL_STATE_SIZE 4
#define SVM_BUMP_EVAL_STATE_SIZE 10
/* Nodes */

View File

@ -6899,16 +6899,17 @@ void BumpNode::compile(SVMCompiler &compiler)
ShaderOutput *normal_out = output("Normal");
/* pack all parameters in the node */
compiler.add_node(NODE_SET_BUMP,
compiler.encode_uchar4(compiler.stack_assign_if_linked(normal_in),
compiler.stack_assign(distance_in),
invert,
use_object_space),
compiler.encode_uchar4(compiler.stack_assign(center_in),
compiler.stack_assign(dx_in),
compiler.stack_assign(dy_in),
compiler.stack_assign(strength_in)),
compiler.stack_assign(normal_out));
compiler.add_node(
NODE_SET_BUMP,
compiler.encode_uchar4(compiler.stack_assign_if_linked(normal_in),
compiler.stack_assign(distance_in),
invert,
use_object_space),
compiler.encode_uchar4(compiler.stack_assign(center_in),
compiler.stack_assign(dx_in),
compiler.stack_assign(dy_in),
compiler.stack_assign(strength_in)),
compiler.encode_uchar4(compiler.stack_assign(normal_out), compiler.get_bump_state_offset()));
}
void BumpNode::compile(OSLCompiler &compiler)
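Both the output offset and the bump state offset travel in the node's fourth uint: `encode_uchar4()` packs four 8-bit values into one uint, and `svm_unpack_node_uchar4()` reverses it on the kernel side (see svm_node_set_bump above). A minimal standalone sketch of that packing, assuming the usual byte layout (the real helpers live in Cycles' utility headers):

#include <cstdint>

/* Sketch: pack/unpack four 8-bit values in one 32-bit word. */
static uint32_t encode_uchar4_sketch(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
  return (x & 0xFFu) | ((y & 0xFFu) << 8) | ((z & 0xFFu) << 16) | ((w & 0xFFu) << 24);
}

static void unpack_uchar4_sketch(
    uint32_t data, uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *w)
{
  *x = data & 0xFFu;
  *y = (data >> 8) & 0xFFu;
  *z = (data >> 16) & 0xFFu;
  *w = (data >> 24) & 0xFFu;
}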

View File

@ -163,6 +163,7 @@ SVMCompiler::SVMCompiler(Scene *scene) : scene(scene)
current_graph = NULL;
background = false;
mix_weight_offset = SVM_STACK_INVALID;
bump_state_offset = SVM_STACK_INVALID;
compile_failed = false;
/* This struct has one entry for every node, in order of ShaderNodeType definition. */
@ -784,9 +785,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
}
/* for the bump shader we need to add a node to store the shader state */
bool need_bump_state = (type == SHADER_TYPE_BUMP) &&
(shader->get_displacement_method() == DISPLACE_BOTH);
int bump_state_offset = SVM_STACK_INVALID;
const bool need_bump_state = (type == SHADER_TYPE_BUMP) &&
(shader->get_displacement_method() == DISPLACE_BOTH);
if (need_bump_state) {
bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
@ -846,6 +846,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
/* add node to restore state after bump shader has finished */
if (need_bump_state) {
add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
bump_state_offset = SVM_STACK_INVALID;
}
/* if compile failed, generate empty shader */

View File

@ -106,6 +106,10 @@ class SVMCompiler {
{
return mix_weight_offset;
}
uint get_bump_state_offset()
{
return bump_state_offset;
}
ShaderType output_type()
{
@ -222,6 +226,7 @@ class SVMCompiler {
Stack active_stack;
int max_stack_use;
uint mix_weight_offset;
uint bump_state_offset;
bool compile_failed;
};

View File

@ -276,10 +276,10 @@ class NLA_OT_bake(Operator):
do_constraint_clear=self.clear_constraints,
do_parents_clear=self.clear_parents,
do_clean=self.clean_curves,
do_location = 'LOCATION' in self.channel_types,
do_rotation = 'ROTATION' in self.channel_types,
do_scale = 'SCALE' in self.channel_types,
do_bbone = 'BBONE' in self.channel_types,
do_location='LOCATION' in self.channel_types,
do_rotation='ROTATION' in self.channel_types,
do_scale='SCALE' in self.channel_types,
do_bbone='BBONE' in self.channel_types,
)
if bake_options.do_pose and self.only_selected:

View File

@ -37,6 +37,26 @@ void FramebufferBind::execute() const
GPU_framebuffer_bind(*framebuffer);
}
void SubPassTransition::execute() const
{
/* TODO(fclem): Require framebuffer bind to always be part of the pass so that we can track it
* inside RecordingState. */
GPUFrameBuffer *framebuffer = GPU_framebuffer_active_get();
/* Unpack to the real enum type. */
const GPUAttachmentState states[9] = {
GPUAttachmentState(depth_state),
GPUAttachmentState(color_states[0]),
GPUAttachmentState(color_states[1]),
GPUAttachmentState(color_states[2]),
GPUAttachmentState(color_states[3]),
GPUAttachmentState(color_states[4]),
GPUAttachmentState(color_states[5]),
GPUAttachmentState(color_states[6]),
GPUAttachmentState(color_states[7]),
};
GPU_framebuffer_subpass_transition_array(framebuffer, states, ARRAY_SIZE(states));
}
void ResourceBind::execute() const
{
if (slot == -1) {
@ -258,6 +278,26 @@ std::string FramebufferBind::serialize() const
(*framebuffer == nullptr ? "nullptr" : GPU_framebuffer_get_name(*framebuffer)) + ")";
}
std::string SubPassTransition::serialize() const
{
auto to_str = [](GPUAttachmentState state) {
return (state != GPU_ATTACHEMENT_IGNORE) ?
((state == GPU_ATTACHEMENT_WRITE) ? "write" : "read") :
"ignore";
};
return std::string(".subpass_transition(\n") +
"depth=" + to_str(GPUAttachmentState(depth_state)) + ",\n" +
"color0=" + to_str(GPUAttachmentState(color_states[0])) + ",\n" +
"color1=" + to_str(GPUAttachmentState(color_states[1])) + ",\n" +
"color2=" + to_str(GPUAttachmentState(color_states[2])) + ",\n" +
"color3=" + to_str(GPUAttachmentState(color_states[3])) + ",\n" +
"color4=" + to_str(GPUAttachmentState(color_states[4])) + ",\n" +
"color5=" + to_str(GPUAttachmentState(color_states[5])) + ",\n" +
"color6=" + to_str(GPUAttachmentState(color_states[6])) + ",\n" +
"color7=" + to_str(GPUAttachmentState(color_states[7])) + "\n)";
}
std::string ResourceBind::serialize() const
{
switch (type) {

View File

@ -100,6 +100,7 @@ enum class Type : uint8_t {
PushConstant,
ResourceBind,
ShaderBind,
SubPassTransition,
StateSet,
StencilSet,
@ -134,6 +135,16 @@ struct FramebufferBind {
std::string serialize() const;
};
struct SubPassTransition {
/** \note uint8_t storing `GPUAttachmentState` for compactness. */
uint8_t depth_state;
/** \note 8 is GPU_FB_MAX_COLOR_ATTACHMENT. */
uint8_t color_states[8];
void execute() const;
std::string serialize() const;
};
struct ResourceBind {
GPUSamplerState sampler;
int slot;
@ -385,6 +396,7 @@ union Undetermined {
ShaderBind shader_bind;
ResourceBind resource_bind;
FramebufferBind framebuffer_bind;
SubPassTransition subpass_transition;
PushConstant push_constant;
Draw draw;
DrawMulti draw_multi;

View File

@ -214,6 +214,14 @@ class PassBase {
*/
void framebuffer_set(GPUFrameBuffer **framebuffer);
/**
* Start a new sub-pass and change the framebuffer attachment states.
* \note Affects the framebuffer bound at the time of submission and execution.
* \note States are copied and stored in the command.
*/
void subpass_transition(GPUAttachmentState depth_attachment,
Span<GPUAttachmentState> color_attachments);
/**
* Bind a material shader along with its associated resources. Any following bind() or
* push_constant() call will use its interface.
@ -550,6 +558,9 @@ template<class T> void PassBase<T>::submit(command::RecordingState &state) const
case command::Type::FramebufferBind:
commands_[header.index].framebuffer_bind.execute();
break;
case command::Type::SubPassTransition:
commands_[header.index].subpass_transition.execute();
break;
case command::Type::ShaderBind:
commands_[header.index].shader_bind.execute(state);
break;
@ -611,6 +622,9 @@ template<class T> std::string PassBase<T>::serialize(std::string line_prefix) co
case Type::FramebufferBind:
ss << line_prefix << commands_[header.index].framebuffer_bind.serialize() << std::endl;
break;
case Type::SubPassTransition:
ss << line_prefix << commands_[header.index].subpass_transition.serialize() << std::endl;
break;
case Type::ShaderBind:
ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl;
break;
@ -825,6 +839,25 @@ template<class T> inline void PassBase<T>::framebuffer_set(GPUFrameBuffer **fram
create_command(Type::FramebufferBind).framebuffer_bind = {framebuffer};
}
template<class T>
inline void PassBase<T>::subpass_transition(GPUAttachmentState depth_attachment,
Span<GPUAttachmentState> color_attachments)
{
uint8_t color_states[8] = {GPU_ATTACHEMENT_IGNORE};
for (auto i : color_attachments.index_range()) {
color_states[i] = uint8_t(color_attachments[i]);
}
create_command(Type::SubPassTransition).subpass_transition = {uint8_t(depth_attachment),
color_states[0],
color_states[1],
color_states[2],
color_states[3],
color_states[4],
color_states[5],
color_states[6],
color_states[7]};
}
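Recording a transition from a pass then looks like the following sketch, mirroring the draw test later in this commit (`pass` is a PassSimple, `fb` a GPUFrameBuffer pointer):

/* Write color attachment 0, read attachment 1 as sub-pass input; depth untouched. */
pass.framebuffer_set(&fb);
pass.subpass_transition(GPU_ATTACHEMENT_IGNORE, {GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_READ});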
template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial *material)
{
GPUPass *gpupass = GPU_material_get_pass(material);

View File

@ -29,6 +29,7 @@ static void test_draw_pass_all_commands()
/* Won't be dereferenced. */
GPUVertBuf *vbo = (GPUVertBuf *)1;
GPUIndexBuf *ibo = (GPUIndexBuf *)1;
GPUFrameBuffer *fb = nullptr;
float4 color(1.0f, 1.0f, 1.0f, 0.0f);
int3 dispatch_size(1);
@ -42,6 +43,8 @@ static void test_draw_pass_all_commands()
const int color_location = GPU_shader_get_uniform(sh, "color");
const int mvp_location = GPU_shader_get_uniform(sh, "ModelViewProjectionMatrix");
pass.shader_set(sh);
pass.framebuffer_set(&fb);
pass.subpass_transition(GPU_ATTACHEMENT_IGNORE, {GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_READ});
pass.bind_texture("image", tex);
pass.bind_texture("image", &tex);
pass.bind_image("missing_image", tex); /* Should not crash. */
@ -80,6 +83,18 @@ static void test_draw_pass_all_commands()
<< " .stencil_set(write_mask=0b10000000, reference=0b00001111, compare_mask=0b10001111)"
<< std::endl;
expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl;
expected << " .framebuffer_bind(nullptr)" << std::endl;
expected << " .subpass_transition(" << std::endl;
expected << "depth=ignore," << std::endl;
expected << "color0=write," << std::endl;
expected << "color1=read," << std::endl;
expected << "color2=ignore," << std::endl;
expected << "color3=ignore," << std::endl;
expected << "color4=ignore," << std::endl;
expected << "color5=ignore," << std::endl;
expected << "color6=ignore," << std::endl;
expected << "color7=ignore" << std::endl;
expected << ")" << std::endl;
expected << " .bind_texture(0, sampler=internal)" << std::endl;
expected << " .bind_texture_ref(0, sampler=internal)" << std::endl;
expected << " .bind_image(-1)" << std::endl;

View File

@ -273,16 +273,36 @@ bool ED_curve_select_swap(EditNurb *editnurb, bool hide_handles)
int a;
bool changed = false;
/* This could be an argument to swap individual handle selection;
* at the moment it is only enabled when hiding handles. */
bool swap_handles = false;
/* When hiding handles, ignore handle selection. */
if (hide_handles) {
swap_handles = true;
}
LISTBASE_FOREACH (Nurb *, nu, &editnurb->nurbs) {
if (nu->type == CU_BEZIER) {
bezt = nu->bezt;
a = nu->pntsu;
while (a--) {
if (bezt->hide == 0) {
bezt->f2 ^= SELECT; /* always do the center point */
if (!hide_handles) {
bezt->f1 ^= SELECT;
bezt->f3 ^= SELECT;
if (swap_handles) {
bezt->f2 ^= SELECT; /* always do the center point */
if (!hide_handles) {
bezt->f1 ^= SELECT;
bezt->f3 ^= SELECT;
}
}
else {
BLI_assert(!hide_handles);
if (BEZT_ISSEL_ANY(bezt)) {
BEZT_DESEL_ALL(bezt);
}
else {
BEZT_SEL_ALL(bezt);
}
}
changed = true;
}

View File

@ -2691,7 +2691,12 @@ void ED_gpencil_select_curve_toggle_all(bContext *C, int action)
break;
case SEL_INVERT:
gpc_pt->flag ^= GP_CURVE_POINT_SELECT;
BEZT_SEL_INVERT(bezt);
if (gpc_pt->flag & GP_CURVE_POINT_SELECT) {
BEZT_SEL_ALL(bezt);
}
else {
BEZT_DESEL_ALL(bezt);
}
break;
default:
break;

View File

@ -3803,7 +3803,8 @@ static void ui_do_but_textedit(
but->pos = short(selend);
but->selsta = short(selsta);
but->selend = short(selend);
data->sel_pos_init = selsta;
/* Anchor the selection to the left side unless it is the last word. */
data->sel_pos_init = ((selend == strlen(data->str)) && (selsta != 0)) ? selend : selsta;
retval = WM_UI_HANDLER_BREAK;
changed = true;
}
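To make the anchoring rule concrete, a self-contained sketch (the helper name is hypothetical, not part of the patch):

#include <cassert>
#include <cstring>

/* Where the selection anchor goes after a double-click word selection. */
static int select_anchor_sketch(const char *str, int selsta, int selend)
{
  const bool is_last_word = (selend == int(std::strlen(str))) && (selsta != 0);
  return is_last_word ? selend : selsta;
}

int main()
{
  assert(select_anchor_sketch("hello world", 0, 5) == 0);   /* Anchored on the left. */
  assert(select_anchor_sketch("hello world", 6, 11) == 11); /* Last word: anchored right. */
}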

View File

@ -12,13 +12,63 @@
extern "C" {
#endif
/**
* Describes the load operation of a framebuffer attachment at the start of a render pass.
*/
typedef enum eGPULoadOp {
/**
* Clear the framebuffer attachment using the clear value.
*/
GPU_LOADACTION_CLEAR = 0,
/**
* Load the value from the attached texture.
* Cannot be used with memoryless attachments.
* Slower than `GPU_LOADACTION_CLEAR` or `GPU_LOADACTION_DONT_CARE`.
*/
GPU_LOADACTION_LOAD,
GPU_LOADACTION_DONT_CARE
/**
* Do not care about the content of the attachment when the render pass starts.
* Useful if only the values being written are important.
* Faster than `GPU_LOADACTION_CLEAR`.
*/
GPU_LOADACTION_DONT_CARE,
} eGPULoadOp;
typedef enum eGPUStoreOp { GPU_STOREACTION_STORE = 0, GPU_STOREACTION_DONT_CARE } eGPUStoreOp;
/**
* Describes the store operation of a framebuffer attachment at the end of a render pass.
*/
typedef enum eGPUStoreOp {
/**
* Store the result of the rendering for this attachment.
* Cannot be used with memoryless attachments.
*/
GPU_STOREACTION_STORE = 0,
/**
* The result of the rendering for this attachment will be discarded.
* No writes to the texture memory will be done which makes it faster than
* `GPU_STOREACTION_STORE`.
* IMPORTANT: The actual values of the attachment are to be considered undefined.
* Only to be used on transient attachments that are only used within the boundaries of
* a render pass (e.g. an unneeded depth buffer result).
*/
GPU_STOREACTION_DONT_CARE,
} eGPUStoreOp;
/**
* Describes the state of a framebuffer attachment during a sub-pass.
*
* NOTE: Until this is correctly implemented in all backends, reading and writing from the
* same attachment will not work, though there is currently no case where that would be useful.
*/
typedef enum GPUAttachmentState {
/** Attachment will not be written during rendering. */
GPU_ATTACHEMENT_IGNORE = 0,
/** Attachment will be written during render sub-pass. This also works with blending. */
GPU_ATTACHEMENT_WRITE,
/** Attachment is used as input in the fragment shader. Incompatible with depth on Metal. */
GPU_ATTACHEMENT_READ,
} GPUAttachmentState;
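The load/store actions above pair up per attachment when binding. A hedged usage sketch, assuming the GPULoadStore struct used by the GPU_framebuffer_bind_loadstore() helper in the next file pairs one load action with one store action:

/* Depth: reuse previous content, discard after the pass. Color 0: clear, keep result. */
GPU_framebuffer_bind_loadstore(fb, {
    {GPU_LOADACTION_LOAD, GPU_STOREACTION_DONT_CARE},
    {GPU_LOADACTION_CLEAR, GPU_STOREACTION_STORE},
});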
typedef enum eGPUFrontFace {
GPU_CLOCKWISE,

View File

@ -178,6 +178,35 @@ void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *framebuffer,
GPU_framebuffer_bind_loadstore(_fb, actions, (sizeof(actions) / sizeof(GPULoadStore))); \
}
/**
* Sub-pass config array matches attachment structure of `GPU_framebuffer_config_array`.
* This allows explicitly specifying the attachment states within the next sub-pass.
* This enables a number of bandwidth optimizations, especially on Tile-Based Deferred Renderers
* where the attachments can be kept in tile memory and used in place by later sub-passes.
*
* Example:
* \code{.c}
* GPU_framebuffer_subpass_transition(fb, {
*     GPU_ATTACHEMENT_WRITE,  // Must be the depth attachment.
*     GPU_ATTACHEMENT_READ,   // Color attachment 0
*     GPU_ATTACHEMENT_IGNORE, // Color attachment 1
*     GPU_ATTACHEMENT_WRITE,  // Color attachment 2
* })
* \endcode
*
* \note Excess attachments will have no effect as long as they are GPU_ATTACHEMENT_IGNORE.
*/
void GPU_framebuffer_subpass_transition_array(GPUFrameBuffer *framebuffer,
const GPUAttachmentState *attachment_states,
uint attachment_len);
#define GPU_framebuffer_subpass_transition(_fb, ...) \
{ \
GPUAttachmentState actions[] = __VA_ARGS__; \
GPU_framebuffer_subpass_transition_array( \
_fb, actions, (sizeof(actions) / sizeof(GPUAttachmentState))); \
}
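For reference, the GPU test later in this commit drives this macro directly; its two transitions look like:

/* First sub-pass: write color attachment 0. */
GPU_framebuffer_subpass_transition(
    framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_IGNORE});
/* ... draw ... */
/* Second sub-pass: read attachment 0 as input, write attachment 1. */
GPU_framebuffer_subpass_transition(
    framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_READ, GPU_ATTACHEMENT_WRITE});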
/** \} */
/* -------------------------------------------------------------------- */

View File

@ -35,6 +35,9 @@ class DummyFrameBuffer : public FrameBuffer {
void attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPULoadStore /*ls*/) override {}
void subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
void read(eGPUFrameBufferBits /*planes*/,
eGPUDataFormat /*format*/,
const int /*area*/[4],

View File

@ -265,6 +265,14 @@ void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *gpu_fb,
fb->load_store_config_array(load_store_actions, actions_len);
}
void GPU_framebuffer_subpass_transition_array(GPUFrameBuffer *gpu_fb,
const GPUAttachmentState *attachment_states,
uint attachment_len)
{
unwrap(gpu_fb)->subpass_transition(
attachment_states[0], Span<GPUAttachmentState>(attachment_states + 1, attachment_len - 1));
}
void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *gpu_fb)
{
const bool enable_srgb = false;

View File

@ -127,6 +127,9 @@ class FrameBuffer {
int dst_offset_x,
int dst_offset_y) = 0;
virtual void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) = 0;
void load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len);
void attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment);

View File

@ -153,6 +153,9 @@ class MTLFrameBuffer : public FrameBuffer {
int dst_offset_x,
int dst_offset_y) override;
void subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
void apply_state();
/* State. */

View File

@ -46,8 +46,6 @@ void GLBackend::platform_init()
#ifdef _WIN32
os = GPU_OS_WIN;
#elif defined(__APPLE__)
os = GPU_OS_MAC;
#else
os = GPU_OS_UNIX;
#endif
@ -271,6 +269,8 @@ static void detect_workarounds()
/* Turn off vendor specific extensions. */
GLContext::native_barycentric_support = false;
GLContext::framebuffer_fetch_support = false;
GLContext::texture_barrier_support = false;
/* Do not alter OpenGL 4.3 features.
* These code paths should be removed. */
@ -361,17 +361,6 @@ static void detect_workarounds()
GCaps.use_hq_normals_workaround = true;
}
}
/* There is an issue with the #glBlitFramebuffer on MacOS with radeon pro graphics.
* Blitting depth with #GL_DEPTH24_STENCIL8 is buggy so the workaround is to use
* #GPU_DEPTH32F_STENCIL8. Then Blitting depth will work but blitting stencil will
* still be broken. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
if (strstr(renderer, "AMD Radeon Pro") || strstr(renderer, "AMD Radeon R9") ||
strstr(renderer, "AMD Radeon RX"))
{
GCaps.depth_blitting_workaround = true;
}
}
/* Limit this fix to older hardware with GL < 4.5. This means Broadwell GPUs are
* covered since they only support GL 4.4 on windows.
* This fixes some issues with workbench anti-aliasing on Win + Intel GPU. (see #76273) */
@ -446,11 +435,6 @@ static void detect_workarounds()
}
}
/* Disable TF on macOS. */
if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) {
GCaps.transform_feedback_support = false;
}
/* Some Intel drivers have issues with using mips as frame-buffer targets if
* GL_TEXTURE_MAX_LEVEL is higher than the target MIP.
* Only check at the end after all other workarounds because this uses the drawing code.
@ -467,21 +451,6 @@ static void detect_workarounds()
GLContext::debug_layer_workaround = true;
}
/* Broken glGenerateMipmap on macOS 10.15.7 security update. */
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY) &&
strstr(renderer, "HD Graphics 4000"))
{
GLContext::generate_mipmap_workaround = true;
}
/* Certain Intel/AMD based platforms don't clear the viewport textures. Always clearing leads to
* noticeable performance regressions on other platforms as well. */
if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY) ||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY))
{
GCaps.clear_viewport_workaround = true;
}
/* There is an issue in AMD official driver where we cannot use multi bind when using images. AMD
* is aware of the issue, but hasn't released a fix. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) {
@ -510,6 +479,7 @@ bool GLContext::copy_image_support = false;
bool GLContext::debug_layer_support = false;
bool GLContext::direct_state_access_support = false;
bool GLContext::explicit_location_support = false;
bool GLContext::framebuffer_fetch_support = false;
bool GLContext::geometry_shader_invocations = false;
bool GLContext::fixed_restart_index_support = false;
bool GLContext::layered_rendering_support = false;
@ -519,6 +489,7 @@ bool GLContext::multi_bind_image_support = false;
bool GLContext::multi_draw_indirect_support = false;
bool GLContext::shader_draw_parameters_support = false;
bool GLContext::stencil_texturing_support = false;
bool GLContext::texture_barrier_support = false;
bool GLContext::texture_cube_map_array_support = false;
bool GLContext::texture_filter_anisotropic_support = false;
bool GLContext::texture_gather_support = false;
@ -547,14 +518,7 @@ void GLBackend::capabilities_init()
glGetIntegerv(GL_MAX_ELEMENTS_INDICES, &GCaps.max_batch_indices);
glGetIntegerv(GL_MAX_ELEMENTS_VERTICES, &GCaps.max_batch_vertices);
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &GCaps.max_vertex_attribs);
if (GPU_type_matches(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
/* Due to a bug, querying GL_MAX_VARYING_FLOATS is emitting GL_INVALID_ENUM.
* Force use minimum required value. */
GCaps.max_varying_floats = 32;
}
else {
glGetIntegerv(GL_MAX_VARYING_FLOATS, &GCaps.max_varying_floats);
}
glGetIntegerv(GL_MAX_VARYING_FLOATS, &GCaps.max_varying_floats);
glGetIntegerv(GL_NUM_EXTENSIONS, &GCaps.extensions_len);
GCaps.extension_get = gl_extension_get;
@ -609,6 +573,8 @@ void GLBackend::capabilities_init()
GLContext::explicit_location_support = epoxy_gl_version() >= 43;
GLContext::geometry_shader_invocations = epoxy_has_gl_extension("GL_ARB_gpu_shader5");
GLContext::fixed_restart_index_support = epoxy_has_gl_extension("GL_ARB_ES3_compatibility");
GLContext::framebuffer_fetch_support = epoxy_has_gl_extension("GL_EXT_shader_framebuffer_fetch");
GLContext::texture_barrier_support = epoxy_has_gl_extension("GL_ARB_texture_barrier");
GLContext::layered_rendering_support = epoxy_has_gl_extension(
"GL_ARB_shader_viewport_layer_array");
GLContext::native_barycentric_support = epoxy_has_gl_extension(
@ -628,6 +594,9 @@ void GLBackend::capabilities_init()
GLContext::vertex_attrib_binding_support = epoxy_has_gl_extension(
"GL_ARB_vertex_attrib_binding");
/* Disabled until it is proven to work. */
GLContext::framebuffer_fetch_support = false;
detect_workarounds();
/* Disable this feature entirely when not debugging. */

View File

@ -219,15 +219,6 @@ GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first)
/* Trigger update. */
base_instance_ = 0;
}
/**
* There seems to be a nasty bug when drawing using the same VAO reconfiguring (#71147).
* We just use a throwaway VAO for that. Note that this is likely to degrade performance.
*/
#ifdef __APPLE__
glDeleteVertexArrays(1, &vao_base_instance_);
vao_base_instance_ = 0;
base_instance_ = 0;
#endif
if (vao_base_instance_ == 0) {
glGenVertexArrays(1, &vao_base_instance_);
@ -318,18 +309,12 @@ void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
}
}
else {
#ifdef __APPLE__
glDisable(GL_PRIMITIVE_RESTART);
#endif
if (GLContext::base_instance_support) {
glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first);
}
else {
glDrawArraysInstanced(gl_type, v_first, v_count, i_count);
}
#ifdef __APPLE__
glEnable(GL_PRIMITIVE_RESTART);
#endif
}
}

View File

@ -304,7 +304,6 @@ void GLContext::vao_cache_unregister(GLVaoCache *cache)
void GLContext::memory_statistics_get(int *r_total_mem, int *r_free_mem)
{
/* TODO(merwin): use Apple's platform API to get this info. */
if (epoxy_has_gl_extension("GL_NVX_gpu_memory_info")) {
/* Returned value in Kb. */
glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, r_total_mem);

View File

@ -56,6 +56,7 @@ class GLContext : public Context {
static bool explicit_location_support;
static bool geometry_shader_invocations;
static bool fixed_restart_index_support;
static bool framebuffer_fetch_support;
static bool layered_rendering_support;
static bool native_barycentric_support;
static bool multi_bind_support;
@ -63,6 +64,7 @@ class GLContext : public Context {
static bool multi_draw_indirect_support;
static bool shader_draw_parameters_support;
static bool stencil_texturing_support;
static bool texture_barrier_support;
static bool texture_cube_map_array_support;
static bool texture_filter_anisotropic_support;
static bool texture_gather_support;

View File

@ -226,6 +226,89 @@ void GLFrameBuffer::update_attachments()
}
}
void GLFrameBuffer::subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states)
{
/* NOTE: Depth is not supported as input attachment because the Metal API doesn't support it and
* because depth is not compatible with the framebuffer fetch implementation. */
BLI_assert(depth_attachment_state != GPU_ATTACHEMENT_READ);
GPU_depth_mask(depth_attachment_state == GPU_ATTACHEMENT_WRITE);
bool any_read = false;
for (const GPUAttachmentState state : color_attachment_states) {
if (state == GPU_ATTACHEMENT_READ) {
any_read = true;
break;
}
}
if (GLContext::framebuffer_fetch_support) {
if (any_read) {
glFramebufferFetchBarrierEXT();
}
}
else if (GLContext::texture_barrier_support) {
if (any_read) {
glTextureBarrier();
}
GLenum attachments[GPU_FB_MAX_COLOR_ATTACHMENT] = {GL_NONE};
for (int i : color_attachment_states.index_range()) {
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
GPUTexture *attach_tex = this->attachments_[type].tex;
if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
tmp_detached_[type] = this->attachments_[type]; /* Bypass feedback loop check. */
GPU_texture_bind_ex(attach_tex, GPUSamplerState::default_sampler(), i);
}
else {
tmp_detached_[type] = GPU_ATTACHMENT_NONE;
}
bool attach_write = color_attachment_states[i] == GPU_ATTACHEMENT_WRITE;
attachments[i] = (attach_tex && attach_write) ? to_gl(type) : GL_NONE;
}
/* We have to use `glDrawBuffers` instead of `glColorMaski` because the latter is overwritten
* by the `GLStateManager`. */
/* WATCH(fclem): This modifies the frame-buffer state without setting `dirty_attachments_`. */
glDrawBuffers(ARRAY_SIZE(attachments), attachments);
}
else {
/* The only way to get correct visibility without extensions and to ensure defined behavior is
* to unbind the textures and update the frame-buffer. This is a slow operation, but it is all
* we can do to emulate sub-pass inputs. */
/* TODO(fclem): Could avoid the framebuffer reconfiguration by creating multiple framebuffers
* internally. */
for (int i : color_attachment_states.index_range()) {
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
if (color_attachment_states[i] == GPU_ATTACHEMENT_WRITE) {
if (tmp_detached_[type].tex != nullptr) {
/* Re-attach previous read attachments. */
this->attachment_set(type, tmp_detached_[type]);
tmp_detached_[type] = GPU_ATTACHMENT_NONE;
}
}
else if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
tmp_detached_[type] = this->attachments_[type];
unwrap(tmp_detached_[type].tex)->detach_from(this);
GPU_texture_bind_ex(tmp_detached_[type].tex, GPUSamplerState::default_sampler(), i);
}
}
if (dirty_attachments_) {
this->update_attachments();
}
}
}
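In short, the code above picks one of three emulation tiers depending on the detected GL features. A compact restatement (sketch only; the real selection is inline above):

enum class SubpassEmulation { FramebufferFetch, TextureBarrier, ReattachTextures };

static SubpassEmulation choose_subpass_emulation(bool fb_fetch, bool tex_barrier)
{
  if (fb_fetch) {
    return SubpassEmulation::FramebufferFetch; /* In-place reads; only needs a fetch barrier. */
  }
  if (tex_barrier) {
    return SubpassEmulation::TextureBarrier; /* Bind attachments as textures + glTextureBarrier(). */
  }
  /* Slowest path: detach read attachments, bind them as textures, reconfigure the FB. */
  return SubpassEmulation::ReattachTextures;
}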
void GLFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore ls)
{
BLI_assert(context_->active_fb == this);
/* TODO(fclem): Add support for other ops. */
if (ls.load_action == eGPULoadOp::GPU_LOADACTION_CLEAR) {
clear_attachment(type, GPU_DATA_FLOAT, ls.clear_value);
}
}
void GLFrameBuffer::apply_state()
{
if (dirty_state_ == false) {
@ -364,6 +447,8 @@ void GLFrameBuffer::clear_attachment(GPUAttachmentType type,
/* Save and restore the state. */
eGPUWriteMask write_mask = GPU_write_mask_get();
GPU_color_mask(true, true, true, true);
bool depth_mask = GPU_depth_mask_get();
GPU_depth_mask(true);
context_->state_manager->apply_state();
@ -404,6 +489,7 @@ void GLFrameBuffer::clear_attachment(GPUAttachmentType type,
}
GPU_write_mask(write_mask);
GPU_depth_mask(depth_mask);
}
void GLFrameBuffer::clear_multi(const float (*clear_cols)[4])

View File

@ -34,6 +34,8 @@ class GLFrameBuffer : public FrameBuffer {
GLStateManager *state_manager_ = nullptr;
/** Copy of the GL state. Contains ONLY color attachments enums for slot binding. */
GLenum gl_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT] = {0};
/** List of attachments that are associated with this frame-buffer but temporarily detached. */
GPUAttachment tmp_detached_[GPU_FB_MAX_ATTACHMENT];
/** Internal frame-buffers are immutable. */
bool immutable_ = false;
/** True is the frame-buffer has its first color target using the GPU_SRGB8_A8 format. */
@ -77,7 +79,10 @@ class GLFrameBuffer : public FrameBuffer {
const void *clear_value) override;
/* Attachment load-stores are currently no-op's in OpenGL. */
void attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPULoadStore /*ls*/) override{};
void attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore ls) override;
void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) override;
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,

View File

@ -147,13 +147,8 @@ void GLImmediate::end()
/* Update matrices. */
GPU_shader_bind(shader);
#ifdef __APPLE__
glDisable(GL_PRIMITIVE_RESTART);
#endif
glDrawArrays(to_gl(prim_type), 0, vertex_len);
#ifdef __APPLE__
glEnable(GL_PRIMITIVE_RESTART);
#endif
/* These lines are causing a crash on startup on some old GPU + drivers.
* They are not required so just comment them. (#55722) */
// glBindBuffer(GL_ARRAY_BUFFER, 0);

View File

@ -110,9 +110,139 @@ static const char *to_string(const Type &type)
return "ivec4";
case Type::BOOL:
return "bool";
default:
return "unknown";
/* Alias special types. */
case Type::UCHAR:
case Type::USHORT:
return "uint";
case Type::UCHAR2:
case Type::USHORT2:
return "uvec2";
case Type::UCHAR3:
case Type::USHORT3:
return "uvec3";
case Type::UCHAR4:
case Type::USHORT4:
return "uvec4";
case Type::CHAR:
case Type::SHORT:
return "int";
case Type::CHAR2:
case Type::SHORT2:
return "ivec2";
case Type::CHAR3:
case Type::SHORT3:
return "ivec3";
case Type::CHAR4:
case Type::SHORT4:
return "ivec4";
case Type::VEC3_101010I2:
return "vec3";
}
BLI_assert_unreachable();
return "unknown";
}
static const int to_component_count(const Type &type)
{
switch (type) {
case Type::FLOAT:
case Type::UINT:
case Type::INT:
case Type::BOOL:
return 1;
case Type::VEC2:
case Type::UVEC2:
case Type::IVEC2:
return 2;
case Type::VEC3:
case Type::UVEC3:
case Type::IVEC3:
return 3;
case Type::VEC4:
case Type::UVEC4:
case Type::IVEC4:
return 4;
case Type::MAT3:
return 9;
case Type::MAT4:
return 16;
/* Alias special types. */
case Type::UCHAR:
case Type::USHORT:
return 1;
case Type::UCHAR2:
case Type::USHORT2:
return 2;
case Type::UCHAR3:
case Type::USHORT3:
return 3;
case Type::UCHAR4:
case Type::USHORT4:
return 4;
case Type::CHAR:
case Type::SHORT:
return 1;
case Type::CHAR2:
case Type::SHORT2:
return 2;
case Type::CHAR3:
case Type::SHORT3:
return 3;
case Type::CHAR4:
case Type::SHORT4:
return 4;
case Type::VEC3_101010I2:
return 3;
}
BLI_assert_unreachable();
return -1;
}
static const Type to_component_type(const Type &type)
{
switch (type) {
case Type::FLOAT:
case Type::VEC2:
case Type::VEC3:
case Type::VEC4:
case Type::MAT3:
case Type::MAT4:
return Type::FLOAT;
case Type::UINT:
case Type::UVEC2:
case Type::UVEC3:
case Type::UVEC4:
return Type::UINT;
case Type::INT:
case Type::IVEC2:
case Type::IVEC3:
case Type::IVEC4:
case Type::BOOL:
return Type::INT;
/* Alias special types. */
case Type::UCHAR:
case Type::UCHAR2:
case Type::UCHAR3:
case Type::UCHAR4:
case Type::USHORT:
case Type::USHORT2:
case Type::USHORT3:
case Type::USHORT4:
return Type::UINT;
case Type::CHAR:
case Type::CHAR2:
case Type::CHAR3:
case Type::CHAR4:
case Type::SHORT:
case Type::SHORT2:
case Type::SHORT3:
case Type::SHORT4:
return Type::INT;
case Type::VEC3_101010I2:
return Type::FLOAT;
}
BLI_assert_unreachable();
return Type::FLOAT;
}
static const char *to_string(const eGPUTextureFormat &type)
@ -540,12 +670,6 @@ std::string GLShader::vertex_interface_declare(const ShaderCreateInfo &info) con
}
ss << "in " << to_string(attr.type) << " " << attr.name << ";\n";
}
/* NOTE(D4490): Fix a bug where shader without any vertex attributes do not behave correctly. */
if (GPU_type_matches_ex(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL) &&
info.vertex_inputs_.is_empty())
{
ss << "in float gpu_dummy_workaround;\n";
}
ss << "\n/* Interfaces. */\n";
for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
print_interface(ss, "out", *iface);
@ -581,7 +705,7 @@ std::string GLShader::vertex_interface_declare(const ShaderCreateInfo &info) con
std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) const
{
std::stringstream ss;
std::string pre_main;
std::string pre_main, post_main;
ss << "\n/* Interfaces. */\n";
const Vector<StageInterfaceInfo *> &in_interfaces = info.geometry_source_.is_empty() ?
@ -605,7 +729,6 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
ss << "#define gpu_position_at_vertex(v) gpu_pos[v]\n";
}
else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
std::cout << "native" << std::endl;
/* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry
* shader workaround if this extension/feature is detected. */
ss << "\n/* Stable Barycentric Coordinates. */\n";
@ -638,12 +761,63 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
if (epoxy_has_gl_extension("GL_ARB_conservative_depth")) {
ss << "layout(" << to_string(info.depth_write_) << ") out float gl_FragDepth;\n";
}
ss << "\n/* Sub-pass Inputs. */\n";
for (const ShaderCreateInfo::SubpassIn &input : info.subpass_inputs_) {
/* TODO(fclem): Add GL_EXT_shader_framebuffer_fetch support and fallback using imageLoad.
* For now avoid compilation failure. */
ss << "const " << to_string(input.type) << " " << input.name << " = " << to_string(input.type)
<< "(0);\n";
if (GLContext::framebuffer_fetch_support) {
/* Declare as inout but do not write to it. */
ss << "layout(location = " << std::to_string(input.index) << ") inout "
<< to_string(input.type) << " " << input.name << ";\n";
}
else {
std::string image_name = "gpu_subpass_img_";
image_name += std::to_string(input.index);
/* Declare global for input. */
ss << to_string(input.type) << " " << input.name << ";\n";
/* IMPORTANT: We assume that the frame-buffer will be layered or not based on the layer
* built-in flag. */
bool is_layered_fb = bool(info.builtins_ & BuiltinBits::LAYER);
/* Start with invalid value to detect failure cases. */
ImageType image_type = ImageType::FLOAT_BUFFER;
switch (to_component_type(input.type)) {
case Type::FLOAT:
image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
break;
case Type::INT:
image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
break;
case Type::UINT:
image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
break;
default:
break;
}
/* Declare image. */
using Resource = ShaderCreateInfo::Resource;
/* NOTE(fclem): Using the attachment index as resource index might be problematic as it might
* collide with other resources. */
Resource res(Resource::BindType::SAMPLER, input.index);
res.sampler.type = image_type;
res.sampler.sampler = GPUSamplerState::default_sampler();
res.sampler.name = image_name;
print_resource(ss, res, false);
char swizzle[] = "xyzw";
swizzle[to_component_count(input.type)] = '\0';
std::string texel_co = (is_layered_fb) ? "ivec3(gl_FragCoord.xy, gpu_Layer)" :
"ivec2(gl_FragCoord.xy)";
std::stringstream ss_pre;
/* Populate the global before main using texelFetch. */
ss_pre << " " << input.name << " = texelFetch(" << image_name << ", " << texel_co << ", 0)."
<< swizzle << ";\n";
pre_main += ss_pre.str();
}
}
ss << "\n/* Outputs. */\n";
for (const ShaderCreateInfo::FragOut &output : info.fragment_outputs_) {
@ -663,8 +837,7 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
}
ss << "\n";
if (pre_main.empty() == false) {
std::string post_main;
if (!pre_main.empty() || !post_main.empty()) {
ss << main_function_wrapper(pre_main, post_main);
}
return ss.str();
@ -891,6 +1064,9 @@ static char *glsl_patch_default_get()
if (GLContext::native_barycentric_support) {
STR_CONCAT(patch, slen, "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n");
}
if (GLContext::framebuffer_fetch_support) {
STR_CONCAT(patch, slen, "#extension GL_EXT_shader_framebuffer_fetch: enable\n");
}
/* Fallbacks. */
if (!GLContext::shader_draw_parameters_support) {

View File

@ -31,6 +31,8 @@ class GLShaderInterface : public ShaderInterface {
private:
/** Reference to VaoCaches using this interface */
Vector<GLVaoCache *> refs_;
/** Bitmask of color attachments to bind as images for sub-pass input emulation. */
uint8_t subpass_inputs_ = 0u;
public:
GLShaderInterface(GLuint program, const shader::ShaderCreateInfo &info);

View File

@ -39,10 +39,7 @@ const char *GLLogParser::parse_line(const char *log_line, GPULogItem &log_item)
}
if ((log_item.cursor.row != -1) && (log_item.cursor.column != -1)) {
if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_OFFICIAL))
{
if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) {
/* 0:line */
log_item.cursor.row = log_item.cursor.column;
log_item.cursor.column = -1;

View File

@ -735,7 +735,6 @@ bool GLTexture::proxy_check(int mip)
}
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_WIN, GPU_DRIVER_ANY) ||
GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_MAC, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OFFICIAL))
{
/* Some AMD drivers have a faulty `GL_PROXY_TEXTURE_..` check.
@ -747,13 +746,6 @@ bool GLTexture::proxy_check(int mip)
return true;
}
if ((type_ == GPU_TEXTURE_CUBE_ARRAY) &&
GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY))
{
/* Special fix for #79703. */
return true;
}
GLenum gl_proxy = to_gl_proxy(type_);
GLenum internal_format = to_gl_internal_format(format_);
GLenum gl_format = to_gl_data_format(format_);
@ -817,7 +809,10 @@ void GLTexture::check_feedback_loop()
if (fb_[i] == fb) {
GPUAttachmentType type = fb_attachment_[i];
GPUAttachment attachment = fb->attachments_[type];
if (attachment.mip <= mip_max_ && attachment.mip >= mip_min_) {
/* Check for when texture is used with texture barrier. */
GPUAttachment attachment_read = fb->tmp_detached_[type];
if (attachment.mip <= mip_max_ && attachment.mip >= mip_min_ &&
attachment_read.tex == nullptr) {
char msg[256];
SNPRINTF(msg,
"Feedback loop: Trying to bind a texture (%s) with mip range %d-%d but mip %d is "

View File

@ -985,52 +985,52 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
/* Atomic Min. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord), data);
return tex.texture->atomic_fetch_min(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), data);
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), data);
return tex.texture->atomic_fetch_min(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), data);
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint3(coord), data);
return tex.texture->atomic_fetch_min(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic Exchange. */
template<typename S, access A, int N>
vec<S, N> _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
vec<S, N> data)
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord), data);
return tex.texture->atomic_exchange(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
@ -1038,31 +1038,31 @@ S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), data);
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), data);
return tex.texture->atomic_exchange(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), data);
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint3(coord), data);
return tex.texture->atomic_exchange(uint3(coord), vec<S, 4>(data)).x;
}
#else

View File

@ -336,12 +336,15 @@ static void test_framebuffer_subpass_input()
const int2 size(1, 1);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ;
GPUTexture *texture = GPU_texture_create_2d(
GPUTexture *texture_a = GPU_texture_create_2d(
__func__, UNPACK2(size), 1, GPU_R32I, usage, nullptr);
GPUTexture *texture_b = GPU_texture_create_2d(
__func__, UNPACK2(size), 1, GPU_R32I, usage, nullptr);
GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__);
GPU_framebuffer_ensure_config(&framebuffer,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture)});
GPU_framebuffer_ensure_config(
&framebuffer,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture_a), GPU_ATTACHMENT_TEXTURE(texture_b)});
GPU_framebuffer_bind(framebuffer);
const float4 clear_color(0.0f);
@ -361,7 +364,7 @@ static void test_framebuffer_subpass_input()
create_info_read.vertex_source("gpu_framebuffer_subpass_input_test.glsl");
create_info_read.fragment_source("gpu_framebuffer_subpass_input_test.glsl");
create_info_read.subpass_in(0, Type::INT, "in_value", 0);
create_info_read.fragment_out(0, Type::INT, "out_value");
create_info_read.fragment_out(1, Type::INT, "out_value");
GPUShader *shader_read = GPU_shader_create_from_info(
reinterpret_cast<GPUShaderCreateInfo *>(&create_info_read));
@ -373,10 +376,16 @@ static void test_framebuffer_subpass_input()
GPU_vertbuf_data_alloc(verts, 3);
GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_TRIS, verts, nullptr, GPU_BATCH_OWNS_VBO);
/* Metal Raster Order Group does not need that. */
GPU_framebuffer_subpass_transition(
framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_IGNORE});
GPU_batch_set_shader(batch, shader_write);
GPU_batch_draw(batch);
/* TODO(fclem): Vulkan might want to introduce an explicit sync event here. */
/* Metal Raster Order Group does not need that. */
GPU_framebuffer_subpass_transition(
framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_READ, GPU_ATTACHEMENT_WRITE});
GPU_batch_set_shader(batch, shader_read);
GPU_batch_draw(batch);
@ -385,12 +394,17 @@ static void test_framebuffer_subpass_input()
GPU_finish();
int *read_data = static_cast<int *>(GPU_texture_read(texture, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data, 0xDEADC0DE);
MEM_freeN(read_data);
int *read_data_a = static_cast<int *>(GPU_texture_read(texture_a, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data_a, 0xDEADBEEF);
MEM_freeN(read_data_a);
int *read_data_b = static_cast<int *>(GPU_texture_read(texture_b, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data_b, 0xDEADC0DE);
MEM_freeN(read_data_b);
GPU_framebuffer_free(framebuffer);
GPU_texture_free(texture);
GPU_texture_free(texture_a);
GPU_texture_free(texture_b);
GPU_shader_free(shader_write);
GPU_shader_free(shader_read);

View File

@ -213,6 +213,18 @@ void VKFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPUL
/** \} */
/* -------------------------------------------------------------------- */
/** \name Sub-pass transition
* \{ */
void VKFrameBuffer::subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/)
{
NOT_YET_IMPLEMENTED;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Read back
* \{ */

View File

@ -55,6 +55,9 @@ class VKFrameBuffer : public FrameBuffer {
void attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore /*ls*/) override;
void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) override;
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,
const int area[4],

View File

@ -3010,6 +3010,25 @@ static void rna_def_property_wrapper_funcs(FILE *f, StructDefRNA *dsrna, Propert
}
}
/**
* Counts the number of template arguments by looking at `<` and `,` characters in the name. More
* complex template arguments that contain `,` themselves are not handled yet.
*/
static int count_template_args(const char *function_name)
{
BLI_assert(function_name != nullptr);
if (!strstr(function_name, "<")) {
return 0;
}
int count = 1;
for (const char *c = function_name; *c; c++) {
if (*c == ',') {
count++;
}
}
return count;
}
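A standalone illustration of the counting rule (a copy of the logic above under a hypothetical name, with sample identifiers that are not from the codebase):

#include <cassert>
#include <cstring>

static int count_template_args_sketch(const char *function_name)
{
  if (!std::strstr(function_name, "<")) {
    return 0;
  }
  int count = 1;
  for (const char *c = function_name; *c; c++) {
    if (*c == ',') {
      count++;
    }
  }
  return count;
}

int main()
{
  assert(count_template_args_sketch("rna_Foo_bar") == 0);       /* No '<': not a template. */
  assert(count_template_args_sketch("rna_Foo_bar<A>") == 1);    /* '<' present, no ','. */
  assert(count_template_args_sketch("rna_Foo_bar<A, B>") == 2); /* Each ',' adds one. */
}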
static void rna_def_function_wrapper_funcs(FILE *f, StructDefRNA *dsrna, FunctionDefRNA *dfunc)
{
StructRNA *srna = dsrna->srna;
@ -3026,7 +3045,10 @@ static void rna_def_function_wrapper_funcs(FILE *f, StructDefRNA *dsrna, Functio
rna_construct_wrapper_function_name(
funcname, sizeof(funcname), srna->identifier, func->identifier, "func");
fprintf(f, "RNA_EXTERN_C ");
/* A function with templates cannot have C linkage. */
if (!(dfunc->call && count_template_args(dfunc->call) > 0)) {
fprintf(f, "RNA_EXTERN_C ");
}
rna_generate_static_parameter_prototypes(f, srna, dfunc, funcname, 0);
fprintf(f, "\n{\n");
@ -3785,6 +3807,19 @@ static void rna_generate_static_parameter_prototypes(FILE *f,
dsrna = rna_find_struct_def(srna);
func = dfunc->func;
const int template_args_num = dfunc->call ? count_template_args(dfunc->call) : 0;
if (!name_override && template_args_num > 0) {
/* The template parameters are named A, B, C, etc. */
BLI_assert(template_args_num <= 26);
fprintf(f, "template<typename A");
char template_name = 'B';
for (int i = 0; i < template_args_num - 1; i++) {
fprintf(f, ", typename %c", template_name);
template_name++;
}
fprintf(f, "> ");
}
/* return type */
LISTBASE_FOREACH (PropertyDefRNA *, dparm, &dfunc->cont.properties) {
if (dparm->prop == func->c_ret) {
@ -3810,7 +3845,15 @@ static void rna_generate_static_parameter_prototypes(FILE *f,
/* function name */
if (name_override == nullptr || name_override[0] == '\0') {
fprintf(f, "%s(", dfunc->call);
/* Here we only need the function name without the template parameters. */
const char *template_begin = strstr(dfunc->call, "<");
if (template_begin) {
const int num_chars = template_begin - dfunc->call;
fprintf(f, "%.*s(", num_chars, dfunc->call);
}
else {
fprintf(f, "%s(", dfunc->call);
}
}
else {
fprintf(f, "%s(", name_override);

View File

@ -5248,13 +5248,15 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
/* Values to assign. */
RawPropertyType *r_raw_type,
int *r_attr_tot,
bool *r_attr_signed)
bool *r_attr_signed,
bool *r_is_empty)
{
PropertyRNA *prop;
bool attr_ok = true;
*r_raw_type = PROP_RAW_UNSET;
*r_attr_tot = 0;
*r_attr_signed = false;
*r_is_empty = true;
/* NOTE: this fails with zero-length lists, so don't let this get called in that case. */
RNA_PROP_BEGIN (&self->ptr, itemptr, self->prop) {
@ -5267,6 +5269,7 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
else {
attr_ok = false;
}
*r_is_empty = false;
break;
}
RNA_PROP_END;
@ -5277,6 +5280,7 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
/* pyrna_prop_collection_foreach_get/set both use this. */
static int foreach_parse_args(BPy_PropertyRNA *self,
PyObject *args,
const char *function_name,
/* Values to assign. */
const char **r_attr,
@ -5287,9 +5291,6 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
int *r_attr_tot,
bool *r_attr_signed)
{
int array_tot;
int target_tot;
*r_size = *r_attr_tot = 0;
*r_attr_signed = false;
*r_raw_type = PROP_RAW_UNSET;
@ -5299,10 +5300,10 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
}
if (!PySequence_Check(*r_seq) && PyObject_CheckBuffer(*r_seq)) {
PyErr_Format(
PyExc_TypeError,
"foreach_get/set expected second argument to be a sequence or buffer, not a %.200s",
Py_TYPE(*r_seq)->tp_name);
PyErr_Format(PyExc_TypeError,
"%s(..) expected second argument to be a sequence or buffer, not a %.200s",
function_name,
Py_TYPE(*r_seq)->tp_name);
return -1;
}
@ -5310,6 +5311,10 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
*r_tot = PySequence_Size(*r_seq);
if (*r_tot > 0) {
#if 0
/* Avoid a full collection count when all that's needed is to check whether it's empty. */
int array_tot;
if (RNA_property_type(self->prop) == PROP_COLLECTION) {
array_tot = RNA_property_collection_length(&self->ptr, self->prop);
}
@ -5318,42 +5323,63 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
}
if (array_tot == 0) {
PyErr_Format(PyExc_TypeError,
"foreach_get(attr, sequence) sequence length mismatch given %d, needed 0",
"%s(..) sequence length mismatch given %d, needed 0",
function_name,
*r_tot);
return -1;
}
#endif
if (!foreach_attr_type(self, *r_attr, r_raw_type, r_attr_tot, r_attr_signed)) {
bool is_empty = false; /* `array_tot == 0`. */
if (!foreach_attr_type(self, *r_attr, r_raw_type, r_attr_tot, r_attr_signed, &is_empty)) {
PyErr_Format(PyExc_AttributeError,
"foreach_get/set '%.200s.%200s[...]' elements have no attribute '%.200s'",
"%s(..) '%.200s.%200s[...]' elements have no attribute '%.200s'",
function_name,
RNA_struct_identifier(self->ptr.type),
RNA_property_identifier(self->prop),
*r_attr);
return -1;
}
if (is_empty) {
PyErr_Format(PyExc_TypeError,
"%s(..) sequence length mismatch given %d, needed 0",
function_name,
*r_tot);
return -1;
}
*r_size = RNA_raw_type_sizeof(*r_raw_type);
#if 0
/* This size check does not work because it is based on the size of the
* first element, and elements in the collection/array can have different sizes
* (e.g. for mixed quad/triangle meshes). See for example issue #111117. */
if ((*r_attr_tot) < 1) {
*r_attr_tot = 1;
}
target_tot = array_tot * (*r_attr_tot);
const int target_tot = array_tot * (*r_attr_tot);
/* rna_access.cc - rna_raw_access(...) uses this same method. */
if (target_tot != (*r_tot)) {
PyErr_Format(PyExc_TypeError,
"foreach_get(attr, sequence) sequence length mismatch given %d, needed %d",
"%s(..) sequence length mismatch given %d, needed %d",
function_name,
*r_tot,
target_tot);
return -1;
}
#endif
}
/* Check 'r_attr_tot' otherwise we don't know if any values were set.
* This isn't ideal because it means running on an empty list may
* fail silently when it's not compatible. */
if (*r_size == 0 && *r_attr_tot != 0) {
PyErr_SetString(PyExc_AttributeError, "attribute does not support foreach method");
PyErr_Format(
PyExc_AttributeError, "%s(..): attribute does not support foreach method", function_name);
return -1;
}
return 0;
@ -5412,8 +5438,16 @@ static PyObject *foreach_getset(BPy_PropertyRNA *self, PyObject *args, int set)
bool attr_signed;
RawPropertyType raw_type;
if (foreach_parse_args(
self, args, &attr, &seq, &tot, &size, &raw_type, &attr_tot, &attr_signed) == -1)
if (foreach_parse_args(self,
args,
set ? "foreach_set" : "foreach_get",
&attr,
&seq,
&tot,
&size,
&raw_type,
&attr_tot,
&attr_signed) == -1)
{
return nullptr;
}

View File

@ -692,8 +692,6 @@ static void wm_file_read_post(bContext *C,
CTX_wm_window_set(C, static_cast<wmWindow *>(wm->windows.first));
}
WM_cursor_wait(true);
#ifdef WITH_PYTHON
if (is_startup_file) {
/* On startup (by default), Python won't have been initialized.
@ -820,8 +818,6 @@ static void wm_file_read_post(bContext *C,
WM_toolsystem_init(C);
}
}
WM_cursor_wait(false);
}
static void wm_read_callback_pre_wrapper(bContext *C, const char *filepath)
@ -1071,7 +1067,6 @@ bool WM_file_read(bContext *C, const char *filepath, ReportList *reports)
bf_reports.duration.whole = PIL_check_seconds_timer() - bf_reports.duration.whole;
file_read_reports_finalize(&bf_reports);
WM_cursor_wait(true);
success = true;
}
}
@ -1176,8 +1171,6 @@ void wm_homefile_read_ex(bContext *C,
char filepath_startup[FILE_MAX];
char filepath_userdef[FILE_MAX];
WM_cursor_wait(true);
/* When 'app_template' is set:
* '{BLENDER_USER_CONFIG}/{app_template}' */
char app_template_system[FILE_MAX];
@ -1495,8 +1488,6 @@ void wm_homefile_read_ex(bContext *C,
CTX_wm_window_set(C, nullptr);
}
}
WM_cursor_wait(false);
}
void wm_homefile_read(bContext *C,