Undo: support implicit-sharing in memfile undo step #106903

Merged
Jacques Lucke merged 78 commits from JacquesLucke/blender:implicit-sharing-undo into main 2024-02-29 17:15:09 +01:00
41 changed files with 1805 additions and 637 deletions
Showing only changes of commit 061e3d827a

View File

@ -1628,7 +1628,7 @@ DISTRO_IDS_INSTALLERS = {
def get_distro(settings):
if settings.distro_id is not ...:
settings.logger.info(f"Distribution identifier forced by user to {settings.distro_id}.")
return
return settings.distro_id
import platform
info = platform.freedesktop_os_release()
ids = [info["ID"]]

View File

@ -4520,7 +4520,8 @@ static void xdg_output_handle_logical_size(void *data,
* Until this is fixed, validate that _some_ kind of scaling is being
* done (we can't match exactly because fractional scaling can't be
* detected otherwise), then override if necessary. */
if ((output->size_logical[0] == width) && (output->scale_fractional == wl_fixed_from_int(1))) {
if ((output->size_logical[0] == width) &&
(output->scale_fractional == (1 * FRACTIONAL_DENOMINATOR))) {
GHOST_PRINT("xdg_output scale did not match, overriding with wl_output scale\n");
#ifdef USE_GNOME_CONFINE_HACK
@ -4667,7 +4668,7 @@ static void output_handle_done(void *data, struct wl_output * /*wl_output*/)
GHOST_ASSERT(size_native[0] && output->size_logical[0],
"Screen size values were not set when they were expected to be.");
output->scale_fractional = wl_fixed_from_int(size_native[0]) / output->size_logical[0];
output->scale_fractional = (size_native[0] * FRACTIONAL_DENOMINATOR) / output->size_logical[0];
output->has_scale_fractional = true;
}
}
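For reference, the arithmetic above can be checked with made-up monitor sizes (a sketch, not part of the patch): storing the scale in 120ths keeps the whole computation in integers.

/* Sketch only, assuming a hypothetical 3840 px native width shown at a 2560 px logical width. */
constexpr int kFractionalDenominator = 120;
constexpr int kSizeNativeX = 3840;
constexpr int kSizeLogicalX = 2560;
/* (3840 * 120) / 2560 == 180, i.e. a 1.5x scale expressed in 120ths, with no floating point. */
static_assert((kSizeNativeX * kFractionalDenominator) / kSizeLogicalX == 180);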
@ -7063,6 +7064,7 @@ bool GHOST_SystemWayland::output_unref(wl_output *wl_output)
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
if (win->outputs_leave(output)) {
win->outputs_changed_update_scale_tag();
changed = true;
}
}
@ -7087,7 +7089,7 @@ void GHOST_SystemWayland::output_scale_update(GWL_Output *output)
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
if (!(std::find(outputs.begin(), outputs.end(), output) == outputs.cend())) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
}

View File

@ -67,6 +67,8 @@ wl_fixed_t gwl_window_scale_wl_fixed_from(const GWL_WindowScaleParams &scale_par
int gwl_window_scale_int_to(const GWL_WindowScaleParams &scale_params, int value);
int gwl_window_scale_int_from(const GWL_WindowScaleParams &scale_params, int value);
#define FRACTIONAL_DENOMINATOR 120
#ifdef WITH_GHOST_WAYLAND_DYNLOAD
/**
* Return true when all required WAYLAND libraries are present,
@ -100,10 +102,10 @@ struct GWL_Output {
* as this is what is used for most API calls.
* Only use fractional scaling to calculate the DPI.
*
* \note Internally an #wl_fixed_t is used to store the scale of the display,
* so use the same value here (avoid floating point arithmetic in general).
* \note Use the same scale as #wp_fractional_scale_manager_v1
* (avoid floating point arithmetic in general).
*/
wl_fixed_t scale_fractional = wl_fixed_from_int(1);
int scale_fractional = (1 * FRACTIONAL_DENOMINATOR);
bool has_scale_fractional = false;
std::string make;

View File

@ -40,7 +40,6 @@
#include <xdg-activation-v1-client-protocol.h>
#include <xdg-decoration-unstable-v1-client-protocol.h>
#include <xdg-shell-client-protocol.h>
#define FRACTIONAL_DENOMINATOR 120
#include <atomic>
@ -150,6 +149,8 @@ enum eGWL_PendingWindowActions {
* this window is visible on may have changed. Recalculate the windows scale.
*/
PENDING_OUTPUT_SCALE_UPDATE,
PENDING_WINDOW_SURFACE_SCALE,
/**
* The surface needs a commit to run.
* Use this to avoid committing immediately which can cause flickering when other operations
@ -175,8 +176,16 @@ struct GWL_WindowFrame {
bool is_active = false;
/** Disable when the fractional scale is a whole number. */
int fractional_scale = 0;
/**
* Store the value of #wp_fractional_scale_v1_listener::preferred_scale
* before it's applied.
*/
int fractional_scale_preferred = 0;
/** The scale passed to #wl_surface_set_buffer_scale. */
int buffer_scale = 0;
/** Scale has been set (for the first time). */
bool is_scale_init = false;
};
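To illustrate how the new fields relate (a sketch with assumed values, not code from the patch): a 150% preferred scale keeps fractional scaling active with a buffer scale of 1, while an exact 200% scale disables fractional scaling and relies on the buffer scale alone.

/* Hypothetical values for the scale-related GWL_WindowFrame fields. */
struct ExampleFrameScale {
  int fractional_scale;           /* Zero when the scale is a whole number. */
  int fractional_scale_preferred; /* Raw value from wp_fractional_scale_v1, in 120ths. */
  int buffer_scale;               /* Value passed to wl_surface_set_buffer_scale. */
};
constexpr ExampleFrameScale at_150_percent = {180, 180, 1};
constexpr ExampleFrameScale at_200_percent = {0, 240, 2};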
struct GWL_Window {
@ -417,7 +426,9 @@ static int gwl_window_fractional_from_viewport_round(const GWL_WindowFrame &fram
return lroundf(double(value * FRACTIONAL_DENOMINATOR) / double(frame.fractional_scale));
}
static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commit)
static bool gwl_window_viewport_set(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->viewport != nullptr) {
return false;
@ -434,7 +445,14 @@ static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commi
/* Set the buffer scale to 1 since a viewport will be used. */
if (win->frame.buffer_scale != 1) {
win->frame.buffer_scale = 1;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -446,7 +464,9 @@ static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commi
return true;
}
static bool gwl_window_viewport_unset(GWL_Window *win, bool *r_surface_needs_commit)
static bool gwl_window_viewport_unset(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->viewport == nullptr) {
return false;
@ -458,7 +478,14 @@ static bool gwl_window_viewport_unset(GWL_Window *win, bool *r_surface_needs_com
GHOST_ASSERT(win->frame.buffer_scale == 1, "Unexpected scale!");
if (win->frame_pending.buffer_scale != win->frame.buffer_scale) {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -548,7 +575,8 @@ static void gwl_window_activate(GWL_Window *win)
* \{ */
static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
bool *r_surface_needs_commit)
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->frame_pending.fractional_scale == win->frame.fractional_scale &&
win->frame_pending.buffer_scale == win->frame.buffer_scale) {
@ -557,16 +585,21 @@ static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
if (win->frame_pending.fractional_scale) {
win->frame.fractional_scale = win->frame_pending.fractional_scale;
gwl_window_viewport_set(win, r_surface_needs_commit);
gwl_window_viewport_set(win, r_surface_needs_commit, r_surface_needs_buffer_scale);
gwl_window_viewport_size_update(win);
}
else {
if (win->viewport) {
gwl_window_viewport_unset(win, r_surface_needs_commit);
gwl_window_viewport_unset(win, r_surface_needs_commit, r_surface_needs_buffer_scale);
}
else {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -577,7 +610,10 @@ static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
}
}
static void gwl_window_frame_pending_size_set(GWL_Window *win, bool *r_surface_needs_commit)
static void gwl_window_frame_pending_size_set(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_egl_resize,
bool *r_surface_needs_buffer_scale)
{
if (win->frame_pending.size[0] == 0 || win->frame_pending.size[1] == 0) {
return;
@ -588,13 +624,19 @@ static void gwl_window_frame_pending_size_set(GWL_Window *win, bool *r_surface_n
if (win->frame_pending.fractional_scale != win->frame.fractional_scale ||
win->frame_pending.buffer_scale != win->frame.buffer_scale) {
gwl_window_frame_pending_fractional_scale_set(win, r_surface_needs_commit);
gwl_window_frame_pending_fractional_scale_set(
win, r_surface_needs_commit, r_surface_needs_buffer_scale);
}
else {
gwl_window_viewport_size_update(win);
}
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
if (r_surface_needs_egl_resize) {
*r_surface_needs_egl_resize = true;
}
else {
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
}
win->ghost_window->notify_size();
@ -628,7 +670,6 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
gwl_window_frame_update_from_pending(win);
}
if (actions[PENDING_EGL_WINDOW_RESIZE]) {
gwl_window_viewport_size_update(win);
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
}
# ifdef GHOST_OPENGL_ALPHA
@ -639,6 +680,9 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
if (actions[PENDING_OUTPUT_SCALE_UPDATE]) {
win->ghost_window->outputs_changed_update_scale();
}
if (actions[PENDING_WINDOW_SURFACE_SCALE]) {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (actions[PENDING_WINDOW_SURFACE_COMMIT]) {
wl_surface_commit(win->wl_surface);
}
@ -660,16 +704,42 @@ static void gwl_window_frame_update_from_pending_no_lock(GWL_Window *win)
const bool dpi_changed = win->frame_pending.fractional_scale != win->frame.fractional_scale;
bool surface_needs_commit = false;
bool surface_needs_egl_resize = false;
bool surface_needs_buffer_scale = false;
if (win->frame_pending.size[0] != 0 && win->frame_pending.size[1] != 0) {
if ((win->frame.size[0] != win->frame_pending.size[0]) ||
(win->frame.size[1] != win->frame_pending.size[1])) {
gwl_window_frame_pending_size_set(win, &surface_needs_commit);
gwl_window_frame_pending_size_set(
win, &surface_needs_commit, &surface_needs_egl_resize, &surface_needs_buffer_scale);
}
}
if (win->fractional_scale_handle) {
gwl_window_frame_pending_fractional_scale_set(win, &surface_needs_commit);
if (win->frame_pending.fractional_scale || win->frame.fractional_scale) {
gwl_window_frame_pending_fractional_scale_set(
win, &surface_needs_commit, &surface_needs_buffer_scale);
}
else {
if (win->frame_pending.buffer_scale != win->frame.buffer_scale) {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
surface_needs_buffer_scale = true;
}
}
if (surface_needs_egl_resize) {
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_EGL_WINDOW_RESIZE);
#else
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
#endif
}
if (surface_needs_buffer_scale) {
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_WINDOW_SURFACE_SCALE);
#else
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
#endif
}
if (surface_needs_commit) {
@ -729,11 +799,26 @@ static int output_scale_cmp(const GWL_Output *output_a, const GWL_Output *output
if (output_a->scale > output_b->scale) {
return 1;
}
if (output_a->has_scale_fractional || output_b->has_scale_fractional) {
const int scale_fractional_a = output_a->has_scale_fractional ?
output_a->scale_fractional :
(output_a->scale * FRACTIONAL_DENOMINATOR);
const int scale_fractional_b = output_b->has_scale_fractional ?
output_b->scale_fractional :
(output_b->scale * FRACTIONAL_DENOMINATOR);
if (scale_fractional_a < scale_fractional_b) {
return -1;
}
if (scale_fractional_a > scale_fractional_b) {
return 1;
}
}
return 0;
}
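A small worked case for the fallback above (illustrative values only): the fractional comparison only matters when the integer scales tie, and an output without a fractional scale is treated as its integer scale expressed in 120ths.

/* Two hypothetical outputs that both report an integer scale of 2. */
constexpr int scale_fractional_a = 270;     /* Output A advertises 2.25x. */
constexpr int scale_fractional_b = 2 * 120; /* Output B has no fractional scale: scale * FRACTIONAL_DENOMINATOR. */
static_assert(scale_fractional_a > scale_fractional_b); /* So output_scale_cmp() would return 1 for (A, B). */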
static int outputs_max_scale_or_default(const std::vector<GWL_Output *> &outputs,
const int32_t scale_default)
const int32_t scale_default,
int *r_scale_fractional)
{
const GWL_Output *output_max = nullptr;
for (const GWL_Output *reg_output : outputs) {
@ -743,9 +828,46 @@ static int outputs_max_scale_or_default(const std::vector<GWL_Output *> &outputs
}
if (output_max) {
if (r_scale_fractional) {
*r_scale_fractional = output_max->has_scale_fractional ?
output_max->scale_fractional :
(output_max->scale * FRACTIONAL_DENOMINATOR);
}
return output_max->scale;
}
if (r_scale_fractional) {
*r_scale_fractional = scale_default * FRACTIONAL_DENOMINATOR;
}
return scale_default;
}
static int outputs_uniform_scale_or_default(const std::vector<GWL_Output *> &outputs,
const int32_t scale_default,
int *r_scale_fractional)
{
const GWL_Output *output_uniform = nullptr;
for (const GWL_Output *reg_output : outputs) {
if (!output_uniform) {
output_uniform = reg_output;
}
else if (output_scale_cmp(output_uniform, reg_output) != 0) {
/* Non-uniform. */
output_uniform = nullptr;
break;
}
}
if (output_uniform) {
if (r_scale_fractional) {
*r_scale_fractional = output_uniform->has_scale_fractional ?
output_uniform->scale_fractional :
(output_uniform->scale * FRACTIONAL_DENOMINATOR);
}
return output_uniform->scale;
}
if (r_scale_fractional) {
*r_scale_fractional = scale_default * FRACTIONAL_DENOMINATOR;
}
return scale_default;
}
@ -876,35 +998,10 @@ static void wp_fractional_scale_handle_preferred_scale(
double(preferred_scale) / FRACTIONAL_DENOMINATOR);
GWL_Window *win = static_cast<GWL_Window *>(data);
const bool is_fractional = (preferred_scale % FRACTIONAL_DENOMINATOR) != 0;
/* When non-fractional, never use fractional scaling! */
win->frame_pending.fractional_scale = is_fractional ? preferred_scale : 0;
win->frame_pending.buffer_scale = is_fractional ? 1 : preferred_scale / FRACTIONAL_DENOMINATOR;
const int scale_prev = win->frame.fractional_scale ?
win->frame.fractional_scale :
win->frame.buffer_scale * FRACTIONAL_DENOMINATOR;
const int scale_next = preferred_scale;
if (scale_prev != scale_next) {
/* Resize the window failing to do so results in severe flickering with a
* multi-monitor setup when multiple monitors have different scales.
*
* NOTE: some flickering is still possible even when resizing this
* happens when dragging the right hand side of the title-bar in KDE
* as expanding changed the size on the RHS, this may be up to the compositor to fix. */
for (size_t i = 0; i < ARRAY_SIZE(win->frame_pending.size); i++) {
const int value = win->frame_pending.size[i] ? win->frame_pending.size[i] :
win->frame.size[i];
win->frame_pending.size[i] = lroundf(value * (double(scale_next) / double(scale_prev)));
}
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_WINDOW_FRAME_CONFIGURE);
#else
gwl_window_frame_update_from_pending(win);
#endif
if (win->frame_pending.fractional_scale_preferred != int(preferred_scale)) {
win->frame_pending.fractional_scale_preferred = preferred_scale;
win->ghost_window->outputs_changed_update_scale_tag();
}
}
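For context, wp_fractional_scale_v1 reports the preferred scale in 120ths; the decoding that later consumes fractional_scale_preferred follows the is_fractional logic shown further down. A small sketch with assumed values:

/* Sketch only: decoding a preferred_scale value received from the compositor. */
constexpr int kDenominator = 120;
constexpr int kPreferred150 = 180; /* 180 % 120 != 0: keep fractional scaling, buffer scale stays 1. */
constexpr int kPreferred200 = 240; /* 240 % 120 == 0: no fractional scaling, buffer scale becomes 2. */
static_assert(kPreferred150 % kDenominator != 0);
static_assert(kPreferred200 / kDenominator == 2);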
@ -1124,7 +1221,7 @@ static void surface_handle_enter(void *data,
GWL_Output *reg_output = ghost_wl_output_user_data(wl_output);
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(data);
if (win->outputs_enter(reg_output)) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
@ -1141,7 +1238,7 @@ static void surface_handle_leave(void *data,
GWL_Output *reg_output = ghost_wl_output_user_data(wl_output);
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(data);
if (win->outputs_leave(reg_output)) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
@ -1197,7 +1294,13 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
*
* Using the maximum scale is best as it results in the window first being smaller,
* avoiding a large window flashing before it's made smaller. */
window_->frame.buffer_scale = outputs_max_scale_or_default(system_->outputs(), 1);
int fractional_scale = 0;
window_->frame.buffer_scale = outputs_uniform_scale_or_default(
system_->outputs(), 1, &fractional_scale);
if (fractional_scale / FRACTIONAL_DENOMINATOR != window_->frame.buffer_scale) {
window_->frame.buffer_scale = 1;
}
window_->frame_pending.buffer_scale = window_->frame.buffer_scale;
window_->frame.size[0] = int32_t(width);
@ -1443,7 +1546,7 @@ GHOST_TSuccess GHOST_WindowWayland::setClientSize(const uint32_t width, const ui
window_->frame_pending.size[0] = width;
window_->frame_pending.size[1] = height;
gwl_window_frame_pending_size_set(window_, nullptr);
gwl_window_frame_pending_size_set(window_, nullptr, nullptr, nullptr);
return GHOST_kSuccess;
}
@ -1830,6 +1933,15 @@ GHOST_TSuccess GHOST_WindowWayland::notify_decor_redraw()
* Functionality only used for the WAYLAND implementation.
* \{ */
void GHOST_WindowWayland::outputs_changed_update_scale_tag()
{
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(window_, PENDING_OUTPUT_SCALE_UPDATE);
#else
outputs_changed_update_scale();
#endif
}
bool GHOST_WindowWayland::outputs_changed_update_scale()
{
#ifdef USE_EVENT_BACKGROUND_THREAD
@ -1838,46 +1950,109 @@ bool GHOST_WindowWayland::outputs_changed_update_scale()
return false;
}
#endif
int fractional_scale_next = -1;
int fractional_scale_from_output = 0;
if (window_->fractional_scale_handle) {
/* Let the #wp_fractional_scale_v1_listener::preferred_scale callback handle
* changes to the windows scale. */
return false;
}
int scale_next = outputs_max_scale_or_default(outputs(), 0, &fractional_scale_from_output);
const int scale_next = outputs_max_scale_or_default(outputs(), 0);
if (UNLIKELY(scale_next == 0)) {
return false;
}
const int scale_curr = window_->frame.buffer_scale;
bool changed = false;
if (scale_next != scale_curr) {
window_->frame.buffer_scale = scale_next;
wl_surface_set_buffer_scale(window_->wl_surface, scale_next);
if (window_->fractional_scale_handle) {
#ifdef USE_EVENT_BACKGROUND_THREAD
std::lock_guard lock_frame_guard{window_->frame_pending_mutex};
#endif
/* Let the #wp_fractional_scale_v1_listener::preferred_scale callback handle
* changes to the window's scale. */
if (window_->frame_pending.fractional_scale_preferred != 0) {
fractional_scale_next = window_->frame_pending.fractional_scale_preferred;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
}
/* It's important to resize the window immediately, to avoid the window changing size
* and flickering in a constant feedback loop (in some cases). */
if (fractional_scale_next == -1) {
fractional_scale_next = fractional_scale_from_output;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
else {
/* NOTE(@ideasman42): This often overrides #wp_fractional_scale_v1_listener::preferred_scale
* in favor of using the greatest overlapping scale.
* This was requested by the studio to prevent a tablet's built-in display of 75%
* from causing the main display to be up-scaled (appearing pixelated).
if (fractional_scale_next < fractional_scale_from_output) {
fractional_scale_next = fractional_scale_from_output;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
}
if ((window_->frame_pending.size[0] != 0) && (window_->frame_pending.size[1] != 0)) {
/* Unlikely but possible that a pending size change is set. */
window_->frame.size[0] = window_->frame_pending.size[0];
window_->frame.size[1] = window_->frame_pending.size[1];
bool changed = false;
#ifdef USE_EVENT_BACKGROUND_THREAD
std::lock_guard lock_frame_guard{window_->frame_pending_mutex};
#endif
bool force_frame_update = false;
bool is_fractional_prev = window_->frame.fractional_scale != 0;
const bool is_fractional_next = (fractional_scale_next % FRACTIONAL_DENOMINATOR) != 0;
/* When non-fractional, never use fractional scaling! */
window_->frame_pending.fractional_scale = is_fractional_next ? fractional_scale_next : 0;
window_->frame_pending.buffer_scale = is_fractional_next ?
1 :
fractional_scale_next / FRACTIONAL_DENOMINATOR;
int fractional_scale_prev = window_->frame.fractional_scale ?
window_->frame.fractional_scale :
window_->frame.buffer_scale * FRACTIONAL_DENOMINATOR;
int scale_prev = fractional_scale_prev / FRACTIONAL_DENOMINATOR;
if (window_->frame_pending.is_scale_init == false) {
window_->frame_pending.is_scale_init = true;
/* NOTE(@ideasman42): Needed because new windows are created at their previous pixel-dimensions
* as the window doesn't save its DPI. Restore the window size under the assumption it's
* opening on the same monitor so a window keeps its previous size on a user's system.
*
* To support anything more sophisticated, windows would need to be created with a scale
* argument (representing the scale used when the window was stored, for example). */
is_fractional_prev = is_fractional_next;
scale_prev = scale_next;
fractional_scale_prev = fractional_scale_next;
/* Leave `window_->frame_pending` as-is, so changes are detected and updates are applied. */
force_frame_update = true;
}
if ((fractional_scale_prev != fractional_scale_next) ||
(window_->frame_pending.buffer_scale != window_->frame.buffer_scale) ||
(force_frame_update == true)) {
/* Resize the window, as failing to do so results in severe flickering with a
* multi-monitor setup when multiple monitors have different scales.
*
* NOTE: some flickering is still possible even when resizing; this happens when dragging
* the right hand side of the title-bar in KDE, as expanding changes the size on the RHS.
* This may be up to the compositor to fix. */
for (size_t i = 0; i < ARRAY_SIZE(window_->frame_pending.size); i++) {
const int value = window_->frame_pending.size[i] ? window_->frame_pending.size[i] :
window_->frame.size[i];
if (is_fractional_prev || is_fractional_next) {
window_->frame_pending.size[i] = lroundf(
value * (double(fractional_scale_next) / double(fractional_scale_prev)));
}
else {
window_->frame_pending.size[i] = (value * scale_next) / scale_prev;
}
if (window_->frame_pending.buffer_scale > 1) {
window_->frame_pending.size[i] = (window_->frame_pending.size[i] /
window_->frame_pending.buffer_scale) *
window_->frame_pending.buffer_scale;
}
}
/* Write to the pending values as these are what is applied. */
window_->frame_pending.size[0] = (window_->frame.size[0] / scale_curr) * scale_next;
window_->frame_pending.size[1] = (window_->frame.size[1] / scale_curr) * scale_next;
gwl_window_frame_pending_size_set(window_, nullptr);
GHOST_SystemWayland *system = window_->ghost_system;
system->pushEvent(
new GHOST_Event(system->getMilliSeconds(), GHOST_kEventWindowDPIHintChanged, this));
gwl_window_frame_update_from_pending_no_lock(window_);
changed = true;
}
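As a worked example of the resize arithmetic above (assumed sizes, not from the patch): moving a 1000 px wide window from a 1.0 scale (120) to a 1.5 scale (180) in the fractional branch gives lroundf(1000 * 180.0 / 120.0) = 1500, and since the buffer scale stays 1 no rounding to a buffer-scale multiple is needed. The integer-rounding sketch below reproduces the same result.

/* Sketch only: equivalent integer rounding for this particular example. */
constexpr int kFractionalPrev = 120; /* 1.0x */
constexpr int kFractionalNext = 180; /* 1.5x */
constexpr int kWidthPrev = 1000;
constexpr int kWidthNext = (kWidthPrev * kFractionalNext + kFractionalPrev / 2) / kFractionalPrev;
static_assert(kWidthNext == 1500);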

View File

@ -165,6 +165,7 @@ class GHOST_WindowWayland : public GHOST_Window {
* Return true when the window's scale or DPI changes.
*/
bool outputs_changed_update_scale();
void outputs_changed_update_scale_tag();
#ifdef USE_EVENT_BACKGROUND_THREAD
void pending_actions_handle();

View File

@ -111,17 +111,6 @@ static void editmesh_tessface_calc_intern(BMEditMesh *em,
void BKE_editmesh_looptri_calc_ex(BMEditMesh *em, const BMeshCalcTessellation_Params *params)
{
editmesh_tessface_calc_intern(em, params);
/* commented because editbmesh_build_data() ensures we get tessfaces */
#if 0
if (em->mesh_eval_final && em->mesh_eval_final == em->mesh_eval_cage) {
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_final);
}
else if (em->mesh_eval_final) {
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_final);
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_cage);
}
#endif
}
void BKE_editmesh_looptri_calc(BMEditMesh *em)

View File

@ -101,10 +101,13 @@ set(GLSL_SRC
shaders/compositor_convert.glsl
shaders/compositor_despeckle.glsl
shaders/compositor_directional_blur.glsl
shaders/compositor_displace.glsl
shaders/compositor_edge_filter.glsl
shaders/compositor_ellipse_mask.glsl
shaders/compositor_filter.glsl
shaders/compositor_flip.glsl
shaders/compositor_glare_fog_glow_downsample.glsl
shaders/compositor_glare_fog_glow_upsample.glsl
shaders/compositor_glare_ghost_accumulate.glsl
shaders/compositor_glare_ghost_base.glsl
shaders/compositor_glare_highlights.glsl
@ -116,6 +119,7 @@ set(GLSL_SRC
shaders/compositor_glare_streaks_accumulate.glsl
shaders/compositor_glare_streaks_filter.glsl
shaders/compositor_image_crop.glsl
shaders/compositor_map_uv.glsl
shaders/compositor_morphological_distance.glsl
shaders/compositor_morphological_distance_feather.glsl
shaders/compositor_morphological_distance_threshold.glsl
@ -136,6 +140,9 @@ set(GLSL_SRC
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_write_output.glsl
shaders/compositor_z_combine_compute_mask.glsl
shaders/compositor_z_combine_from_mask.glsl
shaders/compositor_z_combine_simple.glsl
shaders/library/gpu_shader_compositor_alpha_over.glsl
shaders/library/gpu_shader_compositor_blur_common.glsl
@ -202,12 +209,14 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_convert_info.hh
shaders/infos/compositor_despeckle_info.hh
shaders/infos/compositor_directional_blur_info.hh
shaders/infos/compositor_displace_info.hh
shaders/infos/compositor_edge_filter_info.hh
shaders/infos/compositor_ellipse_mask_info.hh
shaders/infos/compositor_filter_info.hh
shaders/infos/compositor_flip_info.hh
shaders/infos/compositor_glare_info.hh
shaders/infos/compositor_image_crop_info.hh
shaders/infos/compositor_map_uv_info.hh
shaders/infos/compositor_morphological_distance_feather_info.hh
shaders/infos/compositor_morphological_distance_info.hh
shaders/infos/compositor_morphological_distance_threshold_info.hh
@ -226,6 +235,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_write_output_info.hh
shaders/infos/compositor_z_combine_info.hh
)
set(SHADER_CREATE_INFOS_CONTENT "")

View File

@ -170,6 +170,12 @@ class Operation {
* evaluation of the operation to declare that the results are no longer needed by this
* operation. */
void release_inputs();
/* Release the results that were allocated in the execute method but are not actually needed.
* This can be the case if the execute method allocated a dummy texture for an unneeded result,
* see the description of Result::allocate_texture() for more information. This is called after
* the evaluation of the operation. */
void release_unneeded_results();
};
} // namespace blender::realtime_compositor

View File

@ -112,7 +112,18 @@ class Result {
/* Declare the result to be a texture result, allocate a texture of an appropriate type with
* the size of the given domain from the result's texture pool, and set the domain of the result
* to the given domain. */
* to the given domain.
*
* If the result should not be computed, that is, should_compute() returns false, yet this method
* is called, that means the result is only being allocated because the shader that computes it
* also computes another result that is actually needed, and shaders need to have a texture
* bound to all their image units for a correct invocation, even if some of those textures are
* not needed and will eventually be discarded. In that case, since allocating the full texture
* is not needed, allocate_single_value() is called instead and the reference count is set to 1.
* This essentially allocates a dummy 1x1 texture, which works because out-of-bounds shader writes
* to images are safe. Since this result is not referenced by any other operation, it should be
* manually released after the operation is evaluated, which is implemented by calling the
* Operation::release_unneeded_results() method. */
void allocate_texture(Domain domain);
/* Declare the result to be a single value result, allocate a texture of an appropriate
@ -228,6 +239,9 @@ class Result {
/* Returns true if the result is a single value and false if it is a texture. */
bool is_single_value() const;
/* Returns true if the result is allocated. */
bool is_allocated() const;
/* Returns the allocated GPU texture of the result. */
GPUTexture *texture() const;

View File

@ -13,8 +13,8 @@ namespace blender::realtime_compositor {
void smaa(Context &context,
Result &input,
Result &output,
float threshold,
float local_contrast_adaptation_factor,
int corner_rounding);
float threshold = 0.1f,
float local_contrast_adaptation_factor = 2.0f,
int corner_rounding = 25);
} // namespace blender::realtime_compositor

View File

@ -6,6 +6,7 @@
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_smaa.hh"
@ -22,10 +23,25 @@ static Result detect_edges(Context &context,
GPUShader *shader = context.shader_manager().get("compositor_smaa_edge_detection");
GPU_shader_bind(shader);
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
switch (input.type()) {
case ResultType::Color: {
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Vector: {
float luminance_coefficients[3] = {1.0f, 1.0f, 1.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Float: {
float luminance_coefficients[3] = {1.0f, 0.0f, 0.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
}
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
GPU_shader_uniform_1f(shader, "smaa_threshold", threshold);
GPU_shader_uniform_1f(
shader, "smaa_local_contrast_adaptation_factor", local_contrast_adaptation_factor);
@ -78,7 +94,9 @@ static Result calculate_blending_weights(Context &context, Result &edges, int co
static void blend_neighborhood(Context &context, Result &input, Result &weights, Result &output)
{
GPUShader *shader = context.shader_manager().get("compositor_smaa_neighborhood_blending");
GPUShader *shader = context.shader_manager().get(
input.type() == ResultType::Float ? "compositor_smaa_neighborhood_blending_float" :
"compositor_smaa_neighborhood_blending_color");
GPU_shader_bind(shader);
GPU_texture_filter_mode(input.texture(), true);

View File

@ -34,6 +34,8 @@ void Operation::evaluate()
execute();
release_inputs();
release_unneeded_results();
}
Result &Operation::get_result(StringRef identifier)
@ -201,4 +203,13 @@ void Operation::release_inputs()
}
}
void Operation::release_unneeded_results()
{
for (Result &result : results_.values()) {
if (!result.should_compute() && result.is_allocated()) {
result.release();
}
}
}
} // namespace blender::realtime_compositor

View File

@ -21,12 +21,21 @@ Result::Result(ResultType type, TexturePool &texture_pool)
Result Result::Temporary(ResultType type, TexturePool &texture_pool)
{
Result result = Result(type, texture_pool);
result.increment_reference_count();
result.set_initial_reference_count(1);
result.reset();
return result;
}
void Result::allocate_texture(Domain domain)
{
/* The result is not actually needed, so allocate a dummy single value texture instead. See the
* method description for more information. */
if (!should_compute()) {
allocate_single_value();
increment_reference_count();
return;
}
is_single_value_ = false;
switch (type_) {
case ResultType::Float:
@ -247,6 +256,11 @@ bool Result::is_single_value() const
return is_single_value_;
}
bool Result::is_allocated() const
{
return texture_ != nullptr;
}
GPUTexture *Result::texture() const
{
return texture_;

View File

@ -0,0 +1,55 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* A shared table that stores the displaced coordinates of all pixels in the work group. This is
* necessary to avoid recomputing displaced coordinates when computing the gradients necessary for
* anisotropic filtering, see the implementation for more information. */
shared vec2 displaced_coordinates_table[gl_WorkGroupSize.x][gl_WorkGroupSize.y];
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 input_size = texture_size(input_tx);
/* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image size
* to get the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(input_size);
/* Note that the input displacement is in pixel space, so divide by the input size to transform
* it into the normalized sampler space. */
vec2 scale = vec2(texture_load(x_scale_tx, texel).x, texture_load(y_scale_tx, texel).x);
vec2 displacement = texture_load(displacement_tx, texel).xy * scale / vec2(input_size);
vec2 displaced_coordinates = coordinates - displacement;
/* Store the displaced coordinates into the shared table and issue a barrier to later compute the
* gradients from the table. */
ivec2 table_index = ivec2(gl_LocalInvocationID.xy);
displaced_coordinates_table[table_index.x][table_index.y] = displaced_coordinates;
barrier();
/* Compute the partial derivative of the displaced coordinates along the x direction using a
* finite difference approximation. Even invocations use a forward finite difference equation
* while odd invocations use a backward finite difference equation. This is done such that
* invocations at the edges of the work group wouldn't need access to pixels that are outside of
* the work group.
*
* The x_step value is 1 for even invocations and when added to the x table index and multiplied
* by the result yields a standard forward finite difference equation. The x_step value is -1 for
* odd invocations and when added to the x table index and multiplied by the result yields a
* standard backward finite difference equation, because multiplication by -1 flips the order of
* subtraction. */
int x_step = (table_index.x % 2) * -2 + 1;
vec2 x_neighbour = displaced_coordinates_table[table_index.x + x_step][table_index.y];
vec2 x_gradient = (x_neighbour - displaced_coordinates) * x_step;
/* Compute the partial derivative of the displaced coordinates along the y direction using a
* finite difference approximation. See the previous code section for more information. */
int y_step = (table_index.y % 2) * -2 + 1;
vec2 y_neighbour = displaced_coordinates_table[table_index.x][table_index.y + y_step];
vec2 y_gradient = (y_neighbour - displaced_coordinates) * y_step;
/* Sample the input using the displaced coordinates passing in the computed gradients in order to
* utilize the anisotropic filtering capabilities of the sampler. */
vec4 displaced_color = textureGrad(input_tx, displaced_coordinates, x_gradient, y_gradient);
imageStore(output_img, texel, displaced_color);
}
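The even/odd step trick used for the finite differences can be sanity-checked outside the shader (a sketch in C++ rather than GLSL, not part of the patch): even local indices step forward, odd ones step backward, and an even invocation and its odd neighbour end up differencing the same pair of table entries, so no invocation reads outside the work group.

/* Sketch only: mirrors `int x_step = (table_index.x % 2) * -2 + 1;` from the shader. */
constexpr int step_for(int local_index)
{
  return (local_index % 2) * -2 + 1;
}
static_assert(step_for(4) == 1);  /* Forward difference: table[5] - table[4]. */
static_assert(step_for(5) == -1); /* Backward difference: (table[4] - table[5]) * -1 == table[5] - table[4]. */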

View File

@ -0,0 +1,102 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#if defined(KARIS_AVERAGE)
/* Computes the weighted average of the given four colors, which are assumed to be the colors of
* spatially neighbouring pixels. The weights are computed so as to reduce the contributions of
* fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
* the article "Graphic Rants: Tone Mapping".
*
* https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
vec4 karis_brightness_weighted_sum(vec4 color1, vec4 color2, vec4 color3, vec4 color4)
{
vec4 brightness = vec4(max_v3(color1), max_v3(color2), max_v3(color3), max_v3(color4));
vec4 weights = 1.0 / (brightness + 1.0);
return weighted_sum(color1, color2, color3, color4, weights);
}
#endif
void main()
{
/* Each invocation corresponds to one output pixel, where the output has half the size of the
* input. */
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
* the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
/* All the offsets in the following code section are in the normalized pixel space of the input
* texture, so compute its normalized pixel size. */
vec2 pixel_size = 1.0 / vec2(texture_size(input_tx));
/* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding output
* pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13 positions
* using bilinear fetches at the center of a number of overlapping square 4-pixel groups. This
* downsampling strategy is described in the talk:
*
* Next Generation Post Processing in Call of Duty: Advanced Warfare
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
*
* In particular, the downsampling strategy is described and illustrated in slide 153 titled
* "Downsampling - Our Solution". This is employed as it significantly improves the stability of
* the glare as can be seen in the videos in the talk. */
vec4 center = texture(input_tx, coordinates);
vec4 upper_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
vec4 upper_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
vec4 lower_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
vec4 lower_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
vec4 left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 0.0));
vec4 right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 0.0));
vec4 upper_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, 2.0));
vec4 lower_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, -2.0));
vec4 upper_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 2.0));
vec4 upper_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 2.0));
vec4 lower_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, -2.0));
vec4 lower_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, -2.0));
#if defined(SIMPLE_AVERAGE)
/* The original weights equation mentioned in slide 153 is:
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
* The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
* groups of pixels. The center is sampled 4 times and the far non-corner pixels are sampled 2
* times. The near corner pixels are sampled only once, but their weight is quadruple the weights
* of other groups, so they count as sampled 4 times. Finally, the far corner pixels are sampled
* only once, essentially totalling 32 samples. So the weights are as used in the following code
* section. */
vec4 result = (4.0 / 32.0) * center +
(4.0 / 32.0) *
(upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
(2.0 / 32.0) * (left_far + right_far + upper_far + lower_far) +
(1.0 / 32.0) *
(upper_left_far + upper_right_far + lower_left_far + lower_right_far);
#elif defined(KARIS_AVERAGE)
/* Reduce the contributions of fireflies on the result by reducing each group of pixels using a
* Karis brightness weighted sum. This is described in slide 168 titled "Fireflies - Partial
* Karis Average".
*
* This needn't be done on all downsampling passes, but only the first one, since fireflies
* will not survive the first pass; later passes can use the weighted average.
vec4 center_weighted_sum = karis_brightness_weighted_sum(
upper_left_near, upper_right_near, lower_right_near, lower_left_near);
vec4 upper_left_weighted_sum = karis_brightness_weighted_sum(
upper_left_far, upper_far, center, left_far);
vec4 upper_right_weighted_sum = karis_brightness_weighted_sum(
upper_far, upper_right_far, right_far, center);
vec4 lower_right_weighted_sum = karis_brightness_weighted_sum(
center, right_far, lower_right_far, lower_far);
vec4 lower_left_weighted_sum = karis_brightness_weighted_sum(
left_far, center, lower_far, lower_left_far);
/* The original weights equation mentioned in slide 153 is:
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
* Multiply both sides by 8 and you get:
* 4 + 1 + 1 + 1 + 1 = 8
* So the weights are as used in the following code section. */
vec4 result = (4.0 / 8.0) * center_weighted_sum +
(1.0 / 8.0) * (upper_left_weighted_sum + upper_right_weighted_sum +
lower_left_weighted_sum + lower_right_weighted_sum);
#endif
imageStore(output_img, texel, result);
}
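As a quick check of the weights used above (editorial note, not part of the shader): the center group contributes 4/32, the four near-corner groups contribute 4 * (4/32), the four far edge groups 4 * (2/32), and the four far corner groups 4 * (1/32), so the total is (4 + 16 + 8 + 4) / 32 = 1, confirming the filter is normalized.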

View File

@ -0,0 +1,37 @@
void main()
{
/* Each invocation corresponds to one output pixel, where the output has twice the size of the
* input. */
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
* the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
/* All the offsets in the following code section are in the normalized pixel space of the output
* image, so compute its normalized pixel size. */
vec2 pixel_size = 1.0 / vec2(imageSize(output_img));
/* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at the
* center of neighbouring output pixels. As more tent filter upsampling passes are applied, the
* result approximates a large sized Gaussian filter. This upsampling strategy is described in
* the talk:
*
* Next Generation Post Processing in Call of Duty: Advanced Warfare
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
*
* In particular, the upsampling strategy is described and illustrated in slide 162 titled
* "Upsampling - Our Solution". */
vec4 upsampled = vec4(0.0);
upsampled += (4.0 / 16.0) * texture(input_tx, coordinates);
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 0.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, 1.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 0.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
imageStore(output_img, texel, imageLoad(output_img, texel) + upsampled);
}

View File

@ -0,0 +1,66 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* A shared table that stores the UV coordinates of all pixels in the work group. This is necessary
* to avoid recomputing UV coordinates when computing the gradients necessary for anisotropic
* filtering, see the implementation for more information. */
shared vec2 uv_coordinates_table[gl_WorkGroupSize.x][gl_WorkGroupSize.y];
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec2 uv_coordinates = texture_load(uv_tx, texel).xy;
/* Store the UV coordinates into the shared table and issue a barrier to later compute the
* gradients from the table. */
ivec2 table_index = ivec2(gl_LocalInvocationID.xy);
uv_coordinates_table[table_index.x][table_index.y] = uv_coordinates;
barrier();
/* Compute the partial derivative of the UV coordinates along the x direction using a finite
* difference approximation. Even invocations use a forward finite difference equation while odd
* invocations use a backward finite difference equation. This is done such that invocations at
* the edges of the work group wouldn't need access to pixels that are outside of the work group.
*
* The x_step value is 1 for even invocations and when added to the x table index and multiplied
* by the result yields a standard forward finite difference equation. The x_step value is -1 for
* odd invocations and when added to the x table index and multiplied by the result yields a
* standard backward finite difference equation, because multiplication by -1 flips the order of
* subtraction. */
int x_step = (table_index.x % 2) * -2 + 1;
vec2 x_neighbour = uv_coordinates_table[table_index.x + x_step][table_index.y];
vec2 x_gradient = (x_neighbour - uv_coordinates) * x_step;
/* Compute the partial derivative of the UV coordinates along the y direction using a
* finite difference approximation. See the previous code section for more information. */
int y_step = (table_index.y % 2) * -2 + 1;
vec2 y_neighbour = uv_coordinates_table[table_index.x][table_index.y + y_step];
vec2 y_gradient = (y_neighbour - uv_coordinates) * y_step;
/* Sample the input using the UV coordinates passing in the computed gradients in order to
* utilize the anisotropic filtering capabilities of the sampler. */
vec4 sampled_color = textureGrad(input_tx, uv_coordinates, x_gradient, y_gradient);
/* The UV coordinates might be defined in only a subset area of the UV texture, in which case,
* the gradients would be infinite at the boundary of that area, which would produce erroneous
* results due to anisotropic filtering. To work around this, we attenuate the result if its
* computed gradients are too high, such that the result tends to zero when the magnitude of the
* gradients tends to one, that is, when their sum tends to 2. One is chosen as the threshold
* because that's the maximum gradient magnitude when the boundary is the maximum sampler value
* of one and the out-of-bounds values are zero. Additionally, the user-supplied gradient
* attenuation factor can be used to control this attenuation or even disable it when it is zero,
* ranging between zero and one. */
float gradient_magnitude = (length(x_gradient) + length(y_gradient)) / 2.0;
float gradient_attenuation = max(0.0, 1.0 - gradient_attenuation_factor * gradient_magnitude);
/* The UV texture is assumed to contain an alpha channel as its third channel, since the UV
* coordinates might be defined in only a subset area of the UV texture as mentioned. In that
* case, the alpha is typically opaque at the subset area and transparent everywhere else, and
* alpha pre-multiplication is then performed. This format of having an alpha channel in the UV
* coordinates is the format used by UV passes in render engines, hence the mentioned logic. */
float alpha = texture_load(uv_tx, texel).z;
vec4 result = sampled_color * gradient_attenuation * alpha;
imageStore(output_img, texel, result);
}
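As a worked example of the attenuation above (assumed values): at a boundary where the sampled UV values drop from 1 to 0 over a single invocation, |x_gradient| + |y_gradient| approaches 2, so gradient_magnitude approaches 1 and the attenuation becomes max(0.0, 1.0 - gradient_attenuation_factor * 1.0); with a factor of 1 the boundary pixel is fully suppressed, and with a factor of 0 the attenuation is effectively disabled (always 1).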

View File

@ -0,0 +1,18 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
/* The same logic as in compositor_z_combine_simple.glsl but only computes the mask to be later
* anti-aliased and used for mixing, see the logic in that file for more information. */
float z_combine_factor = float(first_z_value < second_z_value);
float alpha_factor = use_alpha ? first_color.a : 1.0;
float mix_factor = z_combine_factor * alpha_factor;
imageStore(mask_img, texel, vec4(mix_factor));
}

View File

@ -0,0 +1,21 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
vec4 second_color = texture_load(second_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
float mask_value = texture_load(mask_tx, texel).x;
vec4 combined_color = mix(second_color, first_color, mask_value);
/* Use the more opaque alpha from the two images. */
combined_color.a = use_alpha ? max(second_color.a, first_color.a) : combined_color.a;
float combined_z = mix(second_z_value, first_z_value, mask_value);
imageStore(combined_img, texel, combined_color);
imageStore(combined_z_img, texel, vec4(combined_z));
}

View File

@ -0,0 +1,29 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
vec4 second_color = texture_load(second_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
/* Mix between the first and second images using a mask such that the image with the object
* closer to the camera is returned. The mask value is then 1, and thus returns the first image
* if its Z value is less than that of the second image. Otherwise, its value is 0, and thus
* returns the second image. Furthermore, if the object in the first image is closer but has a
* non-opaque alpha, then the alpha is used as a mask, but only if Use Alpha is enabled. */
float z_combine_factor = float(first_z_value < second_z_value);
float alpha_factor = use_alpha ? first_color.a : 1.0;
float mix_factor = z_combine_factor * alpha_factor;
vec4 combined_color = mix(second_color, first_color, mix_factor);
/* Use the more opaque alpha from the two images. */
combined_color.a = use_alpha ? max(second_color.a, first_color.a) : combined_color.a;
float combined_z = mix(second_z_value, first_z_value, mix_factor);
imageStore(combined_img, texel, combined_color);
imageStore(combined_z_img, texel, vec4(combined_z));
}
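For illustration with made-up values: first_z = 0.2, second_z = 0.7 and a first-image alpha of 0.6 with use_alpha enabled give z_combine_factor = 1 and mix_factor = 1 * 0.6 = 0.6, so combined_color = mix(second_color, first_color, 0.6), combined_z = mix(0.7, 0.2, 0.6) = 0.4, and the output alpha is the more opaque of the two input alphas.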

View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_displace)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "displacement_tx")
.sampler(2, ImageType::FLOAT_2D, "x_scale_tx")
.sampler(3, ImageType::FLOAT_2D, "y_scale_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_displace.glsl")
.do_static_compilation(true);

View File

@ -104,3 +104,30 @@ GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate)
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
.compute_source("compositor_glare_streaks_accumulate.glsl")
.do_static_compilation(true);
/* --------
* Fog Glow
* -------- */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_shared)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_fog_glow_downsample.glsl");
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_simple_average)
.define("SIMPLE_AVERAGE")
.additional_info("compositor_glare_fog_glow_downsample_shared")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_karis_average)
.define("KARIS_AVERAGE")
.additional_info("compositor_glare_fog_glow_downsample_shared")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_upsample)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_fog_glow_upsample.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_map_uv)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "gradient_attenuation_factor")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "uv_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_map_uv.glsl")
.do_static_compilation(true);

View File

@ -32,13 +32,21 @@ GPU_SHADER_CREATE_INFO(compositor_smaa_blending_weight_calculation)
.compute_source("compositor_smaa_blending_weight_calculation.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending)
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_shared)
.local_group_size(16, 16)
.define("SMAA_GLSL_3")
.define("SMAA_RT_METRICS",
"vec4(1.0 / vec2(textureSize(input_tx, 0)), vec2(textureSize(input_tx, 0)))")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "weights_tx")
.compute_source("compositor_smaa_neighborhood_blending.glsl");
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_color)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_smaa_neighborhood_blending.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_float)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_z_combine_simple)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_simple.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_z_combine_compute_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "mask_img")
.compute_source("compositor_z_combine_compute_mask.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_z_combine_from_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.sampler(4, ImageType::FLOAT_2D, "mask_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_from_mask.glsl")
.do_static_compilation(true);

View File

@ -102,7 +102,9 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_no_geom)
#endif
/* EEVEE_shaders_volumes_scatter_with_lights_sh_get */
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights_common).define("VOLUME_LIGHTING");
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights_common)
.define("VOLUME_LIGHTING")
.define("IRRADIANCE_HL2");
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights)
.additional_info("eevee_legacy_volumes_scatter_with_lights_common")

View File

@ -9,6 +9,8 @@
#include "DNA_screen_types.h"
#include "ED_view3d.h"
#include "UI_resources.h"
#include "DRW_engine.h"
@ -138,29 +140,44 @@ static void select_cache_init(void *vedata)
DRWState state = DRW_STATE_DEFAULT;
state |= RV3D_CLIPPING_ENABLED(draw_ctx->v3d, draw_ctx->rv3d) ? DRW_STATE_CLIP_PLANES : 0;
bool retopology_occlusion = RETOPOLOGY_ENABLED(draw_ctx->v3d) && !XRAY_ENABLED(draw_ctx->v3d);
float retopology_offset = RETOPOLOGY_OFFSET(draw_ctx->v3d);
{
DRW_PASS_CREATE(psl->depth_only_pass, state);
pd->shgrp_depth_only = DRW_shgroup_create(sh->select_id_uniform, psl->depth_only_pass);
/* Not setting ID because this pass only draws to the depth buffer. */
DRW_shgroup_uniform_float_copy(pd->shgrp_depth_only, "retopologyOffset", retopology_offset);
if (retopology_occlusion) {
pd->shgrp_occlude = DRW_shgroup_create(sh->select_id_uniform, psl->depth_only_pass);
/* Not setting ID because this pass only draws to the depth buffer. */
DRW_shgroup_uniform_float_copy(pd->shgrp_occlude, "retopologyOffset", 0.0f);
}
DRW_PASS_CREATE(psl->select_id_face_pass, state);
if (e_data.context.select_mode & SCE_SELECT_FACE) {
pd->shgrp_face_flat = DRW_shgroup_create(sh->select_id_flat, psl->select_id_face_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_face_flat, "retopologyOffset", retopology_offset);
}
else {
pd->shgrp_face_unif = DRW_shgroup_create(sh->select_id_uniform, psl->select_id_face_pass);
DRW_shgroup_uniform_int_copy(pd->shgrp_face_unif, "id", 0);
DRW_shgroup_uniform_float_copy(pd->shgrp_face_unif, "retopologyOffset", retopology_offset);
}
if (e_data.context.select_mode & SCE_SELECT_EDGE) {
DRW_PASS_CREATE(psl->select_id_edge_pass, state | DRW_STATE_FIRST_VERTEX_CONVENTION);
pd->shgrp_edge = DRW_shgroup_create(sh->select_id_flat, psl->select_id_edge_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_edge, "retopologyOffset", retopology_offset);
}
if (e_data.context.select_mode & SCE_SELECT_VERTEX) {
DRW_PASS_CREATE(psl->select_id_vert_pass, state);
pd->shgrp_vert = DRW_shgroup_create(sh->select_id_flat, psl->select_id_vert_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_vert, "sizeVertex", 2 * G_draw.block.size_vertex);
DRW_shgroup_uniform_float_copy(pd->shgrp_vert, "retopologyOffset", retopology_offset);
}
}
@ -197,6 +214,16 @@ static void select_cache_populate(void *vedata, Object *ob)
SELECTID_StorageList *stl = ((SELECTID_Data *)vedata)->stl;
const DRWContextState *draw_ctx = DRW_context_state_get();
const bool retopology_occlusion = RETOPOLOGY_ENABLED(draw_ctx->v3d) &&
!XRAY_ENABLED(draw_ctx->v3d);
if (retopology_occlusion && !DRW_object_is_in_edit_mode(ob)) {
if (ob->dt >= OB_SOLID) {
struct GPUBatch *geom_faces = DRW_mesh_batch_cache_get_surface(ob->data);
DRW_shgroup_call_obmat(stl->g_data->shgrp_occlude, geom_faces, ob->object_to_world);
}
return;
}
SELECTID_ObjectData *sel_data = (SELECTID_ObjectData *)DRW_drawdata_get(
&ob->id, &draw_engine_select_type);

View File

@ -40,6 +40,7 @@ typedef struct SELECTID_Shaders {
typedef struct SELECTID_PrivateData {
DRWShadingGroup *shgrp_depth_only;
DRWShadingGroup *shgrp_occlude;
DRWShadingGroup *shgrp_face_unif;
DRWShadingGroup *shgrp_face_flat;
DRWShadingGroup *shgrp_edge;

View File

@ -11,6 +11,7 @@ GPU_SHADER_INTERFACE_INFO(select_id_iface, "").flat(Type::INT, "id");
GPU_SHADER_CREATE_INFO(select_id_flat)
.push_constant(Type::FLOAT, "sizeVertex")
.push_constant(Type::INT, "offset")
.push_constant(Type::FLOAT, "retopologyOffset")
.vertex_in(0, Type::VEC3, "pos")
.vertex_in(1, Type::INT, "index")
.vertex_out(select_id_iface)
@ -24,6 +25,7 @@ GPU_SHADER_CREATE_INFO(select_id_uniform)
.define("UNIFORM_ID")
.push_constant(Type::FLOAT, "sizeVertex")
.push_constant(Type::INT, "id")
.push_constant(Type::FLOAT, "retopologyOffset")
.vertex_in(0, Type::VEC3, "pos")
.fragment_out(0, Type::UINT, "fragColor")
.vertex_source("select_id_vert.glsl")

View File

@ -8,8 +8,12 @@ void main()
#endif
vec3 world_pos = point_object_to_world(pos);
gl_Position = point_world_to_ndc(world_pos);
vec3 view_pos = point_world_to_view(world_pos);
gl_Position = point_view_to_ndc(view_pos);
gl_PointSize = sizeVertex;
/* Offset Z position for retopology selection occlusion. */
gl_Position.z += get_homogenous_z_offset(view_pos.z, gl_Position.w, retopologyOffset);
view_clipping_distances(world_pos);
}

View File

@ -2770,6 +2770,25 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons
drw_engines_cache_populate(obj_eval);
}
if (RETOPOLOGY_ENABLED(v3d) && !XRAY_ENABLED(v3d)) {
DEGObjectIterSettings deg_iter_settings = {0};
deg_iter_settings.depsgraph = depsgraph;
deg_iter_settings.flags = DEG_OBJECT_ITER_FOR_RENDER_ENGINE_FLAGS;
DEG_OBJECT_ITER_BEGIN (&deg_iter_settings, ob) {
if (ob->type != OB_MESH) {
/* The iterator has evaluated meshes for all solid objects.
* It also has non-mesh objects, however, which are not supported here. */
continue;
}
if (DRW_object_is_in_edit_mode(ob)) {
/* Only background (non-edit) objects are used for occlusion. */
continue;
}
drw_engines_cache_populate(ob);
}
DEG_OBJECT_ITER_END;
}
drw_engines_cache_finish();
drw_task_graph_deinit();

View File

@ -1918,6 +1918,7 @@ static void ui_selectcontext_apply(bContext *C,
bool b;
int i;
float f;
char *str;
PointerRNA p;
} delta, min, max;
@ -1950,6 +1951,10 @@ static void ui_selectcontext_apply(bContext *C,
/* Not a delta in fact. */
delta.p = RNA_property_pointer_get(&but->rnapoin, prop);
}
else if (rna_type == PROP_STRING) {
/* Not a delta in fact. */
delta.str = RNA_property_string_get_alloc(&but->rnapoin, prop, nullptr, 0, nullptr);
}
# ifdef USE_ALLSELECT_LAYER_HACK
/* make up for not having 'handle_layer_buttons' */
@ -2023,9 +2028,16 @@ static void ui_selectcontext_apply(bContext *C,
const PointerRNA other_value = delta.p;
RNA_property_pointer_set(&lptr, lprop, other_value, nullptr);
}
else if (rna_type == PROP_STRING) {
const char *other_value = delta.str;
RNA_property_string_set(&lptr, lprop, other_value);
}
RNA_property_update(C, &lptr, prop);
}
if (rna_type == PROP_STRING) {
MEM_freeN(delta.str);
}
}
}

View File

@ -72,6 +72,7 @@ class UVPackIsland_Params {
eUVPackIsland_ShapeMethod shape_method;
};
class uv_phi;
class PackIsland {
public:
/** Aspect ratio, required for rotation. */
@ -102,6 +103,8 @@ class PackIsland {
/** Half of the diagonal of the AABB. */
float2 half_diagonal_;
void place_(const float scale, const uv_phi phi);
private:
void calculate_pivot(); /* Calculate `pivot_` and `half_diagonal_` based on added triangles. */
blender::Vector<float2> triangle_vertices_;

View File

@ -106,6 +106,8 @@ void PackIsland::add_triangle(const float2 uv0, const float2 uv1, const float2 u
void PackIsland::add_polygon(const blender::Span<float2> uvs, MemArena *arena, Heap *heap)
{
/* Internally, PackIsland uses triangles as the primitive, so we have to triangulate. */
int vert_count = int(uvs.size());
BLI_assert(vert_count >= 3);
int nfilltri = vert_count - 2;
@ -118,13 +120,7 @@ void PackIsland::add_polygon(const blender::Span<float2> uvs, MemArena *arena, H
/* Storage. */
uint(*tris)[3] = static_cast<uint(*)[3]>(
BLI_memarena_alloc(arena, sizeof(*tris) * size_t(nfilltri)));
float(*source)[2] = static_cast<float(*)[2]>(
BLI_memarena_alloc(arena, sizeof(*source) * size_t(vert_count)));
/* Copy input. */
for (int i = 0; i < vert_count; i++) {
copy_v2_v2(source[i], uvs[i]);
}
const float(*source)[2] = reinterpret_cast<const float(*)[2]>(uvs.data());
/* Triangulate. */
BLI_polyfill_calc_arena(source, vert_count, 0, tris, arena);
@ -163,7 +159,7 @@ void PackIsland::finalize_geometry(const UVPackIsland_Params &params, MemArena *
BLI_memarena_alloc(arena, sizeof(*index_map) * vert_count));
/* Prepare input for convex hull. */
float(*source)[2] = reinterpret_cast<float(*)[2]>(triangle_vertices_.data());
const float(*source)[2] = reinterpret_cast<const float(*)[2]>(triangle_vertices_.data());
/* Compute convex hull. */
int convex_len = BLI_convexhull_2d(source, vert_count, index_map);
@ -183,14 +179,25 @@ void PackIsland::finalize_geometry(const UVPackIsland_Params &params, MemArena *
void PackIsland::calculate_pivot()
{
/* `pivot_` is calculated as the center of the AABB;
* however, `pivot_` cannot be outside of the convex hull. */
Bounds<float2> triangle_bounds = *bounds::min_max(triangle_vertices_.as_span());
pivot_ = (triangle_bounds.min + triangle_bounds.max) * 0.5f;
half_diagonal_ = (triangle_bounds.max - triangle_bounds.min) * 0.5f;
}
void PackIsland::place_(const float scale, const uv_phi phi)
{
angle = phi.rotation;
float matrix_inverse[2][2];
build_inverse_transformation(scale, phi.rotation, matrix_inverse);
mul_v2_m2v2(pre_translate, matrix_inverse, phi.translation);
pre_translate -= pivot_;
}
UVPackIsland_Params::UVPackIsland_Params()
{
/* TEMPORARY, set everything to "zero" for backwards compatibility. */
rotate = false;
only_selected_uvs = false;
only_selected_faces = false;
@ -235,7 +242,7 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
/* Exclude an initial AABB near the origin. */
float next_u1 = *r_max_u;
float next_v1 = *r_max_v;
bool zigzag = next_u1 / target_aspect_y < next_v1; /* Horizontal or Vertical strip? */
bool zigzag = next_u1 < next_v1 * target_aspect_y; /* Horizontal or Vertical strip? */
float u0 = zigzag ? next_u1 : 0.0f;
float v0 = zigzag ? 0.0f : next_v1;
@ -254,7 +261,7 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
}
if (restart) {
/* We're at the end of a strip. Restart from U axis or V axis. */
zigzag = next_u1 / target_aspect_y < next_v1;
zigzag = next_u1 < next_v1 * target_aspect_y;
u0 = zigzag ? next_u1 : 0.0f;
v0 = zigzag ? 0.0f : next_v1;
}
@ -281,420 +288,6 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
*r_max_v = next_v1;
}
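(Side note on the strip-orientation test above: for any positive `target_aspect_y`, `next_u1 < next_v1 * target_aspect_y` holds exactly when `next_u1 / target_aspect_y < next_v1`, so the multiplied form selects the same strip while avoiding the per-iteration division.)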
/* Wrapper around #BLI_box_pack_2d. */
static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const float target_aspect_y,
float *r_max_u,
float *r_max_v)
{
/* Allocate storage. */
BoxPack *box_array = static_cast<BoxPack *>(
MEM_mallocN(sizeof(*box_array) * islands.size(), __func__));
/* Prepare for box_pack_2d. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
box->w = (island->half_diagonal_.x * 2 * scale + 2 * margin) / target_aspect_y;
box->h = island->half_diagonal_.y * 2 * scale + 2 * margin;
}
const bool sort_boxes = false; /* Use existing ordering from `aabbs`. */
/* \note Writes to `*r_max_u` and `*r_max_v`. */
BLI_box_pack_2d(box_array, int(aabbs.size()), sort_boxes, r_max_u, r_max_v);
*r_max_u *= target_aspect_y;
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
island->angle = 0.0f; /* #BLI_box_pack_2d never rotates. */
island->pre_translate.x = (box->x + box->w * 0.5f) * target_aspect_y / scale -
island->pivot_.x;
island->pre_translate.y = (box->y + box->h * 0.5f) / scale - island->pivot_.y;
}
/* Housekeeping. */
MEM_freeN(box_array);
}
/**
* Helper class for the `xatlas` strategy.
* Accelerates geometry queries by approximating exact queries with a bitmap.
* Includes some bookkeeping variables to simplify the algorithm.
*/
class Occupancy {
public:
Occupancy(const float initial_scale);
void increase_scale(); /* Resize the scale of the bitmap and clear it. */
/* Write or Query a triangle on the bitmap. */
float trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const;
/* Write or Query an island on the bitmap. */
float trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const;
int bitmap_radix; /* Width and Height of `bitmap`. */
float bitmap_scale_reciprocal; /* == 1.0f / `bitmap_scale`. */
private:
mutable blender::Array<float> bitmap_;
mutable float2 witness_; /* Witness to a previously known occupied pixel. */
mutable float witness_distance_; /* Signed distance to nearest placed island. */
mutable uint triangle_hint_; /* Hint to a previously suspected overlapping triangle. */
const float terminal = 1048576.0f; /* 4 * bitmap_radix < terminal < INT_MAX / 4. */
};
Occupancy::Occupancy(const float initial_scale)
: bitmap_radix(800), bitmap_(bitmap_radix * bitmap_radix, false)
{
increase_scale();
bitmap_scale_reciprocal = bitmap_radix / initial_scale;
}
void Occupancy::increase_scale()
{
bitmap_scale_reciprocal *= 0.5f;
for (int i = 0; i < bitmap_radix * bitmap_radix; i++) {
bitmap_[i] = terminal;
}
witness_.x = -1;
witness_.y = -1;
witness_distance_ = 0.0f;
triangle_hint_ = 0;
}
static float signed_distance_fat_triangle(const float2 probe,
const float2 uv0,
const float2 uv1,
const float2 uv2)
{
/* Be careful with ordering, uv0 <- uv1 <- uv2 <- uv0 <- uv1 etc. */
const float dist01_ssq = dist_signed_squared_to_edge(probe, uv0, uv1);
const float dist12_ssq = dist_signed_squared_to_edge(probe, uv1, uv2);
const float dist20_ssq = dist_signed_squared_to_edge(probe, uv2, uv0);
float result_ssq = max_fff(dist01_ssq, dist12_ssq, dist20_ssq);
if (result_ssq < 0.0f) {
return -sqrtf(-result_ssq);
}
BLI_assert(result_ssq >= 0.0f);
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv0));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv1));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv2));
BLI_assert(result_ssq >= 0.0f);
return sqrtf(result_ssq);
}
float Occupancy::trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const
{
const float x0 = min_fff(uv0.x, uv1.x, uv2.x);
const float y0 = min_fff(uv0.y, uv1.y, uv2.y);
const float x1 = max_fff(uv0.x, uv1.x, uv2.x);
const float y1 = max_fff(uv0.y, uv1.y, uv2.y);
float spread = write ? margin * 2 : 0.0f;
int ix0 = std::max(int(floorf((x0 - spread) * bitmap_scale_reciprocal)), 0);
int iy0 = std::max(int(floorf((y0 - spread) * bitmap_scale_reciprocal)), 0);
int ix1 = std::min(int(floorf((x1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
int iy1 = std::min(int(floorf((y1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
const float2 uv0s = uv0 * bitmap_scale_reciprocal;
const float2 uv1s = uv1 * bitmap_scale_reciprocal;
const float2 uv2s = uv2 * bitmap_scale_reciprocal;
/* TODO: Better epsilon handling here could reduce search size. */
float epsilon = 0.7071f; /* == sqrt(0.5f), rounded up by 0.00002f. */
epsilon = std::max(epsilon, 2 * margin * bitmap_scale_reciprocal);
if (!write) {
if (ix0 <= witness_.x && witness_.x < ix1) {
if (iy0 <= witness_.y && witness_.y < iy1) {
const float distance = signed_distance_fat_triangle(witness_, uv0s, uv1s, uv2s);
const float extent = epsilon - distance - witness_distance_;
const float pixel_round_off = -0.1f; /* Go faster on nearly axis-aligned edges. */
if (extent > pixel_round_off) {
return std::max(0.0f, extent); /* Witness observes occupied. */
}
}
}
}
/* Iterate in opposite direction to outer search to improve witness effectiveness. */
for (int y = iy1 - 1; y >= iy0; y--) {
for (int x = ix1 - 1; x >= ix0; x--) {
float *hotspot = &bitmap_[y * bitmap_radix + x];
if (!write && *hotspot > epsilon) {
continue;
}
const float2 probe(x, y);
const float distance = signed_distance_fat_triangle(probe, uv0s, uv1s, uv2s);
if (write) {
*hotspot = min_ff(distance, *hotspot);
continue;
}
const float extent = epsilon - distance - *hotspot;
if (extent > 0.0f) {
witness_ = probe;
witness_distance_ = *hotspot;
return extent; /* Occupied. */
}
}
}
return -1.0f; /* Available. */
}
float2 PackIsland::get_diagonal_support_d4(const float scale,
const float rotation,
const float margin) const
{
if (rotation == 0.0f) {
return half_diagonal_ * scale + margin; /* Fast path for common case. */
}
/* TODO: BLI_assert rotation is a "Dihedral Group D4" transform. */
float matrix[2][2];
build_transformation(scale, rotation, matrix);
float diagonal_rotated[2];
mul_v2_m2v2(diagonal_rotated, matrix, half_diagonal_);
return float2(fabsf(diagonal_rotated[0]) + margin, fabsf(diagonal_rotated[1]) + margin);
}
float2 PackIsland::get_diagonal_support(const float scale,
const float rotation,
const float margin) const
{
/* Only "D4" transforms are currently supported. */
return get_diagonal_support_d4(scale, rotation, margin);
}
float Occupancy::trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const
{
float2 diagonal_support = island->get_diagonal_support(scale, phi.rotation, margin);
if (!write) {
if (phi.translation.x < diagonal_support.x || phi.translation.y < diagonal_support.y) {
return terminal; /* Occupied. */
}
}
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
float2 pivot_transformed;
mul_v2_m2v2(pivot_transformed, matrix, island->pivot_);
float2 delta = phi.translation - pivot_transformed;
uint vert_count = uint(island->triangle_vertices_.size()); /* `uint` is faster than `int`. */
for (uint i = 0; i < vert_count; i += 3) {
uint j = (i + triangle_hint_) % vert_count;
float2 uv0;
float2 uv1;
float2 uv2;
mul_v2_m2v2(uv0, matrix, island->triangle_vertices_[j]);
mul_v2_m2v2(uv1, matrix, island->triangle_vertices_[j + 1]);
mul_v2_m2v2(uv2, matrix, island->triangle_vertices_[j + 2]);
float extent = trace_triangle(uv0 + delta, uv1 + delta, uv2 + delta, margin, write);
if (!write && extent >= 0.0f) {
triangle_hint_ = j;
return extent; /* Occupied. */
}
}
return -1.0f; /* Available. */
}
static uv_phi find_best_fit_for_island(const PackIsland *island,
const int scan_line,
Occupancy &occupancy,
const float scale,
const int angle_90_multiple,
const float margin,
const float target_aspect_y)
{
const float bitmap_scale = 1.0f / occupancy.bitmap_scale_reciprocal;
const float sqrt_target_aspect_y = sqrtf(target_aspect_y);
const int scan_line_x = int(scan_line * sqrt_target_aspect_y);
const int scan_line_y = int(scan_line / sqrt_target_aspect_y);
uv_phi phi;
phi.rotation = DEG2RADF(angle_90_multiple * 90);
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
/* Caution, margin is zero for support_diagonal as we're tracking the top-right corner. */
float2 support_diagonal = island->get_diagonal_support_d4(scale, phi.rotation, 0.0f);
/* Scan using an "Alpaca"-style search, first horizontally using "less-than". */
int t = int(ceilf((2 * support_diagonal.x + margin) * occupancy.bitmap_scale_reciprocal));
while (t < scan_line_x) {
phi.translation = float2(t * bitmap_scale, scan_line_y * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
/* Then scan vertically using "less-than-or-equal" */
t = int(ceilf((2 * support_diagonal.y + margin) * occupancy.bitmap_scale_reciprocal));
while (t <= scan_line_y) {
phi.translation = float2(scan_line_x * bitmap_scale, t * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
return uv_phi(); /* Unable to find a place to fit. */
}
static float guess_initial_scale(const Span<PackIsland *> islands,
const float scale,
const float margin)
{
float sum = 1e-40f;
for (int64_t i : islands.index_range()) {
PackIsland *island = islands[i];
sum += island->half_diagonal_.x * 2 * scale + 2 * margin;
sum += island->half_diagonal_.y * 2 * scale + 2 * margin;
}
return sqrtf(sum) / 6.0f;
}
/**
* Pack irregular islands using the `xatlas` strategy, with no rotation.
*
* Loosely based on the 'xatlas' code by Jonathan Young
* from https://github.com/jpcy/xatlas
*
* A brute force packer (BF-Packer) with accelerators:
* - Uses a Bitmap Occupancy class.
* - Uses a "Witness Pixel" and a "Triangle Hint".
* - Write with `margin * 2`, read with `margin == 0`.
* - Lazy resetting of BF search.
*
* Performance would normally be `O(n^4)`, however the occupancy
* bitmap_radix is fixed, which gives a reduced time complexity of `O(n^3)`.
*/
static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const UVPackIsland_Params &params,
float *r_max_u,
float *r_max_v)
{
Occupancy occupancy(guess_initial_scale(islands, scale, margin));
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
/* The following `while` loop is setting up a three-way race:
* `for (scan_line = 0; scan_line < bitmap_radix; scan_line++)`
* `for (i : island_indices.index_range())`
* `while (bitmap_scale_reciprocal > 0) { bitmap_scale_reciprocal *= 0.5f; }`
*/
while (i < island_indices.size()) {
PackIsland *island = islands[island_indices[i]->index];
uv_phi phi;
int max_90_multiple = params.rotate && (i < 50) ? 4 : 1;
for (int angle_90_multiple = 0; angle_90_multiple < max_90_multiple; angle_90_multiple++) {
phi = find_best_fit_for_island(
island, scan_line, occupancy, scale, angle_90_multiple, margin, params.target_aspect_y);
if (phi.is_valid()) {
break;
}
}
if (!phi.is_valid()) {
/* Unable to find a fit on this scan_line. */
island = nullptr; /* Just mark it as null, we won't use it further. */
if (i < 10) {
scan_line++;
}
else {
/* Increasing by 2 here has the effect of changing the sampling pattern.
* The parameter '2' is not "free" in the sense that changing it requires
* a change to `bitmap_radix` and then re-tuning `alpaca_cutoff`.
* Possible values here *could* be 1, 2 or 3, however the only *reasonable*
* choice is 2. */
scan_line += 2;
}
if (scan_line < occupancy.bitmap_radix *
sqrtf(std::min(params.target_aspect_y, 1.0f / params.target_aspect_y))) {
continue; /* Try again on next scan_line. */
}
/* Enlarge search parameters. */
scan_line = 0;
occupancy.increase_scale();
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
}
continue;
}
phis[i] = phi; /* Place island. */
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
island->angle = phi.rotation;
float matrix_inverse[2][2];
island->build_inverse_transformation(scale, phi.rotation, matrix_inverse);
mul_v2_m2v2(island->pre_translate, matrix_inverse, phi.translation);
island->pre_translate -= island->pivot_;
float2 support = island->get_diagonal_support(scale, phi.rotation, margin);
float2 top_right = phi.translation + support;
max_u = std::max(top_right.x, max_u);
max_v = std::max(top_right.y, max_v);
if (i < 128 || (i & 31) == 16) {
scan_line = 0; /* Restart completely. */
}
else {
scan_line = std::max(0, scan_line - 25); /* `-25` must be odd. */
}
}
*r_max_u = max_u;
*r_max_v = max_v;
}
/**
* Helper function for #pack_islands_alpaca_rotate
*
@ -852,6 +445,423 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
*r_max_v = next_v1;
}
/* Wrapper around #BLI_box_pack_2d. */
static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const float target_aspect_y,
float *r_max_u,
float *r_max_v)
{
/* Allocate storage. */
BoxPack *box_array = static_cast<BoxPack *>(
MEM_mallocN(sizeof(*box_array) * islands.size(), __func__));
/* Prepare for box_pack_2d. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
box->w = (island->half_diagonal_.x * 2 * scale + 2 * margin) / target_aspect_y;
box->h = island->half_diagonal_.y * 2 * scale + 2 * margin;
}
const bool sort_boxes = false; /* Use existing ordering from `aabbs`. */
/* \note Writes to `*r_max_u` and `*r_max_v`. */
BLI_box_pack_2d(box_array, int(aabbs.size()), sort_boxes, r_max_u, r_max_v);
*r_max_u *= target_aspect_y;
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
uv_phi phi;
phi.rotation = 0.0f; /* #BLI_box_pack_2d never rotates. */
phi.translation.x = (box->x + box->w * 0.5f) * target_aspect_y;
phi.translation.y = (box->y + box->h * 0.5f);
island->place_(scale, phi);
}
/* Housekeeping. */
MEM_freeN(box_array);
}
/**
* Helper class for the `xatlas` strategy.
* Accelerates geometry queries by approximating exact queries with a bitmap.
* Includes some bookkeeping variables to simplify the algorithm.
*/
class Occupancy {
public:
Occupancy(const float initial_scale);
void increase_scale(); /* Resize the scale of the bitmap and clear it. */
/* Write or Query a triangle on the bitmap. */
float trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const;
/* Write or Query an island on the bitmap. */
float trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const;
int bitmap_radix; /* Width and Height of `bitmap`. */
float bitmap_scale_reciprocal; /* == 1.0f / `bitmap_scale`. */
private:
mutable blender::Array<float> bitmap_;
mutable float2 witness_; /* Witness to a previously known occupied pixel. */
mutable float witness_distance_; /* Signed distance to nearest placed island. */
mutable uint triangle_hint_; /* Hint to a previously suspected overlapping triangle. */
const float terminal = 1048576.0f; /* 4 * bitmap_radix < terminal < INT_MAX / 4. */
};
Occupancy::Occupancy(const float initial_scale)
: bitmap_radix(800), bitmap_(bitmap_radix * bitmap_radix, false)
{
bitmap_scale_reciprocal = 1.0f; /* lint, prevent uninitialized memory access. */
increase_scale();
bitmap_scale_reciprocal = bitmap_radix / initial_scale; /* Actually set the value. */
}
void Occupancy::increase_scale()
{
BLI_assert(bitmap_scale_reciprocal > 0.0f); /* TODO: Packing has failed, report error. */
bitmap_scale_reciprocal *= 0.5f;
for (int i = 0; i < bitmap_radix * bitmap_radix; i++) {
bitmap_[i] = terminal;
}
witness_.x = -1;
witness_.y = -1;
witness_distance_ = 0.0f;
triangle_hint_ = 0;
}
static float signed_distance_fat_triangle(const float2 probe,
const float2 uv0,
const float2 uv1,
const float2 uv2)
{
/* Be careful with ordering, uv0 <- uv1 <- uv2 <- uv0 <- uv1 etc. */
const float dist01_ssq = dist_signed_squared_to_edge(probe, uv0, uv1);
const float dist12_ssq = dist_signed_squared_to_edge(probe, uv1, uv2);
const float dist20_ssq = dist_signed_squared_to_edge(probe, uv2, uv0);
float result_ssq = max_fff(dist01_ssq, dist12_ssq, dist20_ssq);
if (result_ssq < 0.0f) {
return -sqrtf(-result_ssq);
}
BLI_assert(result_ssq >= 0.0f);
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv0));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv1));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv2));
BLI_assert(result_ssq >= 0.0f);
return sqrtf(result_ssq);
}
float Occupancy::trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const
{
const float x0 = min_fff(uv0.x, uv1.x, uv2.x);
const float y0 = min_fff(uv0.y, uv1.y, uv2.y);
const float x1 = max_fff(uv0.x, uv1.x, uv2.x);
const float y1 = max_fff(uv0.y, uv1.y, uv2.y);
float spread = write ? margin * 2 : 0.0f;
int ix0 = std::max(int(floorf((x0 - spread) * bitmap_scale_reciprocal)), 0);
int iy0 = std::max(int(floorf((y0 - spread) * bitmap_scale_reciprocal)), 0);
int ix1 = std::min(int(floorf((x1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
int iy1 = std::min(int(floorf((y1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
const float2 uv0s = uv0 * bitmap_scale_reciprocal;
const float2 uv1s = uv1 * bitmap_scale_reciprocal;
const float2 uv2s = uv2 * bitmap_scale_reciprocal;
/* TODO: Better epsilon handling here could reduce search size. */
float epsilon = 0.7071f; /* == sqrt(0.5f), rounded up by 0.00002f. */
epsilon = std::max(epsilon, 2 * margin * bitmap_scale_reciprocal);
if (!write) {
if (ix0 <= witness_.x && witness_.x < ix1) {
if (iy0 <= witness_.y && witness_.y < iy1) {
const float distance = signed_distance_fat_triangle(witness_, uv0s, uv1s, uv2s);
const float extent = epsilon - distance - witness_distance_;
const float pixel_round_off = -0.1f; /* Go faster on nearly axis-aligned edges. */
if (extent > pixel_round_off) {
return std::max(0.0f, extent); /* Witness observes occupied. */
}
}
}
}
/* Iterate in opposite direction to outer search to improve witness effectiveness. */
for (int y = iy1 - 1; y >= iy0; y--) {
for (int x = ix1 - 1; x >= ix0; x--) {
float *hotspot = &bitmap_[y * bitmap_radix + x];
if (!write && *hotspot > epsilon) {
continue;
}
const float2 probe(x, y);
const float distance = signed_distance_fat_triangle(probe, uv0s, uv1s, uv2s);
if (write) {
*hotspot = min_ff(distance, *hotspot);
continue;
}
const float extent = epsilon - distance - *hotspot;
if (extent > 0.0f) {
witness_ = probe;
witness_distance_ = *hotspot;
return extent; /* Occupied. */
}
}
}
return -1.0f; /* Available. */
}
float2 PackIsland::get_diagonal_support_d4(const float scale,
const float rotation,
const float margin) const
{
if (rotation == 0.0f) {
return half_diagonal_ * scale + margin; /* Fast path for common case. */
}
if (rotation == DEG2RADF(180.0f)) {
return get_diagonal_support_d4(scale, 0.0f, margin); /* Same as 0.0f */
}
/* TODO: BLI_assert rotation is a "Dihedral Group D4" transform. */
float matrix[2][2];
build_transformation(scale, rotation, matrix);
float diagonal_rotated[2];
mul_v2_m2v2(diagonal_rotated, matrix, half_diagonal_);
return float2(fabsf(diagonal_rotated[0]) + margin, fabsf(diagonal_rotated[1]) + margin);
}
float2 PackIsland::get_diagonal_support(const float scale,
const float rotation,
const float margin) const
{
/* Only "D4" transforms are currently supported. */
return get_diagonal_support_d4(scale, rotation, margin);
}
float Occupancy::trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const
{
float2 diagonal_support = island->get_diagonal_support(scale, phi.rotation, margin);
if (!write) {
if (phi.translation.x < diagonal_support.x || phi.translation.y < diagonal_support.y) {
return terminal; /* Occupied. */
}
}
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
float2 pivot_transformed;
mul_v2_m2v2(pivot_transformed, matrix, island->pivot_);
float2 delta = phi.translation - pivot_transformed;
uint vert_count = uint(island->triangle_vertices_.size()); /* `uint` is faster than `int`. */
for (uint i = 0; i < vert_count; i += 3) {
uint j = (i + triangle_hint_) % vert_count;
float2 uv0;
float2 uv1;
float2 uv2;
mul_v2_m2v2(uv0, matrix, island->triangle_vertices_[j]);
mul_v2_m2v2(uv1, matrix, island->triangle_vertices_[j + 1]);
mul_v2_m2v2(uv2, matrix, island->triangle_vertices_[j + 2]);
float extent = trace_triangle(uv0 + delta, uv1 + delta, uv2 + delta, margin, write);
if (!write && extent >= 0.0f) {
triangle_hint_ = j;
return extent; /* Occupied. */
}
}
return -1.0f; /* Available. */
}
static uv_phi find_best_fit_for_island(const PackIsland *island,
const int scan_line,
Occupancy &occupancy,
const float scale,
const int angle_90_multiple,
const float margin,
const float target_aspect_y)
{
const float bitmap_scale = 1.0f / occupancy.bitmap_scale_reciprocal;
const float sqrt_target_aspect_y = sqrtf(target_aspect_y);
const int scan_line_x = int(scan_line * sqrt_target_aspect_y);
const int scan_line_y = int(scan_line / sqrt_target_aspect_y);
uv_phi phi;
phi.rotation = DEG2RADF(angle_90_multiple * 90);
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
/* Caution, margin is zero for support_diagonal as we're tracking the top-right corner. */
float2 support_diagonal = island->get_diagonal_support_d4(scale, phi.rotation, 0.0f);
/* Scan using an "Alpaca"-style search, first horizontally using "less-than". */
int t = int(ceilf((2 * support_diagonal.x + margin) * occupancy.bitmap_scale_reciprocal));
while (t < scan_line_x) {
phi.translation = float2(t * bitmap_scale, scan_line_y * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
/* Then scan vertically using "less-than-or-equal" */
t = int(ceilf((2 * support_diagonal.y + margin) * occupancy.bitmap_scale_reciprocal));
while (t <= scan_line_y) {
phi.translation = float2(scan_line_x * bitmap_scale, t * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
return uv_phi(); /* Unable to find a place to fit. */
}
static float guess_initial_scale(const Span<PackIsland *> islands,
const float scale,
const float margin)
{
float sum = 1e-40f;
for (int64_t i : islands.index_range()) {
PackIsland *island = islands[i];
sum += island->half_diagonal_.x * 2 * scale + 2 * margin;
sum += island->half_diagonal_.y * 2 * scale + 2 * margin;
}
return sqrtf(sum) / 6.0f;
}
/**
* Pack irregular islands using the `xatlas` strategy, and optional D4 transforms.
*
* Loosely based on the 'xatlas' code by Jonathan Young
* from https://github.com/jpcy/xatlas
*
* A brute force packer (BF-Packer) with accelerators:
* - Uses a Bitmap Occupancy class.
* - Uses a "Witness Pixel" and a "Triangle Hint".
* - Write with `margin * 2`, read with `margin == 0`.
* - Lazy resetting of BF search.
*
* Performance would normally be `O(n^4)`, however the occupancy
* bitmap_radix is fixed, which gives a reduced time complexity of `O(n^3)`.
*/
static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const UVPackIsland_Params &params,
float *r_max_u,
float *r_max_v)
{
Occupancy occupancy(guess_initial_scale(islands, scale, margin));
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
/* The following `while` loop is setting up a three-way race:
* `for (scan_line = 0; scan_line < bitmap_radix; scan_line++)`
* `for (i : island_indices.index_range())`
* `while (bitmap_scale_reciprocal > 0) { bitmap_scale_reciprocal *= 0.5f; }`
*/
while (i < island_indices.size()) {
PackIsland *island = islands[island_indices[i]->index];
uv_phi phi;
int max_90_multiple = params.rotate && (i < 50) ? 4 : 1;
for (int angle_90_multiple = 0; angle_90_multiple < max_90_multiple; angle_90_multiple++) {
phi = find_best_fit_for_island(
island, scan_line, occupancy, scale, angle_90_multiple, margin, params.target_aspect_y);
if (phi.is_valid()) {
break;
}
}
if (!phi.is_valid()) {
/* Unable to find a fit on this scan_line. */
island = nullptr; /* Just mark it as null, we won't use it further. */
if (i < 10) {
scan_line++;
}
else {
/* Increasing by 2 here has the effect of changing the sampling pattern.
* The parameter '2' is not "free" in the sense that changing it requires
* a change to `bitmap_radix` and then re-tuning `alpaca_cutoff`.
* Possible values here *could* be 1, 2 or 3, however the only *reasonable*
* choice is 2. */
scan_line += 2;
}
if (scan_line < occupancy.bitmap_radix *
sqrtf(std::min(params.target_aspect_y, 1.0f / params.target_aspect_y))) {
continue; /* Try again on next scan_line. */
}
/* Enlarge search parameters. */
scan_line = 0;
occupancy.increase_scale();
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
}
continue;
}
/* Place island. */
phis[i] = phi;
island->place_(scale, phi);
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
/* Update top-right corner. */
float2 top_right = island->get_diagonal_support(scale, phi.rotation, margin) + phi.translation;
max_u = std::max(top_right.x, max_u);
max_v = std::max(top_right.y, max_v);
/* Heuristics to reduce size of brute-force search. */
if (i < 128 || (i & 31) == 16) {
scan_line = 0; /* Restart completely. */
}
else {
scan_line = std::max(0, scan_line - 25); /* `-25` must be odd. */
}
}
*r_max_u = max_u;
*r_max_v = max_v;
}
/**
* Pack islands using a mix of other strategies.
* \param islands: The islands to be packed. Will be modified with results.
@ -1171,15 +1181,31 @@ void PackIsland::build_transformation(const float scale,
r_matrix[0][1] = -sin_angle * scale * aspect_y;
r_matrix[1][0] = sin_angle * scale / aspect_y;
r_matrix[1][1] = cos_angle * scale;
/*
if (reflect) {
r_matrix[0][0] *= -1.0f;
r_matrix[0][1] *= -1.0f;
}
*/
}
void PackIsland::build_inverse_transformation(const float scale,
const float angle,
float (*r_matrix)[2]) const
{
/* TODO: Generate inverse transform directly. */
build_transformation(scale, angle, r_matrix);
invert_m2_m2(r_matrix, r_matrix);
const float cos_angle = cosf(angle);
const float sin_angle = sinf(angle);
r_matrix[0][0] = cos_angle / scale;
r_matrix[0][1] = sin_angle / scale * aspect_y;
r_matrix[1][0] = -sin_angle / scale / aspect_y;
r_matrix[1][1] = cos_angle / scale;
/*
if (reflect) {
r_matrix[0][0] *= -1.0f;
r_matrix[1][0] *= -1.0f;
}
*/
}
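A quick way to convince yourself the directly-generated inverse matches the forward transform is to multiply the two matrices and check that the product is the identity. A minimal standalone sketch, assuming example values for `scale`, `angle` and `aspect_y` (the `[0][0]` term is assumed to follow the same pattern as the lines above; this is an illustration, not part of the patch):

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
  /* Assumed example values; `aspect_y` and the `[0][0]` term mirror the pattern above. */
  const float scale = 2.5f, angle = 0.7f, aspect_y = 1.25f;
  const float c = std::cos(angle), s = std::sin(angle);

  /* Forward transform, column-major as float[column][row], matching the code above. */
  const float fwd[2][2] = {{c * scale, -s * scale * aspect_y},
                           {s * scale / aspect_y, c * scale}};
  /* Inverse generated directly, matching build_inverse_transformation. */
  const float inv[2][2] = {{c / scale, s / scale * aspect_y},
                           {-s / scale / aspect_y, c / scale}};

  /* Their product should be the identity matrix. */
  for (int col = 0; col < 2; col++) {
    for (int row = 0; row < 2; row++) {
      float sum = 0.0f;
      for (int k = 0; k < 2; k++) {
        sum += fwd[k][row] * inv[col][k];
      }
      const float expected = (col == row) ? 1.0f : 0.0f;
      assert(std::fabs(sum - expected) < 1e-5f);
      printf("product[%d][%d] = %f\n", col, row, sum);
    }
  }
  return 0;
}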
} // namespace blender::geometry

View File

@ -92,6 +92,8 @@ struct GPUPass {
GPUCodegenCreateInfo *create_info = nullptr;
/** Orphaned GPUPasses gets freed by the garbage collector. */
uint refcount;
/** The last time the refcount was greater than 0. */
int gc_timestamp;
/** Identity hash generated from all GLSL code. */
uint32_t hash;
/** Did we already try to compile the attached GPUShader? */
@ -909,28 +911,23 @@ void GPU_pass_release(GPUPass *pass)
void GPU_pass_cache_garbage_collect(void)
{
static int lasttime = 0;
const int shadercollectrate = 60; /* hardcoded for now. */
int ctime = int(PIL_check_seconds_timer());
if (ctime < shadercollectrate + lasttime) {
return;
}
lasttime = ctime;
BLI_spin_lock(&pass_cache_spin);
GPUPass *next, **prev_pass = &pass_cache;
for (GPUPass *pass = pass_cache; pass; pass = next) {
next = pass->next;
if (pass->refcount == 0) {
if (pass->refcount > 0) {
pass->gc_timestamp = ctime;
}
else if (pass->gc_timestamp + shadercollectrate < ctime) {
/* Remove from list */
*prev_pass = next;
gpu_pass_free(pass);
continue;
}
else {
prev_pass = &pass->next;
}
prev_pass = &pass->next;
}
BLI_spin_unlock(&pass_cache_spin);
}

View File

@ -4551,7 +4551,10 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna)
prop = RNA_def_property(srna, "show_retopology", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "overlay.edit_flag", V3D_OVERLAY_EDIT_RETOPOLOGY);
RNA_def_property_ui_text(prop, "Retopology", "Use retopology display");
RNA_def_property_ui_text(prop,
"Retopology",
"Hide the solid mesh and offset the overlay towards the view. "
"Selection is occluded by inactive geometry, unless X-Ray is enabled");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D | NS_VIEW3D_SHADING, NULL);
prop = RNA_def_property(srna, "retopology_offset", PROP_FLOAT, PROP_DISTANCE);

View File

@ -5,9 +5,15 @@
* \ingroup cmpnodes
*/
#include "BLI_math_vector.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "BLT_translation.h"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -17,14 +23,25 @@ namespace blender::nodes::node_composite_displace_cc {
static void cmp_node_displace_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Vector>(N_("Vector"))
.default_value({1.0f, 1.0f, 1.0f})
.min(0.0f)
.max(1.0f)
.subtype(PROP_TRANSLATION);
b.add_input<decl::Float>(N_("X Scale")).default_value(0.0f).min(-1000.0f).max(1000.0f);
b.add_input<decl::Float>(N_("Y Scale")).default_value(0.0f).min(-1000.0f).max(1000.0f);
.subtype(PROP_TRANSLATION)
.compositor_domain_priority(1);
b.add_input<decl::Float>(N_("X Scale"))
.default_value(0.0f)
.min(-1000.0f)
.max(1000.0f)
.compositor_domain_priority(2);
b.add_input<decl::Float>(N_("Y Scale"))
.default_value(0.0f)
.min(-1000.0f)
.max(1000.0f)
.compositor_domain_priority(3);
b.add_output<decl::Color>(N_("Image"));
}
@ -36,8 +53,63 @@ class DisplaceOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
context().set_info_message("Viewport compositor setup not fully supported");
if (is_identity()) {
get_input("Image").pass_through(get_result("Image"));
return;
}
GPUShader *shader = shader_manager().get("compositor_displace");
GPU_shader_bind(shader);
const Result &input_image = get_input("Image");
GPU_texture_mipmap_mode(input_image.texture(), true, true);
GPU_texture_anisotropic_filter(input_image.texture(), true);
GPU_texture_extend_mode(input_image.texture(), GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER);
input_image.bind_as_texture(shader, "input_tx");
const Result &input_displacement = get_input("Vector");
input_displacement.bind_as_texture(shader, "displacement_tx");
const Result &input_x_scale = get_input("X Scale");
input_x_scale.bind_as_texture(shader, "x_scale_tx");
const Result &input_y_scale = get_input("Y Scale");
input_y_scale.bind_as_texture(shader, "y_scale_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Image");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
input_image.unbind_as_texture();
input_displacement.unbind_as_texture();
input_x_scale.unbind_as_texture();
input_y_scale.unbind_as_texture();
output_image.unbind_as_image();
GPU_shader_unbind();
}
bool is_identity()
{
const Result &input_image = get_input("Image");
if (input_image.is_single_value()) {
return true;
}
const Result &input_displacement = get_input("Vector");
if (input_displacement.is_single_value() &&
math::is_zero(input_displacement.get_vector_value())) {
return true;
}
const Result &input_x_scale = get_input("X Scale");
const Result &input_y_scale = get_input("Y Scale");
if (input_x_scale.is_single_value() && input_x_scale.get_float_value() == 0.0f &&
input_y_scale.is_single_value() && input_y_scale.get_float_value() == 0.0f) {
return true;
}
return false;
}
};
@ -57,8 +129,6 @@ void register_node_type_cmp_displace()
cmp_node_type_base(&ntype, CMP_NODE_DISPLACE, "Displace", NODE_CLASS_DISTORT);
ntype.declare = file_ns::cmp_node_displace_declare;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -7,6 +7,7 @@
#include <array>
#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.h"
@ -33,6 +34,7 @@
#include "node_composite_util.hh"
#define MAX_GLARE_ITERATIONS 5
#define MAX_GLARE_SIZE 9
namespace blender::nodes::node_composite_glare_cc {
@ -131,11 +133,6 @@ class GlareOperation : public NodeOperation {
return true;
}
/* The fog glow mode is currently unsupported. */
if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
return true;
}
return false;
}
@ -693,11 +690,132 @@ class GlareOperation : public NodeOperation {
* Fog Glow Glare.
* --------------- */
/* Not yet implemented. Unreachable code due to the is_identity method. */
Result execute_fog_glow(Result & /*highlights_result*/)
/* Fog glow is computed by first progressively half-downsampling the highlights down to a certain
* size, then progressively double-upsampling the last downsampled result up to the original size
* of the highlights, adding the downsampled result of the same size in each upsampling step.
* This can be illustrated as follows:
*
* Highlights ---+---> Fog Glare
* | |
* Downsampled ---+---> Upsampled
* | |
* Downsampled ---+---> Upsampled
* | |
* Downsampled ---+---> Upsampled
* | ^
* ... |
* Downsampled ------------'
*
* The smooth downsampling followed by smooth upsampling can be thought of as a cheap way to
* approximate a large radius blur, and adding the corresponding downsampled result while
* upsampling is done to counter the attenuation that happens during downsampling.
*
* Smaller downsampled results contribute to larger glare size, so controlling the size can be
* done by stopping downsampling down to a certain size, where the maximum possible size is
* achieved when downsampling happens down to the smallest size of 2. */
Result execute_fog_glow(Result &highlights_result)
{
BLI_assert_unreachable();
return Result(ResultType::Color, texture_pool());
/* The maximum possible glare size is achieved when we downsample down to the smallest size of
* 2, which would result in a downsampling chain length of the binary logarithm of the smaller
* dimension of the size of the highlights.
*
* However, as users might want a smaller glare size, we reduce the chain length by the halving
* count supplied by the user. */
const int2 glare_size = get_glare_size();
const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
const int chain_length = int(std::log2(smaller_glare_dimension)) -
compute_fog_glare_size_halving_count();
Array<Result> downsample_chain = compute_fog_glow_downsample_chain(highlights_result,
chain_length);
/* Notice that for a chain length of n, we need (n - 1) upsampling passes. */
const IndexRange upsample_passes_range(chain_length - 1);
GPUShader *shader = shader_manager().get("compositor_glare_fog_glow_upsample");
GPU_shader_bind(shader);
for (const int i : upsample_passes_range) {
Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
input.bind_as_texture(shader, "input_tx");
GPU_texture_filter_mode(input.texture(), true);
const Result &output = downsample_chain[upsample_passes_range.last() - i];
output.bind_as_image(shader, "output_img", true);
compute_dispatch_threads_at_least(shader, output.domain().size);
input.unbind_as_texture();
output.unbind_as_image();
input.release();
}
GPU_shader_unbind();
return downsample_chain[0];
}
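To make the shape of this chain concrete, here is a minimal, self-contained 1D sketch, assuming simple pair-averaging and sample-repetition filters as stand-ins for the actual downsample and filtered-upsample shaders:

#include <cstdio>
#include <vector>

/* Halve the resolution by averaging pairs: stand-in for the downsample passes. */
static std::vector<float> downsample(const std::vector<float> &in)
{
  std::vector<float> out(in.size() / 2);
  for (size_t i = 0; i < out.size(); i++) {
    out[i] = 0.5f * (in[2 * i] + in[2 * i + 1]);
  }
  return out;
}

/* Double the resolution by repeating samples: stand-in for the filtered upsample pass. */
static std::vector<float> upsample(const std::vector<float> &in)
{
  std::vector<float> out(in.size() * 2);
  for (size_t i = 0; i < out.size(); i++) {
    out[i] = in[i / 2];
  }
  return out;
}

int main()
{
  /* A bright "highlight" spike in an otherwise dark signal. */
  std::vector<float> highlights = {0, 0, 0, 8, 8, 0, 0, 0};

  /* Downsample chain; as in the operation, chain[0] is the input itself. */
  const int chain_length = 3;
  std::vector<std::vector<float>> chain = {highlights};
  for (int i = 1; i < chain_length; i++) {
    chain.push_back(downsample(chain.back()));
  }

  /* Upsample from the smallest level, adding the same-sized downsampled result each step. */
  std::vector<float> accumulated = chain.back();
  for (int i = chain_length - 2; i >= 0; i--) {
    std::vector<float> up = upsample(accumulated);
    for (size_t j = 0; j < up.size(); j++) {
      up[j] += chain[i][j];
    }
    accumulated = up;
  }

  /* The spike has spread outwards: a cheap approximation of a large-radius blur. */
  for (const float v : accumulated) {
    printf("%g ", v);
  }
  printf("\n");
  return 0;
}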
/* Progressively downsample the given result into a result with half the size for the given chain
* length, returning an array containing the chain of downsampled results. The first result of
* the chain is the given result itself for easier handling. The chain length is expected not
* to exceed the binary logarithm of the smaller dimension of the given result, because that
* would result in downsampling passes that produce useless textures with just one pixel. */
Array<Result> compute_fog_glow_downsample_chain(Result &highlights_result, int chain_length)
{
const Result downsampled_result = Result::Temporary(ResultType::Color, texture_pool());
Array<Result> downsample_chain(chain_length, downsampled_result);
/* We assign the original highlights result to the first result of the chain to make the code
* easier. In turn, the number of passes is one less than the chain length, because the first
* result needn't be computed. */
downsample_chain[0] = highlights_result;
const IndexRange downsample_passes_range(chain_length - 1);
GPUShader *shader;
for (const int i : downsample_passes_range) {
/* For the first downsample pass, we use a special "Karis" downsample pass that applies a
* form of local tone mapping to reduce the contributions of fireflies, see the shader for
* more information. Later passes use a simple average downsampling filter because fireflies
* do not survive the first pass. */
if (i == downsample_passes_range.first()) {
shader = shader_manager().get("compositor_glare_fog_glow_downsample_karis_average");
GPU_shader_bind(shader);
}
else {
shader = shader_manager().get("compositor_glare_fog_glow_downsample_simple_average");
GPU_shader_bind(shader);
}
const Result &input = downsample_chain[i];
input.bind_as_texture(shader, "input_tx");
GPU_texture_filter_mode(input.texture(), true);
Result &output = downsample_chain[i + 1];
output.allocate_texture(input.domain().size / 2);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, output.domain().size);
input.unbind_as_texture();
output.unbind_as_image();
GPU_shader_unbind();
}
return downsample_chain;
}
/* The fog glow has a maximum possible size when the fog glow size is equal to MAX_GLARE_SIZE and
* halves for every unit decrement of the fog glow size. This method computes the number of
* halvings that should take place, which is simply the difference from MAX_GLARE_SIZE. */
int compute_fog_glare_size_halving_count()
{
return MAX_GLARE_SIZE - get_fog_glow_size();
}
/* The size of the fog glow relative to its maximum possible size, see the
* compute_fog_glare_size_halving_count() method for more information. */
int get_fog_glow_size()
{
return node_storage(bnode()).size;
}
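As a worked example, assuming a 1920x1080 highlights buffer: with MAX_GLARE_SIZE = 9, a Size of 7 gives a halving count of 9 - 7 = 2, so the chain length computed in execute_fog_glow() is int(log2(1080)) - 2 = 10 - 2 = 8 levels.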
/* ----------

View File

@ -7,10 +7,14 @@
#include "BLT_translation.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -20,8 +24,14 @@ namespace blender::nodes::node_composite_map_uv_cc {
static void cmp_node_map_uv_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Vector>(N_("UV")).default_value({1.0f, 0.0f, 0.0f}).min(0.0f).max(1.0f);
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_skip_realization();
b.add_input<decl::Vector>(N_("UV"))
.default_value({1.0f, 0.0f, 0.0f})
.min(0.0f)
.max(1.0f)
.compositor_domain_priority(0);
b.add_output<decl::Color>(N_("Image"));
}
@ -38,8 +48,45 @@ class MapUVOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
context().set_info_message("Viewport compositor setup not fully supported");
if (get_input("Image").is_single_value()) {
get_input("Image").pass_through(get_result("Image"));
return;
}
GPUShader *shader = shader_manager().get("compositor_map_uv");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(
shader, "gradient_attenuation_factor", get_gradient_attenuation_factor());
const Result &input_image = get_input("Image");
GPU_texture_mipmap_mode(input_image.texture(), true, true);
GPU_texture_anisotropic_filter(input_image.texture(), true);
GPU_texture_extend_mode(input_image.texture(), GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER);
input_image.bind_as_texture(shader, "input_tx");
const Result &input_uv = get_input("UV");
input_uv.bind_as_texture(shader, "uv_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Image");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
input_image.unbind_as_texture();
input_uv.unbind_as_texture();
output_image.unbind_as_image();
GPU_shader_unbind();
}
/* A factor that controls the attenuation of the result at the pixels where the gradients of the
* UV texture are too high, see the shader for more information. The factor ranges between zero
* and one, where it has no effect when it is zero and performs full attenuation when it is 1. */
float get_gradient_attenuation_factor()
{
return bnode().custom1 / 100.0f;
}
};
@ -60,8 +107,6 @@ void register_node_type_cmp_mapuv()
ntype.declare = file_ns::cmp_node_map_uv_declare;
ntype.draw_buttons = file_ns::node_composit_buts_map_uv;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -5,12 +5,20 @@
* \ingroup cmpnodes
*/
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLT_translation.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "COM_algorithm_smaa.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "GPU_shader.h"
#include "node_composite_util.hh"
@ -20,10 +28,22 @@ namespace blender::nodes::node_composite_zcombine_cc {
static void cmp_node_zcombine_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z")).default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"), "Image_001").default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z"), "Z_001").default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Float>(N_("Z"))
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(2);
b.add_input<decl::Color>(N_("Image"), "Image_001")
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(1);
b.add_input<decl::Float>(N_("Z"), "Z_001")
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(3);
b.add_output<decl::Color>(N_("Image"));
b.add_output<decl::Float>(N_("Z"));
}
@ -45,9 +65,171 @@ class ZCombineOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
get_result("Z").allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
if (compute_domain().size == int2(1)) {
execute_single_value();
}
else if (use_anti_aliasing()) {
execute_anti_aliased();
}
else {
execute_simple();
}
}
void execute_single_value()
{
const float4 first_color = get_input("Image").get_color_value();
const float4 second_color = get_input("Image_001").get_color_value();
const float first_z_value = get_input("Z").get_float_value();
const float second_z_value = get_input("Z_001").get_float_value();
/* Mix between the first and second images using a mask such that the image with the object
* closer to the camera is returned. The mask value is 1, and thus the first image is returned,
* if its Z value is less than that of the second image. Otherwise, the mask is 0 and the second
* image is returned. Furthermore, if the object in the first image is closer but has a
* non-opaque alpha, then the alpha is used as a mask, but only if Use Alpha is enabled. */
const float z_combine_factor = float(first_z_value < second_z_value);
const float alpha_factor = use_alpha() ? first_color.w : 1.0f;
const float mix_factor = z_combine_factor * alpha_factor;
Result &combined = get_result("Image");
if (combined.should_compute()) {
float4 combined_color = math::interpolate(second_color, first_color, mix_factor);
/* Use the more opaque alpha from the two images. */
combined_color.w = use_alpha() ? math::max(second_color.w, first_color.w) : combined_color.w;
combined.allocate_single_value();
combined.set_color_value(combined_color);
}
Result &combined_z = get_result("Z");
if (combined_z.should_compute()) {
const float combined_z_value = math::interpolate(second_z_value, first_z_value, mix_factor);
combined_z.allocate_single_value();
combined_z.set_float_value(combined_z_value);
}
}
void execute_simple()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_simple");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
}
void execute_anti_aliased()
{
Result mask = compute_mask();
GPUShader *shader = shader_manager().get("compositor_z_combine_from_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
mask.bind_as_texture(shader, "mask_tx");
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
mask.release();
}
Result compute_mask()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_compute_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
const Domain domain = compute_domain();
Result mask = Result::Temporary(ResultType::Float, texture_pool());
mask.allocate_texture(domain);
mask.bind_as_image(shader, "mask_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_image();
GPU_shader_unbind();
Result anti_aliased_mask = Result::Temporary(ResultType::Float, texture_pool());
smaa(context(), mask, anti_aliased_mask);
mask.release();
return anti_aliased_mask;
}
bool use_alpha()
{
return bnode().custom1 != 0;
}
bool use_anti_aliasing()
{
return bnode().custom2 == 0;
}
};
@ -68,8 +250,6 @@ void register_node_type_cmp_zcombine()
ntype.declare = file_ns::cmp_node_zcombine_declare;
ntype.draw_buttons = file_ns::node_composit_buts_zcombine;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -1110,12 +1110,6 @@ static PyObject *C_BVHTree_FromObject(PyObject * /*cls*/, PyObject *args, PyObje
bool use_cage = false;
bool free_mesh = false;
const MLoopTri *lt;
const int *corner_verts;
float(*coords)[3] = nullptr;
uint(*tris)[3] = nullptr;
uint coords_len, tris_len;
float epsilon = 0.0f;
if (!PyArg_ParseTupleAndKeywords(args,
@ -1142,69 +1136,66 @@ static PyObject *C_BVHTree_FromObject(PyObject * /*cls*/, PyObject *args, PyObje
return nullptr;
}
const blender::Span<int> corner_verts = mesh->corner_verts();
const blender::Span<MLoopTri> looptris = mesh->looptris();
/* Get data for tessellation */
{
lt = BKE_mesh_runtime_looptri_ensure(mesh);
tris_len = uint(BKE_mesh_runtime_looptri_len(mesh));
coords_len = uint(mesh->totvert);
const uint coords_len = uint(mesh->totvert);
coords = static_cast<float(*)[3]>(MEM_mallocN(sizeof(*coords) * size_t(coords_len), __func__));
tris = static_cast<uint(*)[3]>(MEM_mallocN(sizeof(*tris) * size_t(tris_len), __func__));
memcpy(coords, BKE_mesh_vert_positions(mesh), sizeof(float[3]) * size_t(mesh->totvert));
float(*coords)[3] = static_cast<float(*)[3]>(
MEM_mallocN(sizeof(*coords) * size_t(coords_len), __func__));
uint(*tris)[3] = static_cast<uint(*)[3]>(
MEM_mallocN(sizeof(*tris) * size_t(looptris.size()), __func__));
memcpy(coords, BKE_mesh_vert_positions(mesh), sizeof(float[3]) * size_t(mesh->totvert));
corner_verts = BKE_mesh_corner_verts(mesh);
}
BVHTree *tree;
{
BVHTree *tree;
uint i;
int *orig_index = nullptr;
blender::float3 *orig_normal = nullptr;
int *orig_index = nullptr;
blender::float3 *orig_normal = nullptr;
tree = BLI_bvhtree_new(int(tris_len), epsilon, PY_BVH_TREE_TYPE_DEFAULT, PY_BVH_AXIS_DEFAULT);
if (tree) {
orig_index = static_cast<int *>(
MEM_mallocN(sizeof(*orig_index) * size_t(tris_len), __func__));
if (!BKE_mesh_poly_normals_are_dirty(mesh)) {
const blender::Span<blender::float3> poly_normals = mesh->poly_normals();
orig_normal = static_cast<blender::float3 *>(
MEM_malloc_arrayN(size_t(mesh->totpoly), sizeof(blender::float3), __func__));
blender::MutableSpan(orig_normal, poly_normals.size()).copy_from(poly_normals);
}
for (i = 0; i < tris_len; i++, lt++) {
float co[3][3];
tris[i][0] = uint(corner_verts[lt->tri[0]]);
tris[i][1] = uint(corner_verts[lt->tri[1]]);
tris[i][2] = uint(corner_verts[lt->tri[2]]);
copy_v3_v3(co[0], coords[tris[i][0]]);
copy_v3_v3(co[1], coords[tris[i][1]]);
copy_v3_v3(co[2], coords[tris[i][2]]);
BLI_bvhtree_insert(tree, int(i), co[0], 3);
orig_index[i] = int(lt->poly);
}
BLI_bvhtree_balance(tree);
tree = BLI_bvhtree_new(
int(looptris.size()), epsilon, PY_BVH_TREE_TYPE_DEFAULT, PY_BVH_AXIS_DEFAULT);
if (tree) {
orig_index = static_cast<int *>(
MEM_mallocN(sizeof(*orig_index) * size_t(looptris.size()), __func__));
if (!BKE_mesh_poly_normals_are_dirty(mesh)) {
const blender::Span<blender::float3> poly_normals = mesh->poly_normals();
orig_normal = static_cast<blender::float3 *>(
MEM_malloc_arrayN(size_t(mesh->totpoly), sizeof(blender::float3), __func__));
blender::MutableSpan(orig_normal, poly_normals.size()).copy_from(poly_normals);
}
if (free_mesh) {
BKE_id_free(nullptr, mesh);
for (const int64_t i : looptris.index_range()) {
float co[3][3];
tris[i][0] = uint(corner_verts[looptris[i].tri[0]]);
tris[i][1] = uint(corner_verts[looptris[i].tri[1]]);
tris[i][2] = uint(corner_verts[looptris[i].tri[2]]);
copy_v3_v3(co[0], coords[tris[i][0]]);
copy_v3_v3(co[1], coords[tris[i][1]]);
copy_v3_v3(co[2], coords[tris[i][2]]);
BLI_bvhtree_insert(tree, int(i), co[0], 3);
orig_index[i] = int(looptris[i].poly);
}
return bvhtree_CreatePyObject(tree,
epsilon,
coords,
coords_len,
tris,
tris_len,
orig_index,
reinterpret_cast<float(*)[3]>(orig_normal));
BLI_bvhtree_balance(tree);
}
if (free_mesh) {
BKE_id_free(nullptr, mesh);
}
return bvhtree_CreatePyObject(tree,
epsilon,
coords,
coords_len,
tris,
uint(looptris.size()),
orig_index,
reinterpret_cast<float(*)[3]>(orig_normal));
}
#endif /* MATH_STANDALONE */