This patch improves the single-core performance of the lattice deform.

1. Prefetching deform verts during initialization. This data is constant for each inner loop. This reduces the complexity of the inner loop, which frees CPU resources for other optimizations.
2. Prefetching the Lattice instance. It was constant. Although this isn't noticeable performance-wise, it is always good to free some space in the branch prediction tables.
3. Remove branching in all loops by not exiting when the loop has no effect. The checks in the inner loops detected whether the loop had any effect on the final result and, if not, continued to the next iteration. This made the branches unpredictable and caused a lot of mispredictions. For smaller inner loops it is always better to remove unpredictable if statements by using branchless code patterns.
4. Use SSE2 instructions when available.

This gives a 50% performance increase, measured on an Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz with GCC 9.3. Also check other compilers.

Before:
```
performance_no_dvert_10000 (4 ms)
performance_no_dvert_100000 (30 ms)
performance_no_dvert_1000000 (268 ms)
performance_no_dvert_10000000 (2637 ms)
```

After:
```
performance_no_dvert_10000 (3 ms)
performance_no_dvert_100000 (21 ms)
performance_no_dvert_1000000 (180 ms)
performance_no_dvert_10000000 (1756 ms)
```

Reviewed By: Campbell Barton

Differential Revision: https://developer.blender.org/D9087
470 lines
15 KiB
C
470 lines
15 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
|
|
* All rights reserved.
|
|
*/
|
|
|
|
/** \file
|
|
* \ingroup bke
|
|
*
|
|
* Deform coordinates by a lattice object (used by modifier).
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
#include "BLI_math.h"
|
|
#include "BLI_task.h"
|
|
#include "BLI_utildefines.h"
|
|
|
|
#include "DNA_curve_types.h"
|
|
#include "DNA_lattice_types.h"
|
|
#include "DNA_mesh_types.h"
|
|
#include "DNA_meshdata_types.h"
|
|
#include "DNA_object_types.h"
|
|
|
|
#include "BKE_curve.h"
|
|
#include "BKE_displist.h"
|
|
#include "BKE_editmesh.h"
|
|
#include "BKE_key.h"
|
|
#include "BKE_lattice.h"
|
|
#include "BKE_modifier.h"
|
|
|
|
#include "BKE_deform.h"
|
|
|
|
#ifdef __SSE2__
|
|
# include <emmintrin.h>
|
|
#endif
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/** \name Lattice Deform API
|
|
* \{ */
|
|
|
|
typedef struct LatticeDeformData {
|
|
/* Convert from object space to deform space */
|
|
float latmat[4][4];
|
|
/* Cached reference to the lattice to use for evaluation. When in edit mode this attribute
|
|
* is set to the edit mode lattice. */
|
|
const Lattice *lt;
|
|
/* Preprocessed lattice points (converted to deform space). */
|
|
float *latticedata;
|
|
/* Prefetched DeformWeights of the lattice. */
|
|
float *lattice_weights;
|
|
} LatticeDeformData;
|
|
|
|
LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Object *ob)
|
|
{
|
|
/* we make an array with all differences */
|
|
Lattice *lt = oblatt->data;
|
|
BPoint *bp;
|
|
DispList *dl = oblatt->runtime.curve_cache ?
|
|
BKE_displist_find(&oblatt->runtime.curve_cache->disp, DL_VERTS) :
|
|
NULL;
|
|
const float *co = dl ? dl->verts : NULL;
|
|
float *fp, imat[4][4];
|
|
float fu, fv, fw;
|
|
int u, v, w;
|
|
float *latticedata;
|
|
float *lattice_weights = NULL;
|
|
float latmat[4][4];
|
|
LatticeDeformData *lattice_deform_data;
|
|
|
|
if (lt->editlatt) {
|
|
lt = lt->editlatt->latt;
|
|
}
|
|
bp = lt->def;
|
|
|
|
const int32_t num_points = lt->pntsu * lt->pntsv * lt->pntsw;
|
|
/* We allocate one additional float for SSE2 optimizations. Without this
|
|
* the SSE2 instructions for the last item would read in unallocated memory. */
|
|
fp = latticedata = MEM_mallocN(sizeof(float[3]) * num_points + sizeof(float), "latticedata");
|
|
|
|
/* for example with a particle system: (ob == NULL) */
|
|
if (ob == NULL) {
|
|
/* in deformspace, calc matrix */
|
|
invert_m4_m4(latmat, oblatt->obmat);
|
|
|
|
/* back: put in deform array */
|
|
invert_m4_m4(imat, latmat);
|
|
}
|
|
else {
|
|
/* in deformspace, calc matrix */
|
|
invert_m4_m4(imat, oblatt->obmat);
|
|
mul_m4_m4m4(latmat, imat, ob->obmat);
|
|
|
|
/* back: put in deform array */
|
|
invert_m4_m4(imat, latmat);
|
|
}
|
|
|
|
/* Prefetch latice deform group weights. */
|
|
int defgrp_index = -1;
|
|
const MDeformVert *dvert = BKE_lattice_deform_verts_get(oblatt);
|
|
if (lt->vgroup[0] && dvert) {
|
|
defgrp_index = BKE_object_defgroup_name_index(ob, lt->vgroup);
|
|
|
|
if (defgrp_index != -1) {
|
|
lattice_weights = MEM_malloc_arrayN(sizeof(float), num_points, "lattice_weights");
|
|
for (int index = 0; index < num_points; index++) {
|
|
lattice_weights[index] = BKE_defvert_find_weight(dvert + index, defgrp_index);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (w = 0, fw = lt->fw; w < lt->pntsw; w++, fw += lt->dw) {
|
|
for (v = 0, fv = lt->fv; v < lt->pntsv; v++, fv += lt->dv) {
|
|
for (u = 0, fu = lt->fu; u < lt->pntsu; u++, bp++, co += 3, fp += 3, fu += lt->du) {
|
|
if (dl) {
|
|
fp[0] = co[0] - fu;
|
|
fp[1] = co[1] - fv;
|
|
fp[2] = co[2] - fw;
|
|
}
|
|
else {
|
|
fp[0] = bp->vec[0] - fu;
|
|
fp[1] = bp->vec[1] - fv;
|
|
fp[2] = bp->vec[2] - fw;
|
|
}
|
|
|
|
mul_mat3_m4_v3(imat, fp);
|
|
}
|
|
}
|
|
}
|
|
|
|
lattice_deform_data = MEM_mallocN(sizeof(LatticeDeformData), "Lattice Deform Data");
|
|
lattice_deform_data->latticedata = latticedata;
|
|
lattice_deform_data->lattice_weights = lattice_weights;
|
|
lattice_deform_data->lt = lt;
|
|
copy_m4_m4(lattice_deform_data->latmat, latmat);
|
|
|
|
return lattice_deform_data;
|
|
}
|
|
|
|
/**
 * Deform a single coordinate in place using prepared lattice deform data.
 *
 * \param lattice_deform_data: Data created by #BKE_lattice_deform_data_create.
 * \param co: Coordinate to deform, modified in place.
 * \param weight: Factor applied to the contribution of every lattice point.
 */
void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data,
                                     float co[3],
                                     float weight)
{
  float *latticedata = lattice_deform_data->latticedata;
  float *lattice_weights = lattice_deform_data->lattice_weights;
  BLI_assert(latticedata);
  const Lattice *lt = lattice_deform_data->lt;
  float u, v, w, tu[4], tv[4], tw[4];
  float vec[3];
  int idx_w, idx_v, idx_u;
  int ui, vi, wi, uu, vv, ww;

  /* vgroup influence */
  /* `co_prev` has 4 elements so the 4-float SSE2 load below stays inside the array. */
  float co_prev[4] = {0.0f}, weight_blend = 0.0f;
  copy_v3_v3(co_prev, co);
#ifdef __SSE2__
  __m128 co_vec = _mm_loadu_ps(co_prev);
#endif

  /* co is in local coords, treat with latmat */
  mul_v3_m4v3(vec, lattice_deform_data->latmat, co);

  /* u v w coords */

  if (lt->pntsu > 1) {
    u = (vec[0] - lt->fu) / lt->du;
    ui = (int)floor(u);
    u -= ui;
    key_curve_position_weights(u, tu, lt->typeu);
  }
  else {
    /* Single point along this axis: only the point itself contributes. */
    tu[0] = tu[2] = tu[3] = 0.0;
    tu[1] = 1.0;
    ui = 0;
  }

  if (lt->pntsv > 1) {
    v = (vec[1] - lt->fv) / lt->dv;
    vi = (int)floor(v);
    v -= vi;
    key_curve_position_weights(v, tv, lt->typev);
  }
  else {
    tv[0] = tv[2] = tv[3] = 0.0;
    tv[1] = 1.0;
    vi = 0;
  }

  if (lt->pntsw > 1) {
    w = (vec[2] - lt->fw) / lt->dw;
    wi = (int)floor(w);
    w -= wi;
    key_curve_position_weights(w, tw, lt->typew);
  }
  else {
    tw[0] = tw[2] = tw[3] = 0.0;
    tw[1] = 1.0;
    wi = 0;
  }

  /* Strides and maximum offsets used to clamp the 4x4x4 neighborhood to the lattice bounds. */
  const int w_stride = lt->pntsu * lt->pntsv;
  const int idx_w_max = (lt->pntsw - 1) * lt->pntsu * lt->pntsv;
  const int v_stride = lt->pntsu;
  const int idx_v_max = (lt->pntsv - 1) * lt->pntsu;
  const int idx_u_max = (lt->pntsu - 1);

  /* No early exits for zero weights in these loops: the loop bodies are kept branch-free. */
  for (ww = wi - 1; ww <= wi + 2; ww++) {
    w = weight * tw[ww - wi + 1];
    idx_w = CLAMPIS(ww * w_stride, 0, idx_w_max);
    for (vv = vi - 1; vv <= vi + 2; vv++) {
      v = w * tv[vv - vi + 1];
      idx_v = CLAMPIS(vv * v_stride, 0, idx_v_max);
      for (uu = ui - 1; uu <= ui + 2; uu++) {
        u = v * tu[uu - ui + 1];
        idx_u = CLAMPIS(uu, 0, idx_u_max);
        const int idx = idx_w + idx_v + idx_u;
#ifdef __SSE2__
        {
          __m128 weight_vec = _mm_set1_ps(u);
          /* This will load one extra element, this is ok because
           * we ignore that part of register anyway.
           * `latticedata` is allocated with one padding float for this purpose. */
          __m128 lattice_vec = _mm_loadu_ps(&latticedata[idx * 3]);
          co_vec = _mm_add_ps(co_vec, _mm_mul_ps(lattice_vec, weight_vec));
        }
#else
        madd_v3_v3fl(co, &latticedata[idx * 3], u);
#endif
        if (lattice_weights) {
          weight_blend += (u * lattice_weights[idx]);
        }
      }
    }
  }
#ifdef __SSE2__
  {
    /* Store all 4 lanes to a temporary, only the first 3 are the deformed coordinate. */
    float co_result[4];
    _mm_storeu_ps(co_result, co_vec);
    copy_v3_v3(co, co_result);
  }
#endif

  if (lattice_weights) {
    /* Blend between the original and the deformed coordinate using the
     * accumulated lattice vertex group weight. */
    interp_v3_v3v3(co, co_prev, co, weight_blend);
  }
}
|
|
|
|
/**
 * Free deform data created by #BKE_lattice_deform_data_create,
 * including the arrays it owns.
 *
 * \param lattice_deform_data: The data to free, must not be NULL.
 */
void BKE_lattice_deform_data_destroy(LatticeDeformData *lattice_deform_data)
{
  if (lattice_deform_data->latticedata) {
    MEM_freeN(lattice_deform_data->latticedata);
  }

  /* The weights are only allocated when the lattice uses a vertex group,
   * free them here to avoid leaking them. */
  if (lattice_deform_data->lattice_weights) {
    MEM_freeN(lattice_deform_data->lattice_weights);
  }

  MEM_freeN(lattice_deform_data);
}
|
|
|
|
/** \} */
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/** \name Lattice Deform #BKE_lattice_deform_coords API
|
|
*
|
|
* #BKE_lattice_deform_coords and related functions.
|
|
* \{ */
|
|
|
|
typedef struct LatticeDeformUserdata {
|
|
LatticeDeformData *lattice_deform_data;
|
|
float (*vert_coords)[3];
|
|
const MDeformVert *dvert;
|
|
int defgrp_index;
|
|
float fac;
|
|
bool invert_vgroup;
|
|
|
|
/** Specific data types. */
|
|
struct {
|
|
int cd_dvert_offset;
|
|
} bmesh;
|
|
} LatticeDeformUserdata;
|
|
|
|
static void lattice_deform_vert_with_dvert(const LatticeDeformUserdata *data,
|
|
const int index,
|
|
const MDeformVert *dvert)
|
|
{
|
|
if (dvert != NULL) {
|
|
const float weight = data->invert_vgroup ?
|
|
1.0f - BKE_defvert_find_weight(dvert, data->defgrp_index) :
|
|
BKE_defvert_find_weight(dvert, data->defgrp_index);
|
|
if (weight > 0.0f) {
|
|
BKE_lattice_deform_data_eval_co(
|
|
data->lattice_deform_data, data->vert_coords[index], weight * data->fac);
|
|
}
|
|
}
|
|
else {
|
|
BKE_lattice_deform_data_eval_co(
|
|
data->lattice_deform_data, data->vert_coords[index], data->fac);
|
|
}
|
|
}
|
|
|
|
static void lattice_deform_vert_task(void *__restrict userdata,
|
|
const int index,
|
|
const TaskParallelTLS *__restrict UNUSED(tls))
|
|
{
|
|
const LatticeDeformUserdata *data = userdata;
|
|
lattice_deform_vert_with_dvert(data, index, data->dvert ? &data->dvert[index] : NULL);
|
|
}
|
|
|
|
/**
 * Mempool callback for edit-meshes with deform verts: fetch the deform vert
 * of the iterated #BMVert from its custom-data and deform its coordinate.
 */
static void lattice_vert_task_editmesh(void *__restrict userdata, MempoolIterData *iter)
{
  const LatticeDeformUserdata *deform_args = userdata;
  BMVert *vert = (BMVert *)iter;
  const int vert_index = BM_elem_index_get(vert);
  MDeformVert *vert_dvert = BM_ELEM_CD_GET_VOID_P(vert, deform_args->bmesh.cd_dvert_offset);

  lattice_deform_vert_with_dvert(deform_args, vert_index, vert_dvert);
}
|
|
|
|
/**
 * Mempool callback for edit-meshes without deform verts:
 * deform the coordinate of the iterated #BMVert without a vertex group weight.
 */
static void lattice_vert_task_editmesh_no_dvert(void *__restrict userdata, MempoolIterData *iter)
{
  const LatticeDeformUserdata *deform_args = userdata;
  BMVert *vert = (BMVert *)iter;
  const int vert_index = BM_elem_index_get(vert);

  lattice_deform_vert_with_dvert(deform_args, vert_index, NULL);
}
|
|
|
|
static void lattice_deform_coords_impl(const Object *ob_lattice,
|
|
const Object *ob_target,
|
|
float (*vert_coords)[3],
|
|
const int vert_coords_len,
|
|
const short flag,
|
|
const char *defgrp_name,
|
|
const float fac,
|
|
const Mesh *me_target,
|
|
BMEditMesh *em_target)
|
|
{
|
|
LatticeDeformData *lattice_deform_data;
|
|
const MDeformVert *dvert = NULL;
|
|
int defgrp_index = -1;
|
|
int cd_dvert_offset = -1;
|
|
|
|
if (ob_lattice->type != OB_LATTICE) {
|
|
return;
|
|
}
|
|
|
|
lattice_deform_data = BKE_lattice_deform_data_create(ob_lattice, ob_target);
|
|
|
|
/* Check whether to use vertex groups (only possible if ob_target is a Mesh or Lattice).
|
|
* We want either a Mesh/Lattice with no derived data, or derived data with deformverts.
|
|
*/
|
|
if (defgrp_name && defgrp_name[0] && ob_target && ELEM(ob_target->type, OB_MESH, OB_LATTICE)) {
|
|
defgrp_index = BKE_object_defgroup_name_index(ob_target, defgrp_name);
|
|
|
|
if (defgrp_index != -1) {
|
|
/* if there's derived data without deformverts, don't use vgroups */
|
|
if (em_target) {
|
|
cd_dvert_offset = CustomData_get_offset(&em_target->bm->vdata, CD_MDEFORMVERT);
|
|
}
|
|
else if (me_target) {
|
|
dvert = CustomData_get_layer(&me_target->vdata, CD_MDEFORMVERT);
|
|
}
|
|
else if (ob_target->type == OB_LATTICE) {
|
|
dvert = ((Lattice *)ob_target->data)->dvert;
|
|
}
|
|
else {
|
|
dvert = ((Mesh *)ob_target->data)->dvert;
|
|
}
|
|
}
|
|
}
|
|
|
|
LatticeDeformUserdata data = {
|
|
.lattice_deform_data = lattice_deform_data,
|
|
.vert_coords = vert_coords,
|
|
.dvert = dvert,
|
|
.defgrp_index = defgrp_index,
|
|
.fac = fac,
|
|
.invert_vgroup = (flag & MOD_LATTICE_INVERT_VGROUP) != 0,
|
|
.bmesh =
|
|
{
|
|
.cd_dvert_offset = cd_dvert_offset,
|
|
},
|
|
};
|
|
|
|
if (em_target != NULL) {
|
|
/* While this could cause an extra loop over mesh data, in most cases this will
|
|
* have already been properly set. */
|
|
BM_mesh_elem_index_ensure(em_target->bm, BM_VERT);
|
|
|
|
if (cd_dvert_offset != -1) {
|
|
BLI_task_parallel_mempool(em_target->bm->vpool, &data, lattice_vert_task_editmesh, true);
|
|
}
|
|
else {
|
|
BLI_task_parallel_mempool(
|
|
em_target->bm->vpool, &data, lattice_vert_task_editmesh_no_dvert, true);
|
|
}
|
|
}
|
|
else {
|
|
TaskParallelSettings settings;
|
|
BLI_parallel_range_settings_defaults(&settings);
|
|
settings.min_iter_per_thread = 32;
|
|
BLI_task_parallel_range(0, vert_coords_len, &data, lattice_deform_vert_task, &settings);
|
|
}
|
|
|
|
BKE_lattice_deform_data_destroy(lattice_deform_data);
|
|
}
|
|
|
|
void BKE_lattice_deform_coords(const Object *ob_lattice,
|
|
const Object *ob_target,
|
|
float (*vert_coords)[3],
|
|
const int vert_coords_len,
|
|
const short flag,
|
|
const char *defgrp_name,
|
|
float fac)
|
|
{
|
|
lattice_deform_coords_impl(
|
|
ob_lattice, ob_target, vert_coords, vert_coords_len, flag, defgrp_name, fac, NULL, NULL);
|
|
}
|
|
|
|
void BKE_lattice_deform_coords_with_mesh(const Object *ob_lattice,
|
|
const Object *ob_target,
|
|
float (*vert_coords)[3],
|
|
const int vert_coords_len,
|
|
const short flag,
|
|
const char *defgrp_name,
|
|
const float fac,
|
|
const Mesh *me_target)
|
|
{
|
|
lattice_deform_coords_impl(ob_lattice,
|
|
ob_target,
|
|
vert_coords,
|
|
vert_coords_len,
|
|
flag,
|
|
defgrp_name,
|
|
fac,
|
|
me_target,
|
|
NULL);
|
|
}
|
|
|
|
/**
 * Deform \a vert_coords in place with \a ob_lattice,
 * passing \a em_target as the edit-mesh of the target (no mesh).
 */
void BKE_lattice_deform_coords_with_editmesh(const struct Object *ob_lattice,
                                             const struct Object *ob_target,
                                             float (*vert_coords)[3],
                                             const int vert_coords_len,
                                             const short flag,
                                             const char *defgrp_name,
                                             const float fac,
                                             struct BMEditMesh *em_target)
{
  const struct Mesh *no_mesh = NULL;

  lattice_deform_coords_impl(
      ob_lattice, ob_target, vert_coords, vert_coords_len, flag, defgrp_name, fac, no_mesh,
      em_target);
}
|
|
|
|
/** \} */
|