Mikktspace tangent calculator optimizations #112256

Open
Eugene-Kuznetsov wants to merge 1 commit from Eugene-Kuznetsov/blender:ek_mikktspace_rework into main

11 changed files with 2374 additions and 412 deletions

View File

@@ -26,7 +26,7 @@
#include "util/log.h"
#include "util/math.h"
#include "mikktspace.hh"
#include "mikktspace_ref.hh"
#include "BKE_attribute.hh"
#include "BKE_attribute_math.hh"
@@ -229,12 +229,12 @@ static void mikk_compute_tangents(
if (is_subd) {
MikkMeshWrapper<true> userdata(b_mesh, layer_name, mesh, tangent, tangent_sign);
/* Compute tangents. */
-mikk::Mikktspace(userdata).genTangSpace();
+mikk::RefMikktspace(userdata).genTangSpace();
}
else {
MikkMeshWrapper<false> userdata(b_mesh, layer_name, mesh, tangent, tangent_sign);
/* Compute tangents. */
-mikk::Mikktspace(userdata).genTangSpace();
+mikk::RefMikktspace(userdata).genTangSpace();
}
}

View File

@@ -77,6 +77,14 @@ class AtomicHashSet {
typedef typename std::conditional<isAtomic, std::atomic<KeyT>, KeyT>::type cell_type;
std::vector<cell_type> cells_;
static inline size_t power_of_two(size_t x)
{
size_t y = 1;
while (y < x)
y *= 2;
return y;
}
public:
struct Config {
KeyT emptyKey;
@@ -97,7 +105,7 @@ class AtomicHashSet {
KeyHash hasher = KeyHash(),
KeyEqual equalityChecker = KeyEqual(),
const Config &c = Config())
-: capacity_(size_t(double(maxSize) / c.maxLoadFactor) + 1),
+: capacity_(power_of_two(size_t(double(maxSize) / c.maxLoadFactor) + 1)),
kEmptyKey_(c.emptyKey),
hasher_(hasher),
equalityChecker_(equalityChecker),
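
A likely motivation for rounding the capacity up to a power of two (an inference; the probe loop itself is outside this hunk): slot indices can then be derived from a hash with a bit mask instead of an integer modulo. A minimal sketch, with a hypothetical helper name:

inline size_t first_slot(size_t hash, size_t capacity_pow2)
{
  /* Same result as hash % capacity_pow2 when the capacity is a power of two,
   * but avoids a hardware division on every probe step. */
  return hash & (capacity_pow2 - 1);
}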

View File

@@ -0,0 +1,402 @@
#pragma once
namespace mikk {
/* Only functions using v8sf require AVX2 (and some of those only require AVX).
* The rest requires at most SSE4.1, which has been included in all x86 CPUs since
* 2013 or so, and may be assumed to exist.
*
* We must separately test for AVX2 CPU support at runtime, because Intel
* was still making non-AVX2 CPUs as late as 2020. */
#ifdef __GNUC__
# pragma GCC push_options
# pragma GCC target("avx2", "sse4.1")
#endif
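/* The runtime AVX2 check itself is outside this header. A minimal sketch of
 * such a check (hypothetical helper name; the MSVC side needs <intrin.h>): */
inline bool cpu_has_avx2()
{
#ifdef __GNUC__
  return __builtin_cpu_supports("avx2");
#else
  int regs[4];
  __cpuidex(regs, 7, 0);            /* CPUID leaf 7, subleaf 0 */
  return (regs[1] & (1 << 5)) != 0; /* EBX bit 5 = AVX2 */
#endif
}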
Eugene-Kuznetsov marked this conversation as resolved (Outdated)

left over debug print
#ifdef __GNUC__
typedef float v4sf __attribute__((vector_size(16)));
typedef float v8sf __attribute__((vector_size(32)));
inline v4sf to_float4(const float3 &x)
{
return v4sf{x.x, x.y, x.z, 0.0f};
}
inline v4sf to_float4_masked(const float3 &x)
{
typedef int v4si __attribute__((vector_size(16)));
return __builtin_ia32_maskloadps((const v4sf *)&x, v4si{-1, -1, -1, 0});
}
inline float3 to_float3(v4sf x)
{
return float3(x[0], x[1], x[2]);
}
inline v8sf from_2xv4sf(v4sf a, v4sf b)
{
v8sf a2 = __builtin_ia32_ps256_ps(a);
return __builtin_ia32_vinsertf128_ps256(a2, b, 1);
}
inline v8sf from_v4sf(v4sf a)
{
v8sf a2 = __builtin_ia32_ps256_ps(a);
return __builtin_ia32_vinsertf128_ps256(a2, a, 1);
}
inline v4sf v4sf_set(float x)
{
return v4sf{x, x, x, x};
}
inline v4sf v4sf_setzero()
{
return v4sf{0, 0, 0, 0};
}
inline v8sf v8sf_set(float x)
{
return v8sf{x, x, x, x, x, x, x, x};
}
inline v8sf v8sf_setzero()
{
return v8sf{0, 0, 0, 0, 0, 0, 0, 0};
}
inline v8sf v8sf_set2(float x, float y)
{
return v8sf{x, x, x, x, y, y, y, y};
}
inline float extract(v4sf v, uint x)
{
return v[x];
}
inline float extract(v8sf v, uint x)
{
return v[x];
}
inline v4sf insert(v4sf v, int x, float y)
{
v[x] = y;
return v;
}
inline v8sf insert(v8sf v, int x, float y)
{
v[x] = y;
return v;
}
#elif defined(_MSC_VER)
/* Without this pragma, the compiler may contract float arithmetic into FMA ops.
 * Sometimes this results in slight changes in numerical results.
 *
 * For example,
 * 'a*b-c*d' might compile into "temp = mul(c,d); out = fmsub(a,b,temp);"
 *
 * Here, temp is rounded to float32, but fmsub computes its product without
 * intermediate rounding, so executing this for a=c and b=d no longer produces
 * an exact zero (but a residual on the order of the rounding error of a*b).
 *
 * This would break our reference compatibility, because, e.g., the output of initTriangle()
 * changes fundamentally if some internal values go from exactly zero to almost zero. */
# pragma fp_contract(off)
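/* To reproduce the contracted form explicitly (illustration only; fmaf is from
 * <math.h> and is not used by the PR): for a == c and b == d this returns the
 * rounding error of a*b, roughly |a*b| * 2^-24 for float, instead of exactly
 * 0.0f, because the product inside the fma is not pre-rounded to float. */
inline float contracted_diff(float a, float b, float c, float d)
{
  return fmaf(a, b, -(c * d));
}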
typedef __m128 v4sf;
typedef __m128i v4si;
typedef __m256 v8sf;
inline v4sf to_float4(const float3 &x)
{
return _mm_set_ps(0, x.z, x.y, x.x);
}
inline v4sf to_float4_masked(const float3 &x)
{
return _mm_maskload_ps(&x.x, _mm_set_epi32(0, -1, -1, -1));
}
inline float3 to_float3(v4sf x)
{
return float3(x.m128_f32[0], x.m128_f32[1], x.m128_f32[2]);
}
inline v8sf from_2xv4sf(v4sf a, v4sf b)
{
return _mm256_set_m128(b, a);
}
inline v8sf from_v4sf(v4sf a)
{
return _mm256_set_m128(a, a);
}
inline v4sf v4sf_set(float x)
{
return _mm_set1_ps(x);
}
inline v4si v4si_set(int x)
{
return _mm_set1_epi32(x);
}
inline v4sf v4sf_setzero()
{
return _mm_setzero_ps();
}
inline v8sf v8sf_setzero()
{
return _mm256_setzero_ps();
}
inline v8sf v8sf_set(float x)
{
return _mm256_set1_ps(x);
}
inline v8sf v8sf_set2(float x, float y)
{
return _mm256_set_ps(y, y, y, y, x, x, x, x);
}
# define __builtin_ia32_dpps _mm_dp_ps
# define __builtin_ia32_hsubps _mm_hsub_ps
# define __builtin_ia32_maxss _mm_max_ss
# define __builtin_ia32_maxps _mm_max_ps
# define __builtin_ia32_minps _mm_min_ps
# define __builtin_ia32_mulps _mm_mul_ps
# define __builtin_ia32_shufps _mm_shuffle_ps
# define __builtin_ia32_cmpltps _mm_cmplt_ps
# define __builtin_ia32_andps _mm_and_ps
# define __builtin_ia32_andnps _mm_andnot_ps
# define __builtin_ia32_rsqrtss _mm_rsqrt_ss
# define __builtin_ia32_sqrtps _mm_sqrt_ps
# define __builtin_ia32_maxps256 _mm256_max_ps
# define __builtin_ia32_sqrtps256 _mm256_sqrt_ps
# define __builtin_ia32_dpps256 _mm256_dp_ps
# define __builtin_ia32_vpermilps256 _mm256_permute_ps
# define __builtin_ia32_vextractf128_ps256 _mm256_extractf128_ps
# define __builtin_ia32_vperm2f128_ps256 _mm256_permute2f128_ps
# define __builtin_ia32_blendps256 _mm256_blend_ps
# define __builtin_ia32_permvarsf256 _mm256_permutevar8x32_ps
# define __builtin_ia32_cmpgtps _mm_cmpgt_ps
# define __builtin_ia32_cmpps256 _mm256_cmp_ps
# define __builtin_ia32_andps256 _mm256_and_ps
# define __builtin_ia32_andnps256 _mm256_andnot_ps
# define __builtin_ia32_haddps256 _mm256_hadd_ps
inline float extract(__m128 v, uint x)
{
return v.m128_f32[x];
}
inline int extract(__m128i v, uint x)
{
return v.m128i_i32[x];
}
inline float extract(__m256 v, uint x)
{
return v.m256_f32[x];
}
inline __m128 insert(__m128 v, int x, float y)
{
v.m128_f32[x] = y;
return v;
}
inline __m256 insert(__m256 v, int x, float y)
{
v.m256_f32[x] = y;
return v;
}
// GCC provides all these automatically
inline v4sf operator*(v4sf a, v4sf b)
{
return _mm_mul_ps(a, b);
}
inline v4sf operator*(v4sf a, float b)
{
return _mm_mul_ps(a, v4sf_set(b));
}
inline v4sf operator+(float a, v4sf b)
{
return _mm_add_ps(v4sf_set(a), b);
}
inline v4sf operator-(float a, v4sf b)
{
return _mm_sub_ps(v4sf_set(a), b);
}
inline v8sf operator*(v8sf a, v8sf b)
{
return _mm256_mul_ps(a, b);
}
inline v4sf operator-(v4sf a, v4sf b)
{
return _mm_sub_ps(a, b);
}
inline v4sf operator+(v4sf a, v4sf b)
{
return _mm_add_ps(a, b);
}
inline v4si operator+(v4si a, v4si b)
{
return _mm_add_epi32(a, b);
}
inline v8sf operator+(v8sf a, v8sf b)
{
return _mm256_add_ps(a, b);
}
inline v8sf operator-(v8sf a, v8sf b)
{
return _mm256_sub_ps(a, b);
}
inline v8sf operator/(v8sf a, v8sf b)
{
return _mm256_div_ps(a, b);
}
inline v4sf operator/(float a, v4sf b)
{
return _mm_div_ps(v4sf_set(a), b);
}
inline v8sf operator/(float a, v8sf b)
{
return _mm256_div_ps(v8sf_set(a), b);
}
inline void operator+=(v8sf &a, v8sf b)
{
a = _mm256_add_ps(a, b);
}
inline v4si operator*(v4si a, int b)
{
  /* Note: _mm_mul_epi32 would widen the even lanes to 64 bits; the lane-wise
   * 32-bit multiply matching GCC's vector semantics is _mm_mullo_epi32. */
  return _mm_mullo_epi32(a, v4si_set(b));
}
inline v4si operator^(v4si a, v4si b)
{
return _mm_xor_si128(a, b);
}
#endif
inline v4sf to_float4(v4sf x)
{
return x;
}
inline void unpack(v4sf &p0, v4sf &p1, v8sf v)
{
p0 = __builtin_ia32_vextractf128_ps256(v, 0);
p1 = __builtin_ia32_vextractf128_ps256(v, 1);
}
template<int i> inline v4sf bcast(v4sf x)
{
return __builtin_ia32_shufps(x, x, i * 0x55);
}
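/* The i * 0x55 immediate packs lane index i into all four 2-bit selector
 * fields of shufps (0x55 == 0b01010101), so every output lane reads lane i: */
static_assert((2 * 0x55) == 0xAA, "imm for broadcasting lane 2");
static_assert((3 * 0x55) == 0xFF, "imm for broadcasting lane 3");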
inline float dot(const v4sf &a, const v4sf &b)
{
return extract(__builtin_ia32_dpps(a, b, 255), 0);
}
inline v8sf dot_single(v8sf a, v8sf b)
{
return __builtin_ia32_dpps256(a, b, 0xFF);
}
inline float dot_inner(const v8sf &a)
{
return extract(dot_single(a, __builtin_ia32_vperm2f128_ps256(a, a, 1)), 0);
}
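/* dpps immediate semantics (documented SSE4.1/AVX behavior, noted here for
 * reference): the high four bits select which lanes enter the products, the
 * low four bits select which output lanes receive the sum (others are zeroed).
 * For example, a three-component dot product (illustration, not used above): */
inline float dot3(v4sf a, v4sf b)
{
  /* 0x71: multiply lanes 0..2 only, write the sum to lane 0. */
  return extract(__builtin_ia32_dpps(a, b, 0x71), 0);
}
/* The masks 0xF1/0xF2/0xF4/0xF8 in inv_length8() below use the low nibble to
 * land four dot products in four distinct lanes, so they can simply be added. */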
/* Potentially somewhat expensive (creates a global constant and adds a memory access, though the
* constant may be expected to stay in cache), use sparingly. Sometimes unavoidable since few AVX2
* ops send data across the 128-bit boundary */
inline v8sf cross_lane_permute(
v8sf v, int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7)
{
#if __GNUC__
typedef int v8si __attribute__((vector_size(32)));
v8si perm_mask{i0, i1, i2, i3, i4, i5, i6, i7};
return __builtin_ia32_permvarsf256(v, perm_mask);
#else
return __builtin_ia32_permvarsf256(v, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0));
#endif
}
/* input: [... i j ..... ], with <i> in position <offset> and <j> in position <offset+1>
* output: [ i i i i j j j j ] */
template<int offset> inline v8sf unpack4x(v8sf a)
{
return cross_lane_permute(
a, offset, offset, offset, offset, offset + 1, offset + 1, offset + 1, offset + 1);
}
inline float length_squared(const v4sf &x)
{
return dot(x, x);
}
inline float length(const v4sf &x)
{
v4sf v = __builtin_ia32_dpps(x, x, 255);
v = __builtin_ia32_sqrtps(v);
return extract(v, 0);
}
inline float inv_length(const v4sf &x)
{
v4sf v = __builtin_ia32_dpps(x, x, 255);
v4sf one = v4sf_set(1.0f), zero = v4sf_setzero();
v4sf rv = 1.0f / __builtin_ia32_sqrtps(v);
v4sf nonzero = __builtin_ia32_cmpgtps(v, zero);
rv = __builtin_ia32_andps(nonzero, rv) + __builtin_ia32_andnps(nonzero, one);
return extract(rv, 0);
}
/* input: 8 float4's packed into 4 v8sf
* output: 8 floats containing inverse lengths of each */
inline v8sf inv_length8(v8sf px0, v8sf px1, v8sf px2, v8sf px3)
{
v8sf v;
v = __builtin_ia32_dpps256(px0, px0, 0xF1);
v += __builtin_ia32_dpps256(px1, px1, 0xF2);
v += __builtin_ia32_dpps256(px2, px2, 0xF4);
v += __builtin_ia32_dpps256(px3, px3, 0xF8);
/* v is [0 2 4 6 1 3 5 7], and we want [0 1 2 3 4 5 6 7] */
v = cross_lane_permute(v, 0, 4, 1, 5, 2, 6, 3, 7);
/* There is an intrinsic for rsqrt, but it has relative accuracy of ~1/1000 */
v8sf one = v8sf_set(1.0f), zero = v8sf_setzero();
v8sf nonzero = __builtin_ia32_cmpps256(v, zero, 4);
v8sf rv = 1.0f / __builtin_ia32_sqrtps256(v);
/* we must match reference behavior (return 1 if input is zero) or else
* we'll have significant mismatches in some situations */
return __builtin_ia32_andps256(nonzero, rv) + __builtin_ia32_andnps256(nonzero, one);
}
/* Projects v onto the surface with normal n. */
inline v4sf project1(v4sf n, v4sf v)
{
return v - (n * dot(n, v));
}
/* Projects two packed float3's in v8 onto surfaces in n8. */
inline v8sf project2(v8sf n8, v8sf v8)
{
v8sf d8 = dot_single(n8, v8);
return v8 - n8 * d8;
}
inline v4sf fast_acosf_4x(v4sf x)
{
v4sf f = __builtin_ia32_maxps(x, x * -1.0f);
v4sf one = v4sf_set(1.0f), zero = v4sf_setzero();
v4sf m = __builtin_ia32_minps(f, one);
v4sf a = __builtin_ia32_sqrtps(one - m) *
(1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
v4sf sign = __builtin_ia32_cmpltps(x, zero);
return __builtin_ia32_andps(sign, 3.1415926535897932f - a) + __builtin_ia32_andnps(sign, a);
}
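/* For comparison, a scalar version of the same polynomial (this appears to
 * match fast_acosf() in mikk_util.hh, though that file is outside this hunk;
 * absolute error is on the order of 1e-4 radians, an approximation rather
 * than a libm replacement): */
inline float fast_acosf_scalar(float x)
{
  const float f = fabsf(x);
  const float m = (f < 1.0f) ? f : 1.0f; /* clamp |x| to 1 */
  const float a = sqrtf(1.0f - m) *
                  (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
  return (x < 0.0f) ? 3.1415926535897932f - a : a;
}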
#ifdef __GNUC__
# pragma GCC pop_options
#endif
} // namespace mikk

View File

@@ -19,7 +19,7 @@ namespace mikk {
inline bool not_zero(const float fX)
{
-return fabsf(fX) > FLT_MIN;
+return (fX > FLT_MIN) || (-fX > FLT_MIN);
}
/* Helpers for (un)packing a 2-bit vertex index and a 30-bit face index to one integer. */
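The helpers themselves fall outside this hunk; given the 2-bit/30-bit split described, they presumably look like this (a sketch, not copied from the diff):

inline uint pack_index(const uint face, const uint vert)
{
  return (face << 2) | (vert & 0x3); /* assumes vert < 4 and face < 2^30 */
}
inline void unpack_index(uint &face, uint &vert, const uint indexIn)
{
  vert = indexIn & 0x3;
  face = indexIn >> 2;
}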

File diff suppressed because it is too large

View File

@@ -0,0 +1,880 @@
/* SPDX-FileCopyrightText: 2011 Morten S. Mikkelsen
* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: Apache-2.0 */
/** \file
* \ingroup mikktspace
*/
#include <algorithm>
#include <cassert>
#include <unordered_map>
#ifdef WITH_TBB
# include <tbb/parallel_for.h>
#endif
#include "mikk_atomic_hash_set.hh"
#include "mikk_float3.hh"
#include "mikk_util.hh"
namespace mikk {
static constexpr uint UNSET_ENTRY = 0xffffffffu;
template<typename Mesh> class RefMikktspace {
struct Triangle {
/* Stores neighboring triangle for group assignment. */
std::array<uint, 3> neighbor;
/* Stores assigned group of each vertex. */
std::array<uint, 3> group;
/* Stores vertex indices that make up the triangle. */
std::array<uint, 3> vertices;
/* Computed face tangent, will be accumulated into group. */
float3 tangent;
/* Index of the face that this triangle belongs to. */
uint faceIdx;
/* Index of the first of this triangle's vertices' TSpaces. */
uint tSpaceIdx;
/* Stores mapping from this triangle's vertices to the original
* face's vertices (relevant for quads). */
std::array<uint8_t, 3> faceVertex;
// flags
bool markDegenerate : 1;
bool quadOneDegenTri : 1;
bool groupWithAny : 1;
bool orientPreserving : 1;
Triangle(uint faceIdx_, uint tSpaceIdx_)
: tangent{0.0f, 0.0f, 0.0f},
faceIdx{faceIdx_},
tSpaceIdx{tSpaceIdx_},
markDegenerate{false},
quadOneDegenTri{false},
groupWithAny{true},
orientPreserving{false}
{
neighbor.fill(UNSET_ENTRY);
group.fill(UNSET_ENTRY);
}
void setVertices(uint8_t i0, uint8_t i1, uint8_t i2)
{
faceVertex[0] = i0;
faceVertex[1] = i1;
faceVertex[2] = i2;
vertices[0] = pack_index(faceIdx, i0);
vertices[1] = pack_index(faceIdx, i1);
vertices[2] = pack_index(faceIdx, i2);
}
};
struct Group {
float3 tangent;
uint vertexRepresentative;
bool orientPreserving;
Group(uint vertexRepresentative_, bool orientPreserving_)
: tangent{0.0f},
vertexRepresentative{vertexRepresentative_},
orientPreserving{orientPreserving_}
{
}
void normalizeTSpace()
{
tangent = tangent.normalize();
}
void accumulateTSpaceAtomic(float3 v_tangent)
{
float_add_atomic(&tangent.x, v_tangent.x);
float_add_atomic(&tangent.y, v_tangent.y);
float_add_atomic(&tangent.z, v_tangent.z);
}
void accumulateTSpace(float3 v_tangent)
{
tangent += v_tangent;
}
};
struct TSpace {
float3 tangent = float3(1.0f, 0.0f, 0.0f);
uint counter = 0;
bool orientPreserving = false;
void accumulateGroup(const Group &group)
{
assert(counter < 2);
if (counter == 0) {
tangent = group.tangent;
}
else if (tangent == group.tangent) {
/* This branch matters: due to floating-point rounding, averaging two
 * identical tangents would introduce a slight difference, which causes
 * tangent-space splits later on, so do nothing. */
}
else {
tangent = (tangent + group.tangent).normalize();
}
counter++;
orientPreserving = group.orientPreserving;
}
};
Mesh &mesh;
std::vector<Triangle> triangles;
std::vector<TSpace> tSpaces;
std::vector<Group> groups;
uint nrTSpaces, nrFaces, nrTriangles, totalTriangles;
int nrThreads;
bool isParallel;
public:
bool profile = false;
bool trace_on = false;
RefMikktspace(Mesh &mesh_) : mesh(mesh_) {}
void genTangSpace()
{
nrFaces = uint(mesh.GetNumFaces());
#ifdef WITH_TBB
nrThreads = tbb::this_task_arena::max_concurrency();
isParallel = (nrThreads > 1) && (nrFaces > 10000);
#else
nrThreads = 1;
isParallel = false;
#endif
// make an initial triangle --> face index list
generateInitialVerticesIndexList();
if (nrTriangles == 0) {
return;
}
// make a welded index list of identical positions and attributes (pos, norm, texc)
generateSharedVerticesIndexList();
// mark all triangle pairs that belong to a quad with only one
// good triangle. These need special treatment in degenEpilogue().
// Additionally, move all good triangles to the start of
// triangles[] without changing order and
// put the degenerate triangles last.
degenPrologue();
if (nrTriangles == 0) {
// No point in building tangents if there are no non-degenerate triangles; just output the default tangent spaces
tSpaces.resize(nrTSpaces);
}
else {
// evaluate triangle level attributes and neighbor list
initTriangle();
// match up edge pairs
buildNeighbors();
// based on the 4 rules, identify groups based on connectivity
build4RuleGroups();
// make tspaces, each group is split up into subgroups.
// Finally a tangent space is made for every resulting subgroup
generateTSpaces();
// degenerate quads with one good triangle will be fixed by copying a space from
// the good triangle to the coinciding vertex.
// all other degenerate triangles will just copy a space from any good triangle
// with the same welded index in vertices[].
degenEpilogue();
}
uint index = 0;
for (uint f = 0; f < nrFaces; f++) {
const uint verts = mesh.GetNumVerticesOfFace(f);
if (verts != 3 && verts != 4) {
continue;
}
// set data
for (uint i = 0; i < verts; i++) {
const TSpace &tSpace = tSpaces[index++];
mesh.SetTangentSpace(f, i, tSpace.tangent, tSpace.orientPreserving);
}
}
}
protected:
template<typename F> void runParallel(uint start, uint end, F func)
{
#ifdef WITH_TBB
if (isParallel) {
tbb::parallel_for(start, end, func);
}
else
#endif
{
for (uint i = start; i < end; i++) {
func(i);
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
float3 getPosition(uint vertexID)
{
uint f, v;
unpack_index(f, v, vertexID);
return mesh.GetPosition(f, v);
}
float3 getNormal(uint vertexID)
{
uint f, v;
unpack_index(f, v, vertexID);
return mesh.GetNormal(f, v);
}
float3 getTexCoord(uint vertexID)
{
uint f, v;
unpack_index(f, v, vertexID);
return mesh.GetTexCoord(f, v);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
void generateInitialVerticesIndexList()
{
nrTriangles = 0;
for (uint f = 0; f < nrFaces; f++) {
const uint verts = mesh.GetNumVerticesOfFace(f);
if (verts == 3) {
nrTriangles += 1;
}
else if (verts == 4) {
nrTriangles += 2;
}
}
triangles.reserve(nrTriangles);
nrTSpaces = 0;
for (uint f = 0; f < nrFaces; f++) {
const uint verts = mesh.GetNumVerticesOfFace(f);
if (verts != 3 && verts != 4)
continue;
uint tA = uint(triangles.size());
triangles.emplace_back(f, nrTSpaces);
if (verts == 3) {
Triangle &triA = triangles[tA];
triA.setVertices(0, 1, 2);
}
else {
uint tB = uint(triangles.size());
triangles.emplace_back(f, nrTSpaces);
Triangle &triA = triangles[tA];
Triangle &triB = triangles[tB];
// need an order independent way to evaluate
// tspace on quads. This is done by splitting
// along the shortest diagonal.
float distSQ_02 = (mesh.GetTexCoord(f, 2) - mesh.GetTexCoord(f, 0)).length_squared();
float distSQ_13 = (mesh.GetTexCoord(f, 3) - mesh.GetTexCoord(f, 1)).length_squared();
bool quadDiagIs_02;
if (distSQ_02 != distSQ_13)
quadDiagIs_02 = (distSQ_02 < distSQ_13);
else {
distSQ_02 = (mesh.GetPosition(f, 2) - mesh.GetPosition(f, 0)).length_squared();
distSQ_13 = (mesh.GetPosition(f, 3) - mesh.GetPosition(f, 1)).length_squared();
quadDiagIs_02 = !(distSQ_13 < distSQ_02);
}
if (quadDiagIs_02) {
triA.setVertices(0, 1, 2);
triB.setVertices(0, 2, 3);
}
else {
triA.setVertices(0, 1, 3);
triB.setVertices(1, 2, 3);
}
}
nrTSpaces += verts;
}
}
struct VertexHash {
RefMikktspace<Mesh> *mikk;
inline uint operator()(const uint &k) const
{
return hash_float3x3(mikk->getPosition(k), mikk->getNormal(k), mikk->getTexCoord(k));
}
};
struct VertexEqual {
RefMikktspace<Mesh> *mikk;
inline bool operator()(const uint &kA, const uint &kB) const
{
return mikk->getTexCoord(kA) == mikk->getTexCoord(kB) &&
mikk->getNormal(kA) == mikk->getNormal(kB) &&
mikk->getPosition(kA) == mikk->getPosition(kB);
}
};
/* Merge identical vertices.
 * To find vertices with identical position, normal and texcoord, we hash all 9
 * values and insert each vertex into a hash set keyed on that hash. Hash
 * collisions are resolved by comparing the actual attributes, so duplicates are
 * detected on insertion and merged to the first occurrence.
 */
template<bool isAtomic> void generateSharedVerticesIndexList_impl()
{
uint numVertices = nrTriangles * 3;
AtomicHashSet<uint, isAtomic, VertexHash, VertexEqual> set(numVertices, {this}, {this});
runParallel(0u, nrTriangles, [&](uint t) {
for (uint i = 0; i < 3; i++) {
auto res = set.emplace(triangles[t].vertices[i]);
if (!res.second) {
triangles[t].vertices[i] = res.first;
}
}
});
}
void generateSharedVerticesIndexList()
{
if (isParallel) {
generateSharedVerticesIndexList_impl<true>();
}
else {
generateSharedVerticesIndexList_impl<false>();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////// Degenerate triangles ////////////////////////////////////
void degenPrologue()
{
// Mark all degenerate triangles
totalTriangles = nrTriangles;
std::atomic<uint> degenTriangles(0);
runParallel(0u, totalTriangles, [&](uint t) {
const float3 p0 = getPosition(triangles[t].vertices[0]);
const float3 p1 = getPosition(triangles[t].vertices[1]);
const float3 p2 = getPosition(triangles[t].vertices[2]);
if (p0 == p1 || p0 == p2 || p1 == p2) // degenerate
{
triangles[t].markDegenerate = true;
degenTriangles.fetch_add(1);
}
});
nrTriangles -= degenTriangles.load();
if (totalTriangles == nrTriangles) {
return;
}
// locate quads with only one good triangle
runParallel(0u, totalTriangles - 1, [&](uint t) {
Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
if (triangleA.faceIdx != triangleB.faceIdx) {
/* Individual triangle, skip. */
return;
}
if (triangleA.markDegenerate != triangleB.markDegenerate) {
triangleA.quadOneDegenTri = true;
triangleB.quadOneDegenTri = true;
}
});
std::stable_partition(triangles.begin(), triangles.end(), [](const Triangle &tri) {
return !tri.markDegenerate;
});
}
void degenEpilogue()
{
if (nrTriangles == totalTriangles) {
return;
}
std::unordered_map<uint, uint> goodTriangleMap;
for (uint t = 0; t < nrTriangles; t++) {
for (uint i = 0; i < 3; i++) {
goodTriangleMap.emplace(triangles[t].vertices[i], pack_index(t, i));
}
}
// deal with degenerate triangles
// punishment for degenerate triangles is O(nrTriangles) extra memory.
for (uint t = nrTriangles; t < totalTriangles; t++) {
// degenerate triangles on a quad with one good triangle are skipped
// here but processed in the next loop
if (triangles[t].quadOneDegenTri) {
continue;
}
for (uint i = 0; i < 3; i++) {
const auto entry = goodTriangleMap.find(triangles[t].vertices[i]);
if (entry == goodTriangleMap.end()) {
// Matching vertex from good triangle is not found.
continue;
}
uint tSrc, iSrc;
unpack_index(tSrc, iSrc, entry->second);
const uint iSrcVert = triangles[tSrc].faceVertex[iSrc];
const uint iSrcOffs = triangles[tSrc].tSpaceIdx;
const uint iDstVert = triangles[t].faceVertex[i];
const uint iDstOffs = triangles[t].tSpaceIdx;
// copy tspace
tSpaces[iDstOffs + iDstVert] = tSpaces[iSrcOffs + iSrcVert];
}
}
// deal with degenerate quads with one good triangle
for (uint t = 0; t < nrTriangles; t++) {
// this triangle belongs to a quad where the
// other triangle is degenerate
if (!triangles[t].quadOneDegenTri) {
continue;
}
uint vertFlag = (1u << triangles[t].faceVertex[0]) | (1u << triangles[t].faceVertex[1]) |
(1u << triangles[t].faceVertex[2]);
uint missingFaceVertex = 0;
if ((vertFlag & 2) == 0)
missingFaceVertex = 1;
else if ((vertFlag & 4) == 0)
missingFaceVertex = 2;
else if ((vertFlag & 8) == 0)
missingFaceVertex = 3;
uint faceIdx = triangles[t].faceIdx;
float3 dstP = mesh.GetPosition(faceIdx, missingFaceVertex);
bool found = false;
for (uint i = 0; i < 3; i++) {
const uint faceVertex = triangles[t].faceVertex[i];
const float3 srcP = mesh.GetPosition(faceIdx, faceVertex);
if (srcP == dstP) {
const uint offset = triangles[t].tSpaceIdx;
tSpaces[offset + missingFaceVertex] = tSpaces[offset + faceVertex];
found = true;
break;
}
}
assert(found);
(void)found;
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
// returns the texture area times 2
float calcTexArea(uint tri)
{
const float3 t1 = getTexCoord(triangles[tri].vertices[0]);
const float3 t2 = getTexCoord(triangles[tri].vertices[1]);
const float3 t3 = getTexCoord(triangles[tri].vertices[2]);
const float t21x = t2.x - t1.x;
const float t21y = t2.y - t1.y;
const float t31x = t3.x - t1.x;
const float t31y = t3.y - t1.y;
const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
return fabsf(signedAreaSTx2);
}
void initTriangle()
{
// evaluate first order derivatives
runParallel(0u, nrTriangles, [&](uint t) {
Triangle &triangle = triangles[t];
// initial values
const float3 v1 = getPosition(triangle.vertices[0]);
const float3 v2 = getPosition(triangle.vertices[1]);
const float3 v3 = getPosition(triangle.vertices[2]);
const float3 t1 = getTexCoord(triangle.vertices[0]);
const float3 t2 = getTexCoord(triangle.vertices[1]);
const float3 t3 = getTexCoord(triangle.vertices[2]);
const float t21x = t2.x - t1.x;
const float t21y = t2.y - t1.y;
const float t31x = t3.x - t1.x;
const float t31y = t3.y - t1.y;
const float3 d1 = v2 - v1, d2 = v3 - v1;
const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
const float3 vOs = (t31y * d1) - (t21y * d2); // eq 18
const float3 vOt = (-t31x * d1) + (t21x * d2); // eq 19
triangle.orientPreserving = (signedAreaSTx2 > 0);
if (not_zero(signedAreaSTx2)) {
const float lenOs2 = vOs.length_squared();
const float lenOt2 = vOt.length_squared();
const float fS = triangle.orientPreserving ? 1.0f : (-1.0f);
if (not_zero(lenOs2))
triangle.tangent = vOs * (fS / sqrtf(lenOs2));
// if this is a good triangle
if (not_zero(lenOs2) && not_zero(lenOt2))
triangle.groupWithAny = false;
}
});
// force otherwise healthy quads to a fixed orientation
runParallel(0u, nrTriangles - 1, [&](uint t) {
Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
if (triangleA.faceIdx != triangleB.faceIdx) {
// this is not a quad
return;
}
// bad triangles should already have been removed by
// degenPrologue(), but just in case check that neither is degenerate
if (!(triangleA.markDegenerate || triangleB.markDegenerate)) {
// if this happens the quad has extremely bad mapping!!
if (triangleA.orientPreserving != triangleB.orientPreserving) {
bool chooseOrientFirstTri = false;
if (triangleB.groupWithAny)
chooseOrientFirstTri = true;
else if (calcTexArea(t) >= calcTexArea(t + 1))
chooseOrientFirstTri = true;
// force match
const uint t0 = chooseOrientFirstTri ? t : (t + 1);
const uint t1 = chooseOrientFirstTri ? (t + 1) : t;
triangles[t1].orientPreserving = triangles[t0].orientPreserving;
}
}
});
}
/////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// Edges ///////////////////////////////////////////
struct NeighborShard {
struct Entry {
Entry(uint32_t key_, uint data_) : key(key_), data(data_) {}
uint key, data;
};
std::vector<Entry> entries;
NeighborShard(size_t capacity)
{
entries.reserve(capacity);
}
void buildNeighbors(RefMikktspace<Mesh> *mikk)
{
/* Entries are added by iterating over t, so by using a stable sort,
* we don't have to compare based on t as well. */
{
std::vector<Entry> tempEntries(entries.size(), {0, 0});
radixsort(entries, tempEntries, [](const Entry &e) { return e.key; });
}
for (uint i = 0; i < entries.size(); i++) {
const Entry &a = entries[i];
uint tA, iA;
unpack_index(tA, iA, a.data);
RefMikktspace<Mesh>::Triangle &triA = mikk->triangles[tA];
if (triA.neighbor[iA] != UNSET_ENTRY) {
continue;
}
uint i0A = triA.vertices[iA], i1A = triA.vertices[(iA != 2) ? (iA + 1) : 0];
for (uint j = i + 1; j < entries.size(); j++) {
const Entry &b = entries[j];
uint tB, iB;
unpack_index(tB, iB, b.data);
RefMikktspace<Mesh>::Triangle &triB = mikk->triangles[tB];
if (b.key != a.key)
break;
if (triB.neighbor[iB] != UNSET_ENTRY) {
continue;
}
uint i1B = triB.vertices[iB], i0B = triB.vertices[(iB != 2) ? (iB + 1) : 0];
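/* Note the swap relative to i0A/i1A above: an edge matches only when the
 * second triangle traverses it in the opposite direction, which is what
 * consistent winding on a manifold mesh produces. */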
if (i0A == i0B && i1A == i1B) {
triA.neighbor[iA] = tB;
triB.neighbor[iB] = tA;
break;
}
}
}
}
};
void buildNeighbors()
{
/* In order to parallelize the processing, we divide the vertices into shards.
* Since only vertex pairs with the same key will be checked, we can process
* shards independently as long as we ensure that all vertices with the same
* key go into the same shard.
* This is done by hashing the key to get the shard index of each vertex.
*/
// TODO: Two-step filling that first counts and then fills? Could be parallel then.
uint targetNrShards = isParallel ? uint(4 * nrThreads) : 1;
uint nrShards = 1, hashShift = 32;
while (nrShards < targetNrShards) {
nrShards *= 2;
hashShift -= 1;
}
/* Reserve 25% extra to account for variation due to hashing. */
size_t reserveSize = size_t(double(3 * nrTriangles) * 1.25 / nrShards);
std::vector<NeighborShard> shards(nrShards, {reserveSize});
for (uint t = 0; t < nrTriangles; t++) {
Triangle &triangle = triangles[t];
for (uint i = 0; i < 3; i++) {
const uint i0 = triangle.vertices[i];
const uint i1 = triangle.vertices[(i != 2) ? (i + 1) : 0];
const uint high = std::max(i0, i1), low = std::min(i0, i1);
const uint hash = hash_uint3(high, low, 0);
/* TODO: Reusing the hash here means less hash space inside each shard.
* Computing a second hash with a different seed is probably not worth it. */
const uint shard = isParallel ? (hash >> hashShift) : 0;
shards[shard].entries.emplace_back(hash, pack_index(t, i));
}
}
runParallel(0u, nrShards, [&](uint s) { shards[s].buildNeighbors(this); });
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
void assignRecur(const uint t, uint groupId)
{
if (t == UNSET_ENTRY) {
return;
}
Triangle &triangle = triangles[t];
Group &group = groups[groupId];
// track down vertex
const uint vertRep = group.vertexRepresentative;
uint i = 3;
if (triangle.vertices[0] == vertRep)
i = 0;
else if (triangle.vertices[1] == vertRep)
i = 1;
else if (triangle.vertices[2] == vertRep)
i = 2;
assert(i < 3);
// early out
if (triangle.group[i] != UNSET_ENTRY) {
return;
}
if (triangle.groupWithAny) {
// The first group to claim a group-with-anything triangle
// determines its orientation.
// This is the only order dependency in the code!
if (triangle.group[0] == UNSET_ENTRY && triangle.group[1] == UNSET_ENTRY &&
triangle.group[2] == UNSET_ENTRY)
{
triangle.orientPreserving = group.orientPreserving;
}
}
if (triangle.orientPreserving != group.orientPreserving) {
return;
}
triangle.group[i] = groupId;
const uint t_L = triangle.neighbor[i];
const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
assignRecur(t_L, groupId);
assignRecur(t_R, groupId);
}
void build4RuleGroups()
{
/* NOTE: This could be parallelized by grouping all [t, i] pairs into
* shards by hash(triangles[t].vertices[i]). This way, each shard can be processed
* independently and in parallel.
* However, the `groupWithAny` logic needs special handling (e.g. lock a mutex when
* encountering a `groupWithAny` triangle, then sort it out, then unlock and proceed). */
for (uint t = 0; t < nrTriangles; t++) {
Triangle &triangle = triangles[t];
for (uint i = 0; i < 3; i++) {
// if not assigned to a group
if (triangle.groupWithAny || triangle.group[i] != UNSET_ENTRY) {
continue;
}
const uint newGroupId = uint(groups.size());
triangle.group[i] = newGroupId;
groups.emplace_back(triangle.vertices[i], bool(triangle.orientPreserving));
const uint t_L = triangle.neighbor[i];
const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
assignRecur(t_L, newGroupId);
assignRecur(t_R, newGroupId);
}
}
// printf("%d triangles -> %d groups\n", nrTriangles, groups.size());
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////
template<bool atomic> void accumulateTSpaces(uint t)
{
const Triangle &triangle = triangles[t];
// only valid triangles get to add their contribution
if (triangle.groupWithAny) {
return;
}
/* TODO: Vectorize?
 * Also: could add a special case for flat shading: when all normals are equal,
 * half of the fCos projections and two of the three tangent projections are unnecessary. */
std::array<float3, 3> n, p;
for (uint i = 0; i < 3; i++) {
n[i] = getNormal(triangle.vertices[i]);
p[i] = getPosition(triangle.vertices[i]);
}
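/* Each fCos[i] below is the cosine of the corner angle at vertex i, with both
 * adjacent edges projected into the plane perpendicular to n[i] (this assumes
 * project() returns a normalized vector, which the fast_acosf() call implies);
 * each tangent contribution is then weighted by that corner angle. */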
std::array<float, 3> fCos = {dot(project(n[0], p[1] - p[0]), project(n[0], p[2] - p[0])),
dot(project(n[1], p[2] - p[1]), project(n[1], p[0] - p[1])),
dot(project(n[2], p[0] - p[2]), project(n[2], p[1] - p[2]))};
for (uint i = 0; i < 3; i++) {
uint groupId = triangle.group[i];
if (groupId != UNSET_ENTRY) {
float3 tangent = project(n[i], triangle.tangent) *
fast_acosf(std::clamp(fCos[i], -1.0f, 1.0f));
if constexpr (atomic) {
groups[groupId].accumulateTSpaceAtomic(tangent);
}
else {
groups[groupId].accumulateTSpace(tangent);
}
}
}
}
void generateTSpaces()
{
if (isParallel) {
runParallel(0u, nrTriangles, [&](uint t) { accumulateTSpaces<true>(t); });
}
else {
for (uint t = 0; t < nrTriangles; t++) {
accumulateTSpaces<false>(t);
}
}
/* TODO: Worth parallelizing? Probably not. */
for (Group &group : groups) {
group.normalizeTSpace();
}
tSpaces.resize(nrTSpaces);
for (uint t = 0; t < nrTriangles; t++) {
Triangle &triangle = triangles[t];
for (uint i = 0; i < 3; i++) {
const uint offset = triangle.tSpaceIdx;
const uint faceVertex = triangle.faceVertex[i];
uint groupId = triangle.group[i];
if (groupId == UNSET_ENTRY) {
continue;
}
const Group group = groups[groupId];
assert(triangle.orientPreserving == group.orientPreserving);
tSpaces[offset + faceVertex].accumulateGroup(group);
}
}
}
};
} // namespace mikk
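
For readers without the full tree at hand: the Mesh template parameter is duck-typed. Judging purely from the calls made above, a minimal adapter would need something like the following (a sketch under that assumption; member data and real accessors omitted):

struct MinimalMesh {
  int GetNumFaces() const;
  uint GetNumVerticesOfFace(uint f) const; /* only faces with 3 or 4 vertices are processed */
  mikk::float3 GetPosition(uint f, uint v) const;
  mikk::float3 GetNormal(uint f, uint v) const;
  mikk::float3 GetTexCoord(uint f, uint v) const; /* z conventionally 1.0f */
  void SetTangentSpace(uint f, uint v, mikk::float3 tangent, bool orientPreserving);
};
/* usage: mikk::RefMikktspace<MinimalMesh>(mesh).genTangSpace(); */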

View File

@@ -64,7 +64,11 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
/* result */
CustomData *loopdata_out,
uint loopdata_out_len,
-short *tangent_mask_curr_p);
+short *tangent_mask_curr_p,
+int len_vert_positions,
+int len_corner_verts,
+int len_vert_normals,
+int len_face_normals);
void BKE_mesh_calc_loop_tangents(Mesh *mesh_eval,
bool calc_active_tangent,

View File

@@ -23,6 +23,7 @@
/* interface */
#include "mikktspace.hh"
#include "mikktspace_ref.hh"
/* -------------------------------------------------------------------- */
/** \name Tangent Space Calculation
@@ -147,7 +148,8 @@ static void emDM_calc_loop_tangents_thread(TaskPool *__restrict /*pool*/, void *
{
SGLSLEditMeshToTangent *mesh_data = static_cast<SGLSLEditMeshToTangent *>(taskdata);
mikk::Mikktspace<SGLSLEditMeshToTangent> mikk(*mesh_data);
mikk::RefMikktspace<SGLSLEditMeshToTangent> mikk(*mesh_data);
printf("EditMesh::tangent\n");
mikk.genTangSpace();
}

View File

@@ -25,6 +25,7 @@
#include "BKE_report.hh"
#include "mikktspace.hh"
#include "mikktspace_ref.hh"
#include "BLI_strict_flags.h" /* Keep last. */
@@ -75,6 +76,8 @@ struct BKEMeshToTangent {
const float (*corner_normals)[3]; /* loops' normals */
float (*tangents)[4]; /* output tangents */
int num_faces; /* number of polygons */
size_t tangent_len;
int num_face_as_quad_map;
};
void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
@@ -83,7 +86,7 @@ void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
float (*r_looptangent)[4],
const float (*corner_normals)[3],
const float (*loop_uvs)[2],
-const int /*numLoops*/,
+const int numLoops,
const blender::OffsetIndices<int> faces,
ReportList *reports)
{
@@ -96,8 +99,9 @@ void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
mesh_to_tangent.corner_normals = corner_normals;
mesh_to_tangent.tangents = r_looptangent;
mesh_to_tangent.num_faces = int(faces.size());
mesh_to_tangent.tangent_len = (size_t)numLoops * 4;
mikk::Mikktspace<BKEMeshToTangent> mikk(mesh_to_tangent);
mikk::RefMikktspace<BKEMeshToTangent> mikk(mesh_to_tangent);
/* First check we do have a tris/quads only mesh. */
for (const int64_t i : faces.index_range()) {
@@ -200,6 +204,68 @@ struct SGLSLMeshToTangent {
return uint(tri[int(vert_num)]);
}
mikk::float3 GetPositionDirect(const uint loop_index)
{
return mikk::float3(positions[corner_verts[loop_index]]);
}
inline mikk::float3 GetTexCoordDirect(const uint loop_index)
{
if (mloopuv != nullptr) {
const float2 &uv = mloopuv[loop_index];
return mikk::float3(uv[0], uv[1], 1.0f);
}
const float *l_orco = orco[corner_verts[loop_index]];
float u, v;
map_to_sphere(&u, &v, l_orco[0], l_orco[1], l_orco[2]);
return mikk::float3(u, v, 1.0f);
}
inline mikk::float3 GetNormalDirect(const int face_index, const uint loop_index)
{
blender::int3 tri;
if (precomputedLoopNormals) {
return mikk::float3(precomputedLoopNormals[loop_index]);
}
if (!sharp_faces.is_empty() && sharp_faces[face_index]) { /* flat */
if (precomputedFaceNormals) {
return mikk::float3(precomputedFaceNormals[face_index]);
}
#ifdef USE_TRI_DETECT_QUADS
const blender::IndexRange face = faces[face_index];
float normal[3];
if (face.size() == 4) {
normal_quad_v3(normal,
positions[corner_verts[face[0]]],
positions[corner_verts[face[1]]],
positions[corner_verts[face[2]]],
positions[corner_verts[face[3]]]);
}
else
#endif
{
normal_tri_v3(normal,
positions[corner_verts[tri[0]]],
positions[corner_verts[tri[1]]],
positions[corner_verts[tri[2]]]);
}
return mikk::float3(normal);
}
return mikk::float3(vert_normals[corner_verts[loop_index]]);
}
uint GetStoreIndex(const uint face_num, const uint vert_num)
{
blender::int3 tri;
int face_index;
return GetLoop(face_num, vert_num, tri, face_index);
}
void SetTangentSpaceDirect(const uint loop_index, mikk::float3 T, bool orientation)
{
copy_v4_fl4(tangent[loop_index], T.x, T.y, T.z, orientation ? 1.0f : -1.0f);
}
mikk::float3 GetPosition(const uint face_num, const uint vert_num)
{
blender::int3 tri;
@@ -280,6 +346,12 @@ struct SGLSLMeshToTangent {
float (*tangent)[4]; /* destination */
blender::Span<bool> sharp_faces;
int numTessFaces;
size_t tangent_len;
int len_corner_verts;
int len_positions;
int len_face_normals;
int len_vert_normals;
#ifdef USE_TRI_DETECT_QUADS
/* map from 'fake' face index to corner_tris,
@@ -292,7 +364,7 @@
static void DM_calc_loop_tangents_thread(TaskPool *__restrict /*pool*/, void *taskdata)
{
SGLSLMeshToTangent *mesh_data = static_cast<SGLSLMeshToTangent *>(taskdata);
// mikk::RefMikktspace<SGLSLMeshToTangent> mikk(*mesh_data);
mikk::Mikktspace<SGLSLMeshToTangent> mikk(*mesh_data);
mikk.genTangSpace();
}
@@ -392,7 +464,6 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
const int *corner_tri_faces,
const uint corner_tris_len,
const blender::Span<bool> sharp_faces,
const CustomData *loopdata,
bool calc_active_tangent,
const char (*tangent_names)[MAX_CUSTOMDATA_LAYER_NAME],
@@ -404,7 +475,12 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
/* result */
CustomData *loopdata_out,
const uint loopdata_out_len,
-short *tangent_mask_curr_p)
+short *tangent_mask_curr_p,
+int len_vert_positions,
+int len_corner_verts,
+[[maybe_unused]] int len_vert_normals,
+int len_face_normals)
{
int act_uv_n = -1;
int ren_uv_n = -1;
@@ -508,6 +584,11 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
mesh2tangent->mloopuv = static_cast<const float2 *>(CustomData_get_layer_named(
loopdata, CD_PROP_FLOAT2, loopdata_out->layers[index].name));
mesh2tangent->len_corner_verts = len_corner_verts;
mesh2tangent->len_positions = len_vert_positions;
mesh2tangent->len_face_normals = len_face_normals;
mesh2tangent->tangent_len = loopdata_out_len;
/* Fill the resulting tangent_mask */
if (!mesh2tangent->mloopuv) {
mesh2tangent->orco = vert_orco;
@@ -591,9 +672,10 @@ void BKE_mesh_calc_loop_tangents(Mesh *mesh_eval,
/* may be nullptr */
static_cast<const float(*)[3]>(CustomData_get_layer(&mesh_eval->vert_data, CD_ORCO)),
/* result */
&mesh_eval->corner_data,
uint(mesh_eval->corners_num),
&tangent_mask);
(int)me_eval->vert_positions().size(),
(int)me_eval->corner_verts().size(),
(int)me_eval->vert_normals().size(),
(int)me_eval->face_normals().size());
}
/** \} */

View File

@@ -137,7 +137,11 @@ static void extract_tan_init_common(const MeshRenderData &mr,
orco,
r_loop_data,
mr.corner_verts.size(),
&tangent_mask);
&tangent_mask,
(uint)mr.vert_positions.size(),
(uint)mr.corner_verts.size(),
(uint)mr.vert_normals.size(),
(uint)mr.face_normals.size());
}
}

View File

@@ -539,7 +539,11 @@ static void do_multires_bake(MultiresBakeRender *bkr,
/* result */
&dm->loopData,
dm->getNumLoops(dm),
&dm->tangent_mask);
&dm->tangent_mask,
(uint)positions.size(),
(uint)corner_verts.size(),
(uint)vert_normals.size(),
(uint)face_normals.size());
}
pvtangent = static_cast<float *>(DM_get_loop_data_layer(dm, CD_TANGENT));