Mikktspace tangent calculator optimizations #112256
|
@ -26,7 +26,7 @@
|
|||
#include "util/log.h"
|
||||
#include "util/math.h"
|
||||
|
||||
#include "mikktspace.hh"
|
||||
#include "mikktspace_ref.hh"
|
||||
|
||||
#include "BKE_attribute.hh"
|
||||
#include "BKE_attribute_math.hh"
|
||||
|
@ -229,12 +229,12 @@ static void mikk_compute_tangents(
|
|||
if (is_subd) {
|
||||
MikkMeshWrapper<true> userdata(b_mesh, layer_name, mesh, tangent, tangent_sign);
|
||||
/* Compute tangents. */
|
||||
mikk::Mikktspace(userdata).genTangSpace();
|
||||
mikk::RefMikktspace(userdata).genTangSpace();
|
||||
}
|
||||
else {
|
||||
MikkMeshWrapper<false> userdata(b_mesh, layer_name, mesh, tangent, tangent_sign);
|
||||
/* Compute tangents. */
|
||||
mikk::Mikktspace(userdata).genTangSpace();
|
||||
mikk::RefMikktspace(userdata).genTangSpace();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -77,6 +77,14 @@ class AtomicHashSet {
|
|||
typedef typename std::conditional<isAtomic, std::atomic<KeyT>, KeyT>::type cell_type;
|
||||
std::vector<cell_type> cells_;
|
||||
|
||||
/* Returns the smallest power of two that is greater than or equal to x (1 for x <= 1).
 * Does not terminate when x is larger than the biggest power of two that fits in size_t,
 * because the doubling then wraps around to zero. */
static inline size_t power_of_two(size_t x)
{
  size_t result = 1;
  for (; result < x; result <<= 1) {
    /* Keep doubling until the requested size is covered. */
  }
  return result;
}
|
||||
|
||||
public:
|
||||
struct Config {
|
||||
KeyT emptyKey;
|
||||
|
@ -97,7 +105,7 @@ class AtomicHashSet {
|
|||
KeyHash hasher = KeyHash(),
|
||||
KeyEqual equalityChecker = KeyEqual(),
|
||||
const Config &c = Config())
|
||||
: capacity_(size_t(double(maxSize) / c.maxLoadFactor) + 1),
|
||||
: capacity_(power_of_two(size_t(double(maxSize) / c.maxLoadFactor) + 1)),
|
||||
kEmptyKey_(c.emptyKey),
|
||||
hasher_(hasher),
|
||||
equalityChecker_(equalityChecker),
|
||||
|
|
|
@ -0,0 +1,402 @@
|
|||
#pragma once
|
||||
|
||||
namespace mikk {
|
||||
|
||||
/* Only functions using v8sf require AVX2 (and some of those only require AVX).
|
||||
* The rest requires at most SSE4.1, which has been included in all x86 CPUs since
|
||||
* 2013 or so, and may be assumed to exist.
|
||||
*
|
||||
* We must separately test for AVX2 CPU support at runtime, because Intel
|
||||
* was still making non-AVX2 CPUs as late as 2020. */
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC push_options
|
||||
# pragma GCC target("avx2", "sse4.1")
|
||||
#endif
|
||||
Eugene-Kuznetsov marked this conversation as resolved
Outdated
|
||||
|
||||
#ifdef __GNUC__
|
||||
typedef float v4sf __attribute__((vector_size(16)));
|
||||
typedef float v8sf __attribute__((vector_size(32)));
|
||||
|
||||
/* Widens a float3 to a 4-lane vector; the fourth lane is set to zero. */
inline v4sf to_float4(const float3 &x)
{
  const v4sf widened = {x.x, x.y, x.z, 0.0f};
  return widened;
}
|
||||
|
||||
/* Loads a float3 with a masked vector load. Only the first three lanes are enabled in the
 * mask, the fourth mask element is zero. */
inline v4sf to_float4_masked(const float3 &x)
{
  typedef int v4si __attribute__((vector_size(16)));
  const v4si lane_mask = {-1, -1, -1, 0};
  return __builtin_ia32_maskloadps((const v4sf *)&x, lane_mask);
}
|
||||
|
||||
/* Builds a float3 from the first three lanes of x; the fourth lane is dropped. */
inline float3 to_float3(v4sf x)
{
  const float lane0 = x[0];
  const float lane1 = x[1];
  const float lane2 = x[2];
  return float3(lane0, lane1, lane2);
}
|
||||
/* Packs two 4-lane vectors into one 8-lane vector: a goes to the 128-bit half with index 0,
 * b is inserted into the half with index 1. */
inline v8sf from_2xv4sf(v4sf a, v4sf b)
{
  return __builtin_ia32_vinsertf128_ps256(__builtin_ia32_ps256_ps(a), b, 1);
}
|
||||
|
||||
/* Duplicates a 4-lane vector into both 128-bit halves of an 8-lane vector. */
inline v8sf from_v4sf(v4sf a)
{
  return __builtin_ia32_vinsertf128_ps256(__builtin_ia32_ps256_ps(a), a, 1);
}
|
||||
|
||||
/* Broadcasts x to all four lanes. */
inline v4sf v4sf_set(float x)
{
  const v4sf splat = {x, x, x, x};
  return splat;
}
|
||||
/* Returns a 4-lane vector with every lane set to zero. */
inline v4sf v4sf_setzero()
{
  const v4sf zeros = {0.0f, 0.0f, 0.0f, 0.0f};
  return zeros;
}
|
||||
/* Broadcasts x to all eight lanes. */
inline v8sf v8sf_set(float x)
{
  const v8sf splat = {x, x, x, x, x, x, x, x};
  return splat;
}
|
||||
/* Returns an 8-lane vector with every lane set to zero. */
inline v8sf v8sf_setzero()
{
  const v8sf zeros = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
  return zeros;
}
|
||||
/* Fills lanes 0..3 with x and lanes 4..7 with y. */
inline v8sf v8sf_set2(float x, float y)
{
  const v8sf halves = {x, x, x, x, y, y, y, y};
  return halves;
}
|
||||
|
||||
/* Reads lane x of a 4-lane vector. */
inline float extract(v4sf v, uint x)
{
  const float lane = v[x];
  return lane;
}
|
||||
/* Reads lane x of an 8-lane vector. */
inline float extract(v8sf v, uint x)
{
  const float lane = v[x];
  return lane;
}
|
||||
|
||||
/* Returns a copy of v in which lane x has been replaced by y. */
inline v4sf insert(v4sf v, int x, float y)
{
  v4sf updated = v;
  updated[x] = y;
  return updated;
}
|
||||
/* Returns a copy of v in which lane x has been replaced by y. */
inline v8sf insert(v8sf v, int x, float y)
{
  v8sf updated = v;
  updated[x] = y;
  return updated;
}
|
||||
|
||||
#elif _MSC_VER
|
||||
|
||||
/* Without this line, the compiler may use FMA ops to rewrite float arithmetics.
|
||||
* Sometimes this results in slight changes in numerical results.
|
||||
*
|
||||
* For example,
|
||||
* 'a*b-c*d' might compile into "temp = mul(c*d); out = fmsub(a,b,temp);"
|
||||
*
|
||||
* Here, temp is truncated to float32, but fmsub does everything internally in
|
||||
* float64 precision, so executing this for a=c and b=d no longer produces a zero
|
||||
* (but something on the order of |ab| * 10^-13).
|
||||
*
|
||||
* This would break our reference compatibility, because, e.g., the output of initTriangle()
|
||||
* changes fundamentally if some internal values go from exactly zero to almost zero. */
|
||||
# pragma fp_contract(off)
|
||||
|
||||
typedef __m128 v4sf;
|
||||
typedef __m128i v4si;
|
||||
typedef __m256 v8sf;
|
||||
|
||||
/* Widens a float3 to a 4-lane vector; the fourth lane is set to zero.
 * Note that _mm_set_ps takes its arguments from the highest lane down. */
inline v4sf to_float4(const float3 &x)
{
  const v4sf widened = _mm_set_ps(0, x.z, x.y, x.x);
  return widened;
}
|
||||
/* Loads a float3 with a masked vector load. Only the first three lanes are enabled in the
 * mask, the fourth mask element is zero. */
inline v4sf to_float4_masked(const float3 &x)
{
  const v4si lane_mask = _mm_set_epi32(0, -1, -1, -1);
  return _mm_maskload_ps(&x.x, lane_mask);
}
|
||||
|
||||
/* Builds a float3 from the first three lanes of x; the fourth lane is dropped. */
inline float3 to_float3(v4sf x)
{
  const float lane0 = x.m128_f32[0];
  const float lane1 = x.m128_f32[1];
  const float lane2 = x.m128_f32[2];
  return float3(lane0, lane1, lane2);
}
|
||||
/* Packs two 4-lane vectors into one 8-lane vector. _mm256_set_m128 takes the high half
 * first, so b is passed as the high half and a as the low half. */
inline v8sf from_2xv4sf(v4sf a, v4sf b)
{
  const v8sf packed = _mm256_set_m128(b, a);
  return packed;
}
|
||||
|
||||
/* Duplicates a 4-lane vector into both 128-bit halves of an 8-lane vector. */
inline v8sf from_v4sf(v4sf a)
{
  const v8sf doubled = _mm256_set_m128(a, a);
  return doubled;
}
|
||||
|
||||
/* Broadcasts x to all four float lanes. */
inline v4sf v4sf_set(float x)
{
  const v4sf splat = _mm_set1_ps(x);
  return splat;
}
|
||||
/* Broadcasts x to all four 32-bit integer lanes. */
inline v4si v4si_set(int x)
{
  const v4si splat = _mm_set1_epi32(x);
  return splat;
}
|
||||
/* Returns a 4-lane vector with every lane set to zero. */
inline v4sf v4sf_setzero()
{
  const v4sf zeros = _mm_setzero_ps();
  return zeros;
}
|
||||
/* Returns an 8-lane vector with every lane set to zero. */
inline v8sf v8sf_setzero()
{
  const v8sf zeros = _mm256_setzero_ps();
  return zeros;
}
|
||||
/* Broadcasts x to all eight float lanes. */
inline v8sf v8sf_set(float x)
{
  const v8sf splat = _mm256_set1_ps(x);
  return splat;
}
|
||||
/* Fills lanes 0..3 with x and lanes 4..7 with y.
 * Note that _mm256_set_ps takes its arguments from the highest lane down. */
inline v8sf v8sf_set2(float x, float y)
{
  const v8sf halves = _mm256_set_ps(y, y, y, y, x, x, x, x);
  return halves;
}
|
||||
|
||||
# define __builtin_ia32_dpps _mm_dp_ps
|
||||
# define __builtin_ia32_hsubps _mm_hsub_ps
|
||||
# define __builtin_ia32_maxss _mm_max_ss
|
||||
# define __builtin_ia32_maxps _mm_max_ps
|
||||
# define __builtin_ia32_minps _mm_min_ps
|
||||
# define __builtin_ia32_mulps _mm_mul_ps
|
||||
# define __builtin_ia32_shufps _mm_shuffle_ps
|
||||
# define __builtin_ia32_cmpltps _mm_cmplt_ps
|
||||
# define __builtin_ia32_andps _mm_and_ps
|
||||
# define __builtin_ia32_andnps _mm_andnot_ps
|
||||
# define __builtin_ia32_rsqrtss _mm_rsqrt_ss
|
||||
# define __builtin_ia32_sqrtps _mm_sqrt_ps
|
||||
# define __builtin_ia32_maxps256 _mm256_max_ps
|
||||
# define __builtin_ia32_sqrtps256 _mm256_sqrt_ps
|
||||
# define __builtin_ia32_dpps256 _mm256_dp_ps
|
||||
# define __builtin_ia32_vpermilps256 _mm256_permute_ps
|
||||
# define __builtin_ia32_vextractf128_ps256 _mm256_extractf128_ps
|
||||
# define __builtin_ia32_vperm2f128_ps256 _mm256_permute2f128_ps
|
||||
# define __builtin_ia32_blendps256 _mm256_blend_ps
|
||||
# define __builtin_ia32_permvarsf256 _mm256_permutevar8x32_ps
|
||||
# define __builtin_ia32_cmpgtps _mm_cmpgt_ps
|
||||
# define __builtin_ia32_cmpps256 _mm256_cmp_ps
|
||||
# define __builtin_ia32_andps256 _mm256_and_ps
|
||||
# define __builtin_ia32_andnps256 _mm256_andnot_ps
|
||||
# define __builtin_ia32_haddps256 _mm256_hadd_ps
|
||||
|
||||
/* Reads float lane x of a 128-bit vector. */
inline float extract(__m128 v, uint x)
{
  const float lane = v.m128_f32[x];
  return lane;
}
|
||||
/* Reads 32-bit integer lane x of a 128-bit integer vector. */
inline int extract(__m128i v, uint x)
{
  const int lane = v.m128i_i32[x];
  return lane;
}
|
||||
/* Reads float lane x of a 256-bit vector. */
inline float extract(__m256 v, uint x)
{
  const float lane = v.m256_f32[x];
  return lane;
}
|
||||
|
||||
/* Returns a copy of v in which float lane x has been replaced by y. */
inline __m128 insert(__m128 v, int x, float y)
{
  __m128 updated = v;
  updated.m128_f32[x] = y;
  return updated;
}
|
||||
/* Returns a copy of v in which float lane x has been replaced by y. */
inline __m256 insert(__m256 v, int x, float y)
{
  __m256 updated = v;
  updated.m256_f32[x] = y;
  return updated;
}
|
||||
|
||||
// GCC provides all these automatically
|
||||
/* Lane-wise product of two 4-lane float vectors. */
inline v4sf operator*(v4sf a, v4sf b)
{
  const v4sf product = _mm_mul_ps(a, b);
  return product;
}
|
||||
/* Multiplies every lane of a by the scalar b. */
inline v4sf operator*(v4sf a, float b)
{
  const v4sf factor = v4sf_set(b);
  return _mm_mul_ps(a, factor);
}
|
||||
/* Adds the scalar a to every lane of b (the scalar is the left operand). */
inline v4sf operator+(float a, v4sf b)
{
  const v4sf lhs = v4sf_set(a);
  return _mm_add_ps(lhs, b);
}
|
||||
/* Subtracts every lane of b from the scalar a. */
inline v4sf operator-(float a, v4sf b)
{
  const v4sf lhs = v4sf_set(a);
  return _mm_sub_ps(lhs, b);
}
|
||||
/* Lane-wise product of two 8-lane float vectors. */
inline v8sf operator*(v8sf a, v8sf b)
{
  const v8sf product = _mm256_mul_ps(a, b);
  return product;
}
|
||||
/* Lane-wise difference a - b of two 4-lane float vectors. */
inline v4sf operator-(v4sf a, v4sf b)
{
  const v4sf difference = _mm_sub_ps(a, b);
  return difference;
}
|
||||
/* Lane-wise sum of two 4-lane float vectors. */
inline v4sf operator+(v4sf a, v4sf b)
{
  const v4sf sum = _mm_add_ps(a, b);
  return sum;
}
|
||||
/* Lane-wise sum of two vectors of four 32-bit integers. */
inline v4si operator+(v4si a, v4si b)
{
  const v4si sum = _mm_add_epi32(a, b);
  return sum;
}
|
||||
/* Lane-wise sum of two 8-lane float vectors. */
inline v8sf operator+(v8sf a, v8sf b)
{
  const v8sf sum = _mm256_add_ps(a, b);
  return sum;
}
|
||||
/* Lane-wise difference a - b of two 8-lane float vectors. */
inline v8sf operator-(v8sf a, v8sf b)
{
  const v8sf difference = _mm256_sub_ps(a, b);
  return difference;
}
|
||||
/* Lane-wise quotient a / b of two 8-lane float vectors. */
inline v8sf operator/(v8sf a, v8sf b)
{
  const v8sf quotient = _mm256_div_ps(a, b);
  return quotient;
}
|
||||
/* Divides the scalar a by every lane of b. */
inline v4sf operator/(float a, v4sf b)
{
  const v4sf numerator = v4sf_set(a);
  return _mm_div_ps(numerator, b);
}
|
||||
/* Divides the scalar a by every lane of b. */
inline v8sf operator/(float a, v8sf b)
{
  const v8sf numerator = v8sf_set(a);
  return _mm256_div_ps(numerator, b);
}
|
||||
/* Adds b lane-wise into a, in place. */
inline void operator+=(v8sf &a, v8sf b)
{
  const v8sf sum = _mm256_add_ps(a, b);
  a = sum;
}
|
||||
/* Multiplies each of the four 32-bit integer lanes of a by the scalar b, keeping the low
 * 32 bits of every product.
 *
 * _mm_mullo_epi32 (SSE4.1) is used on purpose: _mm_mul_epi32 only multiplies lanes 0 and 2
 * and produces two 64-bit products, which is not the per-lane `vector * int` that GCC's
 * vector extensions perform for the same expression. */
inline v4si operator*(v4si a, int b)
{
  return _mm_mullo_epi32(a, v4si_set(b));
}
|
||||
/* Bitwise XOR of two 128-bit integer vectors. */
inline v4si operator^(v4si a, v4si b)
{
  const v4si mixed = _mm_xor_si128(a, b);
  return mixed;
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Identity overload: the argument already is a 4-lane vector and is returned unchanged. */
inline v4sf to_float4(v4sf x)
{
  const v4sf unchanged = x;
  return unchanged;
}
|
||||
|
||||
/* Splits an 8-lane vector into its two 128-bit halves: p0 receives half 0, p1 half 1. */
inline void unpack(v4sf &p0, v4sf &p1, v8sf v)
{
  const v4sf half0 = __builtin_ia32_vextractf128_ps256(v, 0);
  const v4sf half1 = __builtin_ia32_vextractf128_ps256(v, 1);
  p0 = half0;
  p1 = half1;
}
|
||||
|
||||
/* Broadcasts lane i of x to all four lanes. Multiplying i by 0x55 repeats the 2-bit lane
 * index in each of the four selector fields of the shuffle immediate. */
template<int i> inline v4sf bcast(v4sf x)
{
  return __builtin_ia32_shufps(x, x, 0x55 * i);
}
|
||||
|
||||
inline float dot(const v4sf &a, const v4sf &b)
|
||||
{
|
||||
return extract(__builtin_ia32_dpps(a, b, 255), 0);
|
||||
}
|
||||
|
||||
/* Runs the 256-bit dot-product instruction with mask 0xFF on a and b, i.e. one 4-lane dot
 * product per 128-bit half. */
inline v8sf dot_single(v8sf a, v8sf b)
{
  const v8sf per_half = __builtin_ia32_dpps256(a, b, 255);
  return per_half;
}
|
||||
|
||||
inline float dot_inner(const v8sf &a)
|
||||
{
|
||||
return extract(dot_single(a, __builtin_ia32_vperm2f128_ps256(a, a, 1)), 0);
|
||||
}
|
||||
|
||||
/* Potentially somewhat expensive (creates a global constant and adds a memory access, though the
|
||||
* constant may be expected to stay in cache), use sparingly. Sometimes unavoidable since few AVX2
|
||||
* ops send data across the 128-bit boundary */
|
||||
/* Output lane k is taken from input lane i<k>; indices may cross the 128-bit boundary. */
inline v8sf cross_lane_permute(
    v8sf v, int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7)
{
#if __GNUC__
  typedef int v8si __attribute__((vector_size(32)));
  const v8si lane_order = {i0, i1, i2, i3, i4, i5, i6, i7};
  return __builtin_ia32_permvarsf256(v, lane_order);
#else
  /* _mm256_set_epi32 takes its arguments from the highest lane down. */
  const __m256i lane_order = _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
  return __builtin_ia32_permvarsf256(v, lane_order);
#endif
}
|
||||
|
||||
/* input: [... i j ..... ], with <i> in position <offset> and <j> in position <offset+1>
|
||||
* output: [ i i i i j j j j ] */
|
||||
/* Repeats lane <offset> four times in the low half and lane <offset + 1> four times in the
 * high half. */
template<int offset> inline v8sf unpack4x(v8sf a)
{
  constexpr int first = offset;
  constexpr int second = offset + 1;
  return cross_lane_permute(a, first, first, first, first, second, second, second, second);
}
|
||||
|
||||
inline float length_squared(const v4sf &x)
|
||||
{
|
||||
return dot(x, x);
|
||||
}
|
||||
inline float length(const v4sf &x)
|
||||
{
|
||||
v4sf v = __builtin_ia32_dpps(x, x, 255);
|
||||
v = __builtin_ia32_sqrtps(v);
|
||||
return extract(v, 0);
|
||||
}
|
||||
|
||||
inline float inv_length(const v4sf &x)
|
||||
{
|
||||
v4sf v = __builtin_ia32_dpps(x, x, 255);
|
||||
v4sf one = v4sf_set(1.0f), zero = v4sf_setzero();
|
||||
v4sf rv = 1.0f / __builtin_ia32_sqrtps(v);
|
||||
v4sf nonzero = __builtin_ia32_cmpgtps(v, zero);
|
||||
rv = __builtin_ia32_andps(nonzero, rv) + __builtin_ia32_andnps(nonzero, one);
|
||||
return extract(rv, 0);
|
||||
}
|
||||
|
||||
/* input: 8 float4's packed into 4 v8sf
|
||||
* output: 8 floats containing inverse lengths of each */
|
||||
inline v8sf inv_length8(v8sf px0, v8sf px1, v8sf px2, v8sf px3)
{
  /* The low nibble of each mask (1, 2, 4, 8) sends the dot product of each input to a
   * different lane of its 128-bit half, so the four results can simply be added up. */
  const v8sf squared0 = __builtin_ia32_dpps256(px0, px0, 0xF1);
  const v8sf squared1 = __builtin_ia32_dpps256(px1, px1, 0xF2);
  const v8sf squared2 = __builtin_ia32_dpps256(px2, px2, 0xF4);
  const v8sf squared3 = __builtin_ia32_dpps256(px3, px3, 0xF8);

  v8sf squared = squared0;
  squared += squared1;
  squared += squared2;
  squared += squared3;

  /* The lanes are now ordered [0 2 4 6 1 3 5 7], and we want [0 1 2 3 4 5 6 7]. */
  squared = cross_lane_permute(squared, 0, 4, 1, 5, 2, 6, 3, 7);

  /* There is an intrinsic for rsqrt, but it has relative accuracy of ~1/1000, so a full
   * square root and division are used instead. */
  const v8sf one = v8sf_set(1.0f), zero = v8sf_setzero();
  const v8sf is_nonzero = __builtin_ia32_cmpps256(squared, zero, 4);
  const v8sf reciprocal = 1.0f / __builtin_ia32_sqrtps256(squared);

  /* We must match reference behavior (return 1 if input is zero) or else
   * we'll have significant mismatches in some situations. */
  return __builtin_ia32_andps256(is_nonzero, reciprocal) +
         __builtin_ia32_andnps256(is_nonzero, one);
}
|
||||
|
||||
/* Projects v onto the surface with normal n. */
|
||||
/* Removes from v its component along n: v - n * dot(n, v). */
inline v4sf project1(v4sf n, v4sf v)
{
  const float along_normal = dot(n, v);
  /* Multiply and subtract stay in one expression, exactly as written before. */
  return v - (n * along_normal);
}
|
||||
|
||||
/* Projects two packed float3's in v8 onto surfaces in n8. */
|
||||
/* Same projection as for a single vector, applied to each 128-bit half independently:
 * v8 - n8 * dot(n8, v8), with one dot product per half. */
inline v8sf project2(v8sf n8, v8sf v8)
{
  const v8sf along_normal = dot_single(n8, v8);
  return v8 - n8 * along_normal;
}
|
||||
|
||||
/* Polynomial approximation of acos() for four lanes at once. The polynomial is evaluated
 * on min(|x|, 1); lanes with negative input return pi minus that value. */
inline v4sf fast_acosf_4x(v4sf x)
{
  const v4sf one = v4sf_set(1.0f), zero = v4sf_setzero();

  /* |x| computed as max(x, -x), then clamped to at most 1. */
  const v4sf magnitude = __builtin_ia32_maxps(x, x * -1.0f);
  const v4sf clamped = __builtin_ia32_minps(magnitude, one);

  const v4sf positive_result =
      __builtin_ia32_sqrtps(one - clamped) *
      (1.5707963267f +
       clamped * (-0.213300989f + clamped * (0.077980478f + clamped * -0.02164095f)));

  /* All-ones mask in the lanes where the input is negative. */
  const v4sf is_negative = __builtin_ia32_cmpltps(x, zero);
  return __builtin_ia32_andps(is_negative, 3.1415926535897932f - positive_result) +
         __builtin_ia32_andnps(is_negative, positive_result);
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
} // namespace mikk
|
|
@ -19,7 +19,7 @@ namespace mikk {
|
|||
|
||||
/* Returns true when the magnitude of fX is strictly greater than FLT_MIN.
 * Zero, values with magnitude up to and including FLT_MIN, and NaN all return false.
 * The test is written as two plain comparisons, without going through fabsf(). */
inline bool not_zero(const float fX)
{
  return (fX > FLT_MIN) || (-fX > FLT_MIN);
}
|
||||
|
||||
/* Helpers for (un)packing a 2-bit vertex index and a 30-bit face index to one integer. */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,880 @@
|
|||
/* SPDX-FileCopyrightText: 2011 Morten S. Mikkelsen
|
||||
* SPDX-FileCopyrightText: 2022 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
/** \file
|
||||
* \ingroup mikktspace
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <unordered_map>
|
||||
|
||||
#ifdef WITH_TBB
|
||||
# include <tbb/parallel_for.h>
|
||||
#endif
|
||||
|
||||
#include "mikk_atomic_hash_set.hh"
|
||||
#include "mikk_float3.hh"
|
||||
#include "mikk_util.hh"
|
||||
|
||||
namespace mikk {
|
||||
|
||||
static constexpr uint UNSET_ENTRY = 0xffffffffu;
|
||||
|
||||
template<typename Mesh> class RefMikktspace {
|
||||
struct Triangle {
|
||||
/* Stores neighboring triangle for group assignment. */
|
||||
std::array<uint, 3> neighbor;
|
||||
/* Stores assigned group of each vertex. */
|
||||
std::array<uint, 3> group;
|
||||
/* Stores vertex indices that make up the triangle. */
|
||||
std::array<uint, 3> vertices;
|
||||
|
||||
/* Computed face tangent, will be accumulated into group. */
|
||||
float3 tangent;
|
||||
|
||||
/* Index of the face that this triangle belongs to. */
|
||||
uint faceIdx;
|
||||
/* Index of the first of this triangle's vertices' TSpaces. */
|
||||
uint tSpaceIdx;
|
||||
|
||||
/* Stores mapping from this triangle's vertices to the original
|
||||
* face's vertices (relevant for quads). */
|
||||
std::array<uint8_t, 3> faceVertex;
|
||||
|
||||
// flags
|
||||
bool markDegenerate : 1;
|
||||
bool quadOneDegenTri : 1;
|
||||
bool groupWithAny : 1;
|
||||
bool orientPreserving : 1;
|
||||
|
||||
Triangle(uint faceIdx_, uint tSpaceIdx_)
|
||||
: tangent{0.0f, 0.0f, 0.0f},
|
||||
faceIdx{faceIdx_},
|
||||
tSpaceIdx{tSpaceIdx_},
|
||||
markDegenerate{false},
|
||||
quadOneDegenTri{false},
|
||||
groupWithAny{true},
|
||||
orientPreserving{false}
|
||||
{
|
||||
neighbor.fill(UNSET_ENTRY);
|
||||
group.fill(UNSET_ENTRY);
|
||||
}
|
||||
|
||||
void setVertices(uint8_t i0, uint8_t i1, uint8_t i2)
|
||||
{
|
||||
faceVertex[0] = i0;
|
||||
faceVertex[1] = i1;
|
||||
faceVertex[2] = i2;
|
||||
vertices[0] = pack_index(faceIdx, i0);
|
||||
vertices[1] = pack_index(faceIdx, i1);
|
||||
vertices[2] = pack_index(faceIdx, i2);
|
||||
}
|
||||
};
|
||||
|
||||
struct Group {
|
||||
float3 tangent;
|
||||
uint vertexRepresentative;
|
||||
bool orientPreserving;
|
||||
|
||||
Group(uint vertexRepresentative_, bool orientPreserving_)
|
||||
: tangent{0.0f},
|
||||
vertexRepresentative{vertexRepresentative_},
|
||||
orientPreserving{orientPreserving_}
|
||||
{
|
||||
}
|
||||
|
||||
void normalizeTSpace()
|
||||
{
|
||||
tangent = tangent.normalize();
|
||||
}
|
||||
|
||||
void accumulateTSpaceAtomic(float3 v_tangent)
|
||||
{
|
||||
float_add_atomic(&tangent.x, v_tangent.x);
|
||||
float_add_atomic(&tangent.y, v_tangent.y);
|
||||
float_add_atomic(&tangent.z, v_tangent.z);
|
||||
}
|
||||
|
||||
void accumulateTSpace(float3 v_tangent)
|
||||
{
|
||||
tangent += v_tangent;
|
||||
}
|
||||
};
|
||||
|
||||
struct TSpace {
|
||||
float3 tangent = float3(1.0f, 0.0f, 0.0f);
|
||||
uint counter = 0;
|
||||
bool orientPreserving = false;
|
||||
|
||||
void accumulateGroup(const Group &group)
|
||||
{
|
||||
assert(counter < 2);
|
||||
|
||||
if (counter == 0) {
|
||||
tangent = group.tangent;
|
||||
}
|
||||
else if (tangent == group.tangent) {
|
||||
// this if is important. Due to floating point precision
|
||||
// averaging when ts0==ts1 will cause a slight difference
|
||||
// which results in tangent space splits later on, so do nothing
|
||||
}
|
||||
else {
|
||||
tangent = (tangent + group.tangent).normalize();
|
||||
}
|
||||
|
||||
counter++;
|
||||
orientPreserving = group.orientPreserving;
|
||||
}
|
||||
};
|
||||
|
||||
Mesh &mesh;
|
||||
|
||||
std::vector<Triangle> triangles;
|
||||
std::vector<TSpace> tSpaces;
|
||||
std::vector<Group> groups;
|
||||
|
||||
uint nrTSpaces, nrFaces, nrTriangles, totalTriangles;
|
||||
|
||||
int nrThreads;
|
||||
bool isParallel;
|
||||
|
||||
public:
|
||||
bool profile = false;
|
||||
bool trace_on = false;
|
||||
RefMikktspace(Mesh &mesh_) : mesh(mesh_) {}
|
||||
|
||||
void genTangSpace()
|
||||
{
|
||||
nrFaces = uint(mesh.GetNumFaces());
|
||||
|
||||
#ifdef WITH_TBB
|
||||
nrThreads = tbb::this_task_arena::max_concurrency();
|
||||
isParallel = (nrThreads > 1) && (nrFaces > 10000);
|
||||
// isParallel = false;
|
||||
#else
|
||||
nrThreads = 1;
|
||||
isParallel = false;
|
||||
#endif
|
||||
// make an initial triangle --> face index list
|
||||
generateInitialVerticesIndexList();
|
||||
|
||||
if (nrTriangles == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// make a welded index list of identical positions and attributes (pos, norm, texc)
|
||||
generateSharedVerticesIndexList();
|
||||
|
||||
// mark all triangle pairs that belong to a quad with only one
|
||||
// good triangle. These need special treatment in degenEpilogue().
|
||||
// Additionally, move all good triangles to the start of
|
||||
// triangles[] without changing order and
|
||||
// put the degenerate triangles last.
|
||||
degenPrologue();
|
||||
|
||||
if (nrTriangles == 0) {
|
||||
// No point in building tangents if there are no non-degenerate triangles, so just zero them
|
||||
tSpaces.resize(nrTSpaces);
|
||||
}
|
||||
else {
|
||||
// evaluate triangle level attributes and neighbor list
|
||||
initTriangle();
|
||||
|
||||
// match up edge pairs
|
||||
buildNeighbors();
|
||||
|
||||
// based on the 4 rules, identify groups based on connectivity
|
||||
build4RuleGroups();
|
||||
|
||||
// make tspaces, each group is split up into subgroups.
|
||||
// Finally a tangent space is made for every resulting subgroup
|
||||
generateTSpaces();
|
||||
|
||||
// degenerate quads with one good triangle will be fixed by copying a space from
|
||||
// the good triangle to the coinciding vertex.
|
||||
// all other degenerate triangles will just copy a space from any good triangle
|
||||
// with the same welded index in vertices[].
|
||||
degenEpilogue();
|
||||
}
|
||||
|
||||
uint index = 0;
|
||||
|
||||
for (uint f = 0; f < nrFaces; f++) {
|
||||
const uint verts = mesh.GetNumVerticesOfFace(f);
|
||||
if (verts != 3 && verts != 4) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// set data
|
||||
for (uint i = 0; i < verts; i++) {
|
||||
const TSpace &tSpace = tSpaces[index++];
|
||||
mesh.SetTangentSpace(f, i, tSpace.tangent, tSpace.orientPreserving);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
/* Calls func(i) for every i in [start, end). With TBB available and isParallel set, the
 * range is handed to tbb::parallel_for; otherwise it is a plain sequential loop. */
template<typename F> void runParallel(uint start, uint end, F func)
{
#ifdef WITH_TBB
  if (isParallel) {
    tbb::parallel_for(start, end, func);
    return;
  }
#endif
  for (uint index = start; index < end; index++) {
    func(index);
  }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Looks up the position of a packed vertex index by unpacking it into face and
 * face-vertex and asking the mesh. */
float3 getPosition(uint vertexID)
{
  uint face, corner;
  unpack_index(face, corner, vertexID);
  return mesh.GetPosition(face, corner);
}
|
||||
|
||||
/* Looks up the normal of a packed vertex index by unpacking it into face and
 * face-vertex and asking the mesh. */
float3 getNormal(uint vertexID)
{
  uint face, corner;
  unpack_index(face, corner, vertexID);
  return mesh.GetNormal(face, corner);
}
|
||||
|
||||
/* Looks up the texture coordinate of a packed vertex index by unpacking it into face and
 * face-vertex and asking the mesh. */
float3 getTexCoord(uint vertexID)
{
  uint face, corner;
  unpack_index(face, corner, vertexID);
  return mesh.GetTexCoord(face, corner);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void generateInitialVerticesIndexList()
|
||||
{
|
||||
nrTriangles = 0;
|
||||
for (uint f = 0; f < nrFaces; f++) {
|
||||
const uint verts = mesh.GetNumVerticesOfFace(f);
|
||||
if (verts == 3) {
|
||||
nrTriangles += 1;
|
||||
}
|
||||
else if (verts == 4) {
|
||||
nrTriangles += 2;
|
||||
}
|
||||
}
|
||||
|
||||
triangles.reserve(nrTriangles);
|
||||
|
||||
nrTSpaces = 0;
|
||||
for (uint f = 0; f < nrFaces; f++) {
|
||||
const uint verts = mesh.GetNumVerticesOfFace(f);
|
||||
if (verts != 3 && verts != 4)
|
||||
continue;
|
||||
|
||||
uint tA = uint(triangles.size());
|
||||
triangles.emplace_back(f, nrTSpaces);
|
||||
|
||||
if (verts == 3) {
|
||||
Triangle &triA = triangles[tA];
|
||||
triA.setVertices(0, 1, 2);
|
||||
}
|
||||
else {
|
||||
uint tB = uint(triangles.size());
|
||||
triangles.emplace_back(f, nrTSpaces);
|
||||
Triangle &triA = triangles[tA];
|
||||
Triangle &triB = triangles[tB];
|
||||
|
||||
// need an order independent way to evaluate
|
||||
// tspace on quads. This is done by splitting
|
||||
// along the shortest diagonal.
|
||||
float distSQ_02 = (mesh.GetTexCoord(f, 2) - mesh.GetTexCoord(f, 0)).length_squared();
|
||||
float distSQ_13 = (mesh.GetTexCoord(f, 3) - mesh.GetTexCoord(f, 1)).length_squared();
|
||||
bool quadDiagIs_02;
|
||||
if (distSQ_02 != distSQ_13)
|
||||
quadDiagIs_02 = (distSQ_02 < distSQ_13);
|
||||
else {
|
||||
distSQ_02 = (mesh.GetPosition(f, 2) - mesh.GetPosition(f, 0)).length_squared();
|
||||
distSQ_13 = (mesh.GetPosition(f, 3) - mesh.GetPosition(f, 1)).length_squared();
|
||||
quadDiagIs_02 = !(distSQ_13 < distSQ_02);
|
||||
}
|
||||
|
||||
if (quadDiagIs_02) {
|
||||
triA.setVertices(0, 1, 2);
|
||||
triB.setVertices(0, 2, 3);
|
||||
}
|
||||
else {
|
||||
triA.setVertices(0, 1, 3);
|
||||
triB.setVertices(1, 2, 3);
|
||||
}
|
||||
}
|
||||
|
||||
nrTSpaces += verts;
|
||||
}
|
||||
}
|
||||
|
||||
struct VertexHash {
|
||||
RefMikktspace<Mesh> *mikk;
|
||||
inline uint operator()(const uint &k) const
|
||||
{
|
||||
return hash_float3x3(mikk->getPosition(k), mikk->getNormal(k), mikk->getTexCoord(k));
|
||||
}
|
||||
};
|
||||
|
||||
struct VertexEqual {
|
||||
RefMikktspace<Mesh> *mikk;
|
||||
inline bool operator()(const uint &kA, const uint &kB) const
|
||||
{
|
||||
return mikk->getTexCoord(kA) == mikk->getTexCoord(kB) &&
|
||||
mikk->getNormal(kA) == mikk->getNormal(kB) &&
|
||||
mikk->getPosition(kA) == mikk->getPosition(kB);
|
||||
}
|
||||
};
|
||||
|
||||
/* Merge identical vertices.
 * To find vertices with identical position, normal and texcoord, every vertex is inserted
 * into a hash set whose hash is calculated from those 9 values. Since there might be hash
 * collisions, the set also compares the values themselves. When an equal vertex is already
 * stored in the set, the triangle's vertex index is replaced by the stored one.
 */
|
||||
template<bool isAtomic> void generateSharedVerticesIndexList_impl()
|
||||
{
|
||||
uint numVertices = nrTriangles * 3;
|
||||
AtomicHashSet<uint, isAtomic, VertexHash, VertexEqual> set(numVertices, {this}, {this});
|
||||
runParallel(0u, nrTriangles, [&](uint t) {
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
auto res = set.emplace(triangles[t].vertices[i]);
|
||||
if (!res.second) {
|
||||
triangles[t].vertices[i] = res.first;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
void generateSharedVerticesIndexList()
|
||||
{
|
||||
if (isParallel) {
|
||||
generateSharedVerticesIndexList_impl<true>();
|
||||
}
|
||||
else {
|
||||
generateSharedVerticesIndexList_impl<false>();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////// Degenerate triangles ////////////////////////////////////
|
||||
|
||||
void degenPrologue()
|
||||
{
|
||||
// Mark all degenerate triangles
|
||||
totalTriangles = nrTriangles;
|
||||
std::atomic<uint> degenTriangles(0);
|
||||
runParallel(0u, totalTriangles, [&](uint t) {
|
||||
const float3 p0 = getPosition(triangles[t].vertices[0]);
|
||||
const float3 p1 = getPosition(triangles[t].vertices[1]);
|
||||
const float3 p2 = getPosition(triangles[t].vertices[2]);
|
||||
if (p0 == p1 || p0 == p2 || p1 == p2) // degenerate
|
||||
{
|
||||
triangles[t].markDegenerate = true;
|
||||
degenTriangles.fetch_add(1);
|
||||
}
|
||||
});
|
||||
nrTriangles -= degenTriangles.load();
|
||||
|
||||
if (totalTriangles == nrTriangles) {
|
||||
return;
|
||||
}
|
||||
|
||||
// locate quads with only one good triangle
|
||||
runParallel(0u, totalTriangles - 1, [&](uint t) {
|
||||
Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
|
||||
if (triangleA.faceIdx != triangleB.faceIdx) {
|
||||
/* Individual triangle, skip. */
|
||||
return;
|
||||
}
|
||||
if (triangleA.markDegenerate != triangleB.markDegenerate) {
|
||||
triangleA.quadOneDegenTri = true;
|
||||
triangleB.quadOneDegenTri = true;
|
||||
}
|
||||
});
|
||||
|
||||
std::stable_partition(triangles.begin(), triangles.end(), [](const Triangle &tri) {
|
||||
return !tri.markDegenerate;
|
||||
});
|
||||
}
|
||||
|
||||
void degenEpilogue()
|
||||
{
|
||||
if (nrTriangles == totalTriangles) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::unordered_map<uint, uint> goodTriangleMap;
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
goodTriangleMap.emplace(triangles[t].vertices[i], pack_index(t, i));
|
||||
}
|
||||
}
|
||||
|
||||
// deal with degenerate triangles
|
||||
// punishment for degenerate triangles is O(nrTriangles) extra memory.
|
||||
for (uint t = nrTriangles; t < totalTriangles; t++) {
|
||||
// degenerate triangles on a quad with one good triangle are skipped
|
||||
// here but processed in the next loop
|
||||
if (triangles[t].quadOneDegenTri) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
const auto entry = goodTriangleMap.find(triangles[t].vertices[i]);
|
||||
if (entry == goodTriangleMap.end()) {
|
||||
// Matching vertex from good triangle is not found.
|
||||
continue;
|
||||
}
|
||||
|
||||
uint tSrc, iSrc;
|
||||
unpack_index(tSrc, iSrc, entry->second);
|
||||
const uint iSrcVert = triangles[tSrc].faceVertex[iSrc];
|
||||
const uint iSrcOffs = triangles[tSrc].tSpaceIdx;
|
||||
const uint iDstVert = triangles[t].faceVertex[i];
|
||||
const uint iDstOffs = triangles[t].tSpaceIdx;
|
||||
// copy tspace
|
||||
tSpaces[iDstOffs + iDstVert] = tSpaces[iSrcOffs + iSrcVert];
|
||||
}
|
||||
}
|
||||
|
||||
// deal with degenerate quads with one good triangle
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
// this triangle belongs to a quad where the
|
||||
// other triangle is degenerate
|
||||
if (!triangles[t].quadOneDegenTri) {
|
||||
continue;
|
||||
}
|
||||
uint vertFlag = (1u << triangles[t].faceVertex[0]) | (1u << triangles[t].faceVertex[1]) |
|
||||
(1u << triangles[t].faceVertex[2]);
|
||||
uint missingFaceVertex = 0;
|
||||
if ((vertFlag & 2) == 0)
|
||||
missingFaceVertex = 1;
|
||||
else if ((vertFlag & 4) == 0)
|
||||
missingFaceVertex = 2;
|
||||
else if ((vertFlag & 8) == 0)
|
||||
missingFaceVertex = 3;
|
||||
|
||||
uint faceIdx = triangles[t].faceIdx;
|
||||
float3 dstP = mesh.GetPosition(faceIdx, missingFaceVertex);
|
||||
bool found = false;
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
const uint faceVertex = triangles[t].faceVertex[i];
|
||||
const float3 srcP = mesh.GetPosition(faceIdx, faceVertex);
|
||||
if (srcP == dstP) {
|
||||
const uint offset = triangles[t].tSpaceIdx;
|
||||
tSpaces[offset + missingFaceVertex] = tSpaces[offset + faceVertex];
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(found);
|
||||
(void)found;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// returns the texture area times 2
|
||||
float calcTexArea(uint tri)
|
||||
{
|
||||
const float3 t1 = getTexCoord(triangles[tri].vertices[0]);
|
||||
const float3 t2 = getTexCoord(triangles[tri].vertices[1]);
|
||||
const float3 t3 = getTexCoord(triangles[tri].vertices[2]);
|
||||
|
||||
const float t21x = t2.x - t1.x;
|
||||
const float t21y = t2.y - t1.y;
|
||||
const float t31x = t3.x - t1.x;
|
||||
const float t31y = t3.y - t1.y;
|
||||
|
||||
const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
|
||||
return fabsf(signedAreaSTx2);
|
||||
}
|
||||
|
||||
void initTriangle()
|
||||
{
|
||||
// triangles[f].iFlag is cleared in generateInitialVerticesIndexList()
|
||||
// which is called before this function.
|
||||
// printf("initTriangle %d\n", nrTriangles);
|
||||
// evaluate first order derivatives
|
||||
runParallel(0u, nrTriangles, [&](uint t) {
|
||||
Triangle &triangle = triangles[t];
|
||||
// if(t==1844 && nrFaces==923)
|
||||
// printf("Tri: %d Vertices: %d %d %d\n", t, triangle.vertices[0], triangle.vertices[1],
|
||||
// triangle.vertices[2]);
|
||||
|
||||
// initial values
|
||||
const float3 v1 = getPosition(triangle.vertices[0]);
|
||||
const float3 v2 = getPosition(triangle.vertices[1]);
|
||||
const float3 v3 = getPosition(triangle.vertices[2]);
|
||||
const float3 t1 = getTexCoord(triangle.vertices[0]);
|
||||
const float3 t2 = getTexCoord(triangle.vertices[1]);
|
||||
const float3 t3 = getTexCoord(triangle.vertices[2]);
|
||||
|
||||
const float t21x = t2.x - t1.x;
|
||||
const float t21y = t2.y - t1.y;
|
||||
const float t31x = t3.x - t1.x;
|
||||
const float t31y = t3.y - t1.y;
|
||||
const float3 d1 = v2 - v1, d2 = v3 - v1;
|
||||
|
||||
const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
|
||||
const float3 vOs = (t31y * d1) - (t21y * d2); // eq 18
|
||||
const float3 vOt = (-t31x * d1) + (t21x * d2); // eq 19
|
||||
|
||||
triangle.orientPreserving = (signedAreaSTx2 > 0);
|
||||
#if 0
|
||||
if(t==1844 && nrFaces==923)
|
||||
{
|
||||
/*
|
||||
const uint32_t* p1 = (const uint32_t*)&v1.x;
|
||||
const uint32_t* p2 = (const uint32_t*)&v2.x;
|
||||
const uint32_t* p3 = (const uint32_t*)&v3.x;
|
||||
const uint32_t* q1 = (const uint32_t*)&t1.x;
|
||||
const uint32_t* q2 = (const uint32_t*)&t2.x;
|
||||
const uint32_t* q3 = (const uint32_t*)&t3.x;
|
||||
printf("Coord: \n%08x %08x %08x\n%08x %08x %08x\n%08x %08x %08x\n",
|
||||
p1[0], p1[1], p1[2],
|
||||
p2[0], p2[1], p2[2],
|
||||
p3[0], p3[1], p3[2]);
|
||||
printf("Tex: \n%08x %08x %08x\n%08x %08x %08x\n%08x %08x %08x\n",
|
||||
q1[0], q1[1], q1[2],
|
||||
q2[0], q2[1], q2[2],
|
||||
q3[0], q3[1], q3[2]);
|
||||
printf("signedAreaSTx2 %e\n", signedAreaSTx2);
|
||||
*/
|
||||
printf("Coords:\n%f %f %f\n%f %f %f\n%f %f %f\n",
|
||||
v1.x,v1.y,v1.z,
|
||||
v2.x,v2.y,v2.z,
|
||||
v3.x,v3.y,v3.z);
|
||||
printf("Tex:\n%f %f %f\n%f %f %f\n%f %f %f\n",
|
||||
t1.x,t1.y,t1.z,
|
||||
t2.x,t2.y,t2.z,
|
||||
t3.x,t3.y,t3.z);
|
||||
}
|
||||
#endif
|
||||
if (not_zero(signedAreaSTx2)) {
|
||||
const float lenOs2 = vOs.length_squared();
|
||||
const float lenOt2 = vOt.length_squared();
|
||||
// if(t==5858)
|
||||
// printf("len0s2 %e, len0t2 %e\n", lenOs2, lenOt2);
|
||||
const float fS = triangle.orientPreserving ? 1.0f : (-1.0f);
|
||||
if (not_zero(lenOs2))
|
||||
triangle.tangent = vOs * (fS / sqrtf(lenOs2));
|
||||
|
||||
// if this is a good triangle
|
||||
if (not_zero(lenOs2) && not_zero(lenOt2))
|
||||
triangle.groupWithAny = false;
|
||||
}
|
||||
});
|
||||
|
||||
// force otherwise healthy quads to a fixed orientation
|
||||
runParallel(0u, nrTriangles - 1, [&](uint t) {
|
||||
Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
|
||||
if (triangleA.faceIdx != triangleB.faceIdx) {
|
||||
// this is not a quad
|
||||
return;
|
||||
}
|
||||
|
||||
// bad triangles should already have been removed by
|
||||
// degenPrologue(), but just in case check that neither are degenerate
|
||||
if (!(triangleA.markDegenerate || triangleB.markDegenerate)) {
|
||||
// if this happens the quad has extremely bad mapping!!
|
||||
if (triangleA.orientPreserving != triangleB.orientPreserving) {
|
||||
bool chooseOrientFirstTri = false;
|
||||
if (triangleB.groupWithAny)
|
||||
chooseOrientFirstTri = true;
|
||||
else if (calcTexArea(t) >= calcTexArea(t + 1))
|
||||
chooseOrientFirstTri = true;
|
||||
|
||||
// force match
|
||||
const uint t0 = chooseOrientFirstTri ? t : (t + 1);
|
||||
const uint t1 = chooseOrientFirstTri ? (t + 1) : t;
|
||||
triangles[t1].orientPreserving = triangles[t0].orientPreserving;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////////// Edges ///////////////////////////////////////////
|
||||
|
||||
struct NeighborShard {
|
||||
struct Entry {
|
||||
Entry(uint32_t key_, uint data_) : key(key_), data(data_) {}
|
||||
uint key, data;
|
||||
};
|
||||
std::vector<Entry> entries;
|
||||
uint id;
|
||||
|
||||
NeighborShard(size_t capacity)
|
||||
{
|
||||
entries.reserve(capacity);
|
||||
}
|
||||
|
||||
void buildNeighbors(RefMikktspace<Mesh> *mikk)
|
||||
{
|
||||
/* Entries are added by iterating over t, so by using a stable sort,
|
||||
* we don't have to compare based on t as well. */
|
||||
{
|
||||
std::vector<Entry> tempEntries(entries.size(), {0, 0});
|
||||
radixsort(entries, tempEntries, [](const Entry &e) { return e.key; });
|
||||
}
|
||||
|
||||
for (uint i = 0; i < entries.size(); i++) {
|
||||
const Entry &a = entries[i];
|
||||
uint tA, iA;
|
||||
unpack_index(tA, iA, a.data);
|
||||
RefMikktspace<Mesh>::Triangle &triA = mikk->triangles[tA];
|
||||
|
||||
if (triA.neighbor[iA] != UNSET_ENTRY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint i0A = triA.vertices[iA], i1A = triA.vertices[(iA != 2) ? (iA + 1) : 0];
|
||||
for (uint j = i + 1; j < entries.size(); j++) {
|
||||
const Entry &b = entries[j];
|
||||
uint tB, iB;
|
||||
unpack_index(tB, iB, b.data);
|
||||
RefMikktspace<Mesh>::Triangle &triB = mikk->triangles[tB];
|
||||
|
||||
if (b.key != a.key)
|
||||
break;
|
||||
|
||||
if (triB.neighbor[iB] != UNSET_ENTRY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint i1B = triB.vertices[iB], i0B = triB.vertices[(iB != 2) ? (iB + 1) : 0];
|
||||
if (i0A == i0B && i1A == i1B) {
|
||||
triA.neighbor[iA] = tB;
|
||||
triB.neighbor[iB] = tA;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void buildNeighbors()
|
||||
{
|
||||
/* In order to parallelize the processing, we divide the vertices into shards.
|
||||
* Since only vertex pairs with the same key will be checked, we can process
|
||||
* shards independently as long as we ensure that all vertices with the same
|
||||
* key go into the same shard.
|
||||
* This is done by hashing the key to get the shard index of each vertex.
|
||||
*/
|
||||
// TODO: Two-step filling that first counts and then fills? Could be parallel then.
|
||||
uint targetNrShards = isParallel ? uint(4 * nrThreads) : 1;
|
||||
uint nrShards = 1, hashShift = 32;
|
||||
while (nrShards < targetNrShards) {
|
||||
nrShards *= 2;
|
||||
hashShift -= 1;
|
||||
}
|
||||
|
||||
/* Reserve 25% extra to account for variation due to hashing. */
|
||||
size_t reserveSize = size_t(double(3 * nrTriangles) * 1.25 / nrShards);
|
||||
std::vector<NeighborShard> shards(nrShards, {reserveSize});
|
||||
for (uint t = 0; t < nrShards; t++)
|
||||
shards[t].id = t;
|
||||
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
Triangle &triangle = triangles[t];
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
const uint i0 = triangle.vertices[i];
|
||||
const uint i1 = triangle.vertices[(i != 2) ? (i + 1) : 0];
|
||||
const uint high = std::max(i0, i1), low = std::min(i0, i1);
|
||||
const uint hash = hash_uint3(high, low, 0);
|
||||
/* TODO: Reusing the hash here means less hash space inside each shard.
|
||||
* Computing a second hash with a different seed it probably not worth it? */
|
||||
const uint shard = isParallel ? (hash >> hashShift) : 0;
|
||||
shards[shard].entries.emplace_back(hash, pack_index(t, i));
|
||||
}
|
||||
}
|
||||
|
||||
runParallel(0u, nrShards, [&](uint s) { shards[s].buildNeighbors(this); });
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void assignRecur(const uint t, uint groupId)
|
||||
{
|
||||
if (t == UNSET_ENTRY) {
|
||||
return;
|
||||
}
|
||||
|
||||
Triangle &triangle = triangles[t];
|
||||
Group &group = groups[groupId];
|
||||
|
||||
// track down vertex
|
||||
const uint vertRep = group.vertexRepresentative;
|
||||
uint i = 3;
|
||||
if (triangle.vertices[0] == vertRep)
|
||||
i = 0;
|
||||
else if (triangle.vertices[1] == vertRep)
|
||||
i = 1;
|
||||
else if (triangle.vertices[2] == vertRep)
|
||||
i = 2;
|
||||
assert(i < 3);
|
||||
|
||||
// early out
|
||||
if (triangle.group[i] != UNSET_ENTRY) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (triangle.groupWithAny) {
|
||||
// first to group with a group-with-anything triangle
|
||||
// determines its orientation.
|
||||
// This is the only existing order dependency in the code!!
|
||||
if (triangle.group[0] == UNSET_ENTRY && triangle.group[1] == UNSET_ENTRY &&
|
||||
triangle.group[2] == UNSET_ENTRY)
|
||||
{
|
||||
triangle.orientPreserving = group.orientPreserving;
|
||||
}
|
||||
else {
|
||||
}
|
||||
}
|
||||
|
||||
if (triangle.orientPreserving != group.orientPreserving) {
|
||||
return;
|
||||
}
|
||||
triangle.group[i] = groupId;
|
||||
// if(groupId==0)
|
||||
// printf("triangle %d -> group %d\n", t, groupId);
|
||||
|
||||
const uint t_L = triangle.neighbor[i];
|
||||
const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
|
||||
if (groupId == 0) {
|
||||
// printf("<2>Propagating group %d to %d, %d\n", groupId, t_L, t_R);
|
||||
}
|
||||
assignRecur(t_L, groupId);
|
||||
assignRecur(t_R, groupId);
|
||||
}
|
||||
|
||||
void build4RuleGroups()
|
||||
{
|
||||
/* NOTE: This could be parallelized by grouping all [t, i] pairs into
|
||||
* shards by hash(triangles[t].vertices[i]). This way, each shard can be processed
|
||||
* independently and in parallel.
|
||||
* However, the `groupWithAny` logic needs special handling (e.g. lock a mutex when
|
||||
* encountering a `groupWithAny` triangle, then sort it out, then unlock and proceed). */
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
Triangle &triangle = triangles[t];
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
// if not assigned to a group
|
||||
if (triangle.groupWithAny || triangle.group[i] != UNSET_ENTRY) {
|
||||
// if(t==230 || t==231)
|
||||
// printf("<ref> t %d i %d: skipping (groupWithAny %d, group %d, neighbors %d %d
|
||||
// %d)\n",
|
||||
// t, i, triangle.groupWithAny, triangle.group[i], triangle.neighbor[0],
|
||||
// triangle.neighbor[1], triangle.neighbor[2]);
|
||||
continue;
|
||||
}
|
||||
|
||||
const uint newGroupId = uint(groups.size());
|
||||
triangle.group[i] = newGroupId;
|
||||
groups.emplace_back(triangle.vertices[i], bool(triangle.orientPreserving));
|
||||
|
||||
if (newGroupId == 1151 && nrFaces == 923) {
|
||||
// printf("Initializing group %d: tri %d vert %d\n", newGroupId, t, i);
|
||||
}
|
||||
|
||||
const uint t_L = triangle.neighbor[i];
|
||||
const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
|
||||
assignRecur(t_L, newGroupId);
|
||||
assignRecur(t_R, newGroupId);
|
||||
}
|
||||
}
|
||||
// printf("%d triangles -> %d groups\n", nrTriangles, groups.size());
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<bool atomic> void accumulateTSpaces(uint t)
|
||||
{
|
||||
const Triangle &triangle = triangles[t];
|
||||
// only valid triangles get to add their contribution
|
||||
if (triangle.groupWithAny) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Todo: Vectorize?
|
||||
* Also: Could add special case for flat shading, when all normals are equal half of the fCos
|
||||
* projections and two of the three tangent projections are unnecessary. */
|
||||
std::array<float3, 3> n, p;
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
n[i] = getNormal(triangle.vertices[i]);
|
||||
p[i] = getPosition(triangle.vertices[i]);
|
||||
}
|
||||
|
||||
std::array<float, 3> fCos = {dot(project(n[0], p[1] - p[0]), project(n[0], p[2] - p[0])),
|
||||
dot(project(n[1], p[2] - p[1]), project(n[1], p[0] - p[1])),
|
||||
dot(project(n[2], p[0] - p[2]), project(n[2], p[1] - p[2]))};
|
||||
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
uint groupId = triangle.group[i];
|
||||
if (groupId != UNSET_ENTRY) {
|
||||
float3 tangent = project(n[i], triangle.tangent) *
|
||||
fast_acosf(std::clamp(fCos[i], -1.0f, 1.0f));
|
||||
if constexpr (atomic) {
|
||||
groups[groupId].accumulateTSpaceAtomic(tangent);
|
||||
}
|
||||
else {
|
||||
groups[groupId].accumulateTSpace(tangent);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generateTSpaces()
|
||||
{
|
||||
if (isParallel) {
|
||||
runParallel(0u, nrTriangles, [&](uint t) { accumulateTSpaces<true>(t); });
|
||||
}
|
||||
else {
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
accumulateTSpaces<false>(t);
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Worth parallelizing? Probably not. */
|
||||
for (Group &group : groups) {
|
||||
group.normalizeTSpace();
|
||||
}
|
||||
|
||||
tSpaces.resize(nrTSpaces);
|
||||
|
||||
for (uint t = 0; t < nrTriangles; t++) {
|
||||
Triangle &triangle = triangles[t];
|
||||
for (uint i = 0; i < 3; i++) {
|
||||
const uint offset = triangle.tSpaceIdx;
|
||||
const uint faceVertex = triangle.faceVertex[i];
|
||||
|
||||
uint groupId = triangle.group[i];
|
||||
if (groupId == UNSET_ENTRY) {
|
||||
continue;
|
||||
}
|
||||
const Group group = groups[groupId];
|
||||
assert(triangle.orientPreserving == group.orientPreserving);
|
||||
tSpaces[offset + faceVertex].accumulateGroup(group);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace mikk
|
|
@ -64,7 +64,11 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
|
|||
/* result */
|
||||
CustomData *loopdata_out,
|
||||
uint loopdata_out_len,
|
||||
short *tangent_mask_curr_p);
|
||||
short *tangent_mask_curr_p,
|
||||
int len_vert_positions,
|
||||
int len_corner_verts,
|
||||
int len_vert_normals,
|
||||
int len_face_normals);
|
||||
|
||||
void BKE_mesh_calc_loop_tangents(Mesh *mesh_eval,
|
||||
bool calc_active_tangent,
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
/* interface */
|
||||
#include "mikktspace.hh"
|
||||
#include "mikktspace_ref.hh"
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Tangent Space Calculation
|
||||
|
@ -147,7 +148,8 @@ static void emDM_calc_loop_tangents_thread(TaskPool *__restrict /*pool*/, void *
|
|||
{
|
||||
SGLSLEditMeshToTangent *mesh_data = static_cast<SGLSLEditMeshToTangent *>(taskdata);
|
||||
|
||||
mikk::Mikktspace<SGLSLEditMeshToTangent> mikk(*mesh_data);
|
||||
mikk::RefMikktspace<SGLSLEditMeshToTangent> mikk(*mesh_data);
|
||||
printf("EditMesh::tangent\n");
|
||||
mikk.genTangSpace();
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "BKE_report.hh"
|
||||
|
||||
#include "mikktspace.hh"
|
||||
#include "mikktspace_ref.hh"
|
||||
|
||||
#include "BLI_strict_flags.h" /* Keep last. */
|
||||
|
||||
|
@ -75,6 +76,8 @@ struct BKEMeshToTangent {
|
|||
const float (*corner_normals)[3]; /* loops' normals */
|
||||
float (*tangents)[4]; /* output tangents */
|
||||
int num_faces; /* number of polygons */
|
||||
size_t tangent_len;
|
||||
int num_face_as_quad_map;
|
||||
};
|
||||
|
||||
void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
|
||||
|
@ -83,7 +86,7 @@ void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
|
|||
float (*r_looptangent)[4],
|
||||
const float (*corner_normals)[3],
|
||||
const float (*loop_uvs)[2],
|
||||
const int /*numLoops*/,
|
||||
const int numLoops,
|
||||
const blender::OffsetIndices<int> faces,
|
||||
ReportList *reports)
|
||||
{
|
||||
|
@ -96,8 +99,9 @@ void BKE_mesh_calc_loop_tangent_single_ex(const float (*vert_positions)[3],
|
|||
mesh_to_tangent.corner_normals = corner_normals;
|
||||
mesh_to_tangent.tangents = r_looptangent;
|
||||
mesh_to_tangent.num_faces = int(faces.size());
|
||||
mesh_to_tangent.tangent_len = (size_t)numLoops * 4;
|
||||
|
||||
mikk::Mikktspace<BKEMeshToTangent> mikk(mesh_to_tangent);
|
||||
mikk::RefMikktspace<BKEMeshToTangent> mikk(mesh_to_tangent);
|
||||
|
||||
/* First check we do have a tris/quads only mesh. */
|
||||
for (const int64_t i : faces.index_range()) {
|
||||
|
@ -200,6 +204,68 @@ struct SGLSLMeshToTangent {
|
|||
return uint(tri[int(vert_num)]);
|
||||
}
|
||||
|
||||
/* Position of the vertex that corner `loop_index` refers to. */
mikk::float3 GetPositionDirect(const uint loop_index)
{
  const auto vert_index = corner_verts[loop_index];
  return mikk::float3(positions[vert_index]);
}
|
||||
|
||||
/* Texture coordinate of corner `loop_index`, with 1.0 as third component.
 * Uses the UV layer when there is one, otherwise a spherical mapping of the
 * vertex's orco coordinate. */
inline mikk::float3 GetTexCoordDirect(const uint loop_index)
{
  if (mloopuv == nullptr) {
    /* No UV layer: derive (u, v) from the orco coordinate of the corner's vertex. */
    const float *l_orco = orco[corner_verts[loop_index]];
    float u, v;
    map_to_sphere(&u, &v, l_orco[0], l_orco[1], l_orco[2]);
    return mikk::float3(u, v, 1.0f);
  }

  const float2 &uv = mloopuv[loop_index];
  return mikk::float3(uv[0], uv[1], 1.0f);
}
|
||||
|
||||
/* Normal to use for corner `loop_index` of face `face_index`.
 *
 * Order of preference:
 *  1. the precomputed corner normal, when available,
 *  2. for faces flagged in `sharp_faces`: the precomputed face normal, or else a normal
 *     computed from the face's own corners,
 *  3. the normal of the corner's vertex. */
inline mikk::float3 GetNormalDirect(const int face_index, const uint loop_index)
{
  if (precomputedLoopNormals) {
    return mikk::float3(precomputedLoopNormals[loop_index]);
  }

  const bool is_flat = !sharp_faces.is_empty() && sharp_faces[face_index];
  if (!is_flat) {
    return mikk::float3(vert_normals[corner_verts[loop_index]]);
  }

  if (precomputedFaceNormals) {
    return mikk::float3(precomputedFaceNormals[face_index]);
  }

  /* No precomputed data: compute the normal from the face's corners. The triangle case
   * used to read a local `tri` that was never initialized; the face's first three corners
   * are used instead.
   * NOTE(review): for faces with more than four corners this is the normal of the first
   * three corners only, not of a tessellation triangle - confirm this is acceptable. */
  const blender::IndexRange face = faces[face_index];
  float normal[3];
#ifdef USE_TRI_DETECT_QUADS
  if (face.size() == 4) {
    normal_quad_v3(normal,
                   positions[corner_verts[face[0]]],
                   positions[corner_verts[face[1]]],
                   positions[corner_verts[face[2]]],
                   positions[corner_verts[face[3]]]);
  }
  else
#endif
  {
    normal_tri_v3(normal,
                  positions[corner_verts[face[0]]],
                  positions[corner_verts[face[1]]],
                  positions[corner_verts[face[2]]]);
  }
  return mikk::float3(normal);
}
|
||||
|
||||
/* Returns what `GetLoop` reports for corner `vert_num` of face `face_num`;
 * the triangle and face index it also outputs are discarded. */
uint GetStoreIndex(const uint face_num, const uint vert_num)
{
  blender::int3 unused_tri;
  int unused_face_index;
  const uint loop_index = GetLoop(face_num, vert_num, unused_tri, unused_face_index);
  return loop_index;
}
|
||||
|
||||
void SetTangentSpaceDirect(const uint loop_index, mikk::float3 T, bool orientation)
|
||||
{
|
||||
copy_v4_fl4(tangent[loop_index], T.x, T.y, T.z, orientation ? 1.0f : -1.0f);
|
||||
}
|
||||
|
||||
mikk::float3 GetPosition(const uint face_num, const uint vert_num)
|
||||
{
|
||||
blender::int3 tri;
|
||||
|
@ -280,6 +346,12 @@ struct SGLSLMeshToTangent {
|
|||
float (*tangent)[4]; /* destination */
|
||||
blender::Span<bool> sharp_faces;
|
||||
int numTessFaces;
|
||||
size_t tangent_len;
|
||||
|
||||
int len_corner_verts;
|
||||
int len_positions;
|
||||
int len_face_normals;
|
||||
int len_vert_normals;
|
||||
|
||||
#ifdef USE_TRI_DETECT_QUADS
|
||||
/* map from 'fake' face index to corner_tris,
|
||||
|
@ -292,7 +364,7 @@ struct SGLSLMeshToTangent {
|
|||
static void DM_calc_loop_tangents_thread(TaskPool *__restrict /*pool*/, void *taskdata)
|
||||
{
|
||||
SGLSLMeshToTangent *mesh_data = static_cast<SGLSLMeshToTangent *>(taskdata);
|
||||
|
||||
// mikk::RefMikktspace<SGLSLMeshToTangent> mikk(*mesh_data);
|
||||
mikk::Mikktspace<SGLSLMeshToTangent> mikk(*mesh_data);
|
||||
mikk.genTangSpace();
|
||||
}
|
||||
|
@ -392,7 +464,6 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
|
|||
const int *corner_tri_faces,
|
||||
const uint corner_tris_len,
|
||||
const blender::Span<bool> sharp_faces,
|
||||
|
||||
const CustomData *loopdata,
|
||||
bool calc_active_tangent,
|
||||
const char (*tangent_names)[MAX_CUSTOMDATA_LAYER_NAME],
|
||||
|
@ -404,7 +475,12 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
|
|||
/* result */
|
||||
CustomData *loopdata_out,
|
||||
const uint loopdata_out_len,
|
||||
short *tangent_mask_curr_p)
|
||||
short *tangent_mask_curr_p,
|
||||
|
||||
int len_vert_positions,
|
||||
int len_corner_verts,
|
||||
[[maybe_unused]] int len_vert_normals,
|
||||
int len_face_normals)
|
||||
{
|
||||
int act_uv_n = -1;
|
||||
int ren_uv_n = -1;
|
||||
|
@ -508,6 +584,11 @@ void BKE_mesh_calc_loop_tangent_ex(const float (*vert_positions)[3],
|
|||
mesh2tangent->mloopuv = static_cast<const float2 *>(CustomData_get_layer_named(
|
||||
loopdata, CD_PROP_FLOAT2, loopdata_out->layers[index].name));
|
||||
|
||||
mesh2tangent->len_corner_verts = len_corner_verts;
|
||||
mesh2tangent->len_positions = len_vert_positions;
|
||||
mesh2tangent->len_face_normals = len_face_normals;
|
||||
mesh2tangent->tangent_len = loopdata_out_len;
|
||||
|
||||
/* Fill the resulting tangent_mask */
|
||||
if (!mesh2tangent->mloopuv) {
|
||||
mesh2tangent->orco = vert_orco;
|
||||
|
@ -591,9 +672,10 @@ void BKE_mesh_calc_loop_tangents(Mesh *mesh_eval,
|
|||
/* may be nullptr */
|
||||
static_cast<const float(*)[3]>(CustomData_get_layer(&mesh_eval->vert_data, CD_ORCO)),
|
||||
/* result */
|
||||
&mesh_eval->corner_data,
|
||||
uint(mesh_eval->corners_num),
|
||||
&tangent_mask);
|
||||
(int)me_eval->vert_positions().size(),
|
||||
(int)me_eval->corner_verts().size(),
|
||||
(int)me_eval->vert_normals().size(),
|
||||
(int)me_eval->face_normals().size());
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
|
|
@ -137,7 +137,11 @@ static void extract_tan_init_common(const MeshRenderData &mr,
|
|||
orco,
|
||||
r_loop_data,
|
||||
mr.corner_verts.size(),
|
||||
&tangent_mask);
|
||||
&tangent_mask,
|
||||
(uint)mr.vert_positions.size(),
|
||||
(uint)mr.corner_verts.size(),
|
||||
(uint)mr.vert_normals.size(),
|
||||
(uint)mr.face_normals.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -539,7 +539,11 @@ static void do_multires_bake(MultiresBakeRender *bkr,
|
|||
/* result */
|
||||
&dm->loopData,
|
||||
dm->getNumLoops(dm),
|
||||
&dm->tangent_mask);
|
||||
&dm->tangent_mask,
|
||||
(uint)positions.size(),
|
||||
(uint)corner_verts.size(),
|
||||
(uint)vert_normals.size(),
|
||||
(uint)face_normals.size());
|
||||
}
|
||||
|
||||
pvtangent = static_cast<float *>(DM_get_loop_data_layer(dm, CD_TANGENT));
|
||||
|
|
Loading…
Reference in New Issue
left over debug print