FBX Export: Base patch for numpy speedup #104447
@ -9,6 +9,7 @@ except:
|
||||
|
||||
from struct import pack
|
||||
import array
|
||||
import numpy as np
|
||||
import zlib
|
||||
|
||||
_BLOCK_SENTINEL_LENGTH = 13
|
||||
@ -112,17 +113,7 @@ class FBXElem:
|
||||
self.props_type.append(data_types.STRING)
|
||||
self.props.append(data)
|
||||
|
||||
def _add_array_helper(self, data, array_type, prop_type):
|
||||
assert(isinstance(data, array.array))
|
||||
assert(data.typecode == array_type)
|
||||
|
||||
length = len(data)
|
||||
|
||||
if _IS_BIG_ENDIAN:
|
||||
data = data[:]
|
||||
data.byteswap()
|
||||
data = data.tobytes()
|
||||
|
||||
def _add_array_helper(self, data, prop_type, length):
|
||||
# mimic behavior of fbxconverter (also common sense)
|
||||
# we could make this configurable.
|
||||
encoding = 0 if len(data) <= 128 else 1
|
||||
@ -138,35 +129,78 @@ class FBXElem:
|
||||
self.props_type.append(prop_type)
|
||||
self.props.append(data)
|
||||
|
||||
def _add_parray_helper(self, data, array_type, prop_type):
    """Serialise an ``array.array`` to bytes and hand it to ``_add_array_helper``.

    :param data: the ``array.array`` to store; it is never modified in place.
    :param array_type: the typecode ``data`` is required to have.
    :param prop_type: property type marker forwarded unchanged to ``_add_array_helper``.
    """
    assert isinstance(data, array.array)
    assert data.typecode == array_type

    # The element count has to be taken before the array is turned into raw bytes.
    item_count = len(data)

    if _IS_BIG_ENDIAN:
        # Swap the byte order on a copy so that the caller's array is left untouched.
        swapped = data[:]
        swapped.byteswap()
        data = swapped
    raw_bytes = data.tobytes()

    self._add_array_helper(raw_bytes, prop_type, item_count)
|
||||
|
||||
def _add_ndarray_helper(self, data, dtype, prop_type):
    """Serialise a NumPy array to little-endian bytes and hand it to ``_add_array_helper``.

    :param data: the ``np.ndarray`` to store; it is never modified in place.
    :param dtype: the element type ``data`` is required to have. Byte order is not part of the requirement, an
        array stored in non-native byte order is accepted and converted.
    :param prop_type: property type marker forwarded unchanged to ``_add_array_helper``.
    """
    assert isinstance(data, np.ndarray)
    # Compare with the byte order normalised to native, so that only the element type itself is checked.
    assert data.dtype.newbyteorder('=') == np.dtype(dtype)

    # The element count has to be taken before the array is turned into raw bytes.
    length = data.size

    # Ask for the little-endian flavour of the dtype instead of swapping based on the host's byte order: this is a
    # no-op when the data is already little-endian (or single-byte) and gives the right bytes for both native
    # big-endian hosts and explicitly big-endian arrays on little-endian hosts.
    little_endian_dtype = data.dtype.newbyteorder('<')
    data = data.astype(little_endian_dtype, copy=False).tobytes()

    self._add_array_helper(data, prop_type, length)
|
||||
|
||||
def add_int32_array(self, data):
    """Add ``data`` to this element as an int32 array property.

    :param data: an ``np.ndarray`` (expected to hold ``np.int32``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_INT32`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, np.int32, data_types.INT32_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_INT32, data)
    self._add_parray_helper(data, data_types.ARRAY_INT32, data_types.INT32_ARRAY)
|
||||
|
||||
def add_int64_array(self, data):
    """Add ``data`` to this element as an int64 array property.

    :param data: an ``np.ndarray`` (expected to hold ``np.int64``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_INT64`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, np.int64, data_types.INT64_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_INT64, data)
    self._add_parray_helper(data, data_types.ARRAY_INT64, data_types.INT64_ARRAY)
|
||||
|
||||
def add_float32_array(self, data):
    """Add ``data`` to this element as a float32 array property.

    :param data: an ``np.ndarray`` (expected to hold ``np.float32``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_FLOAT32`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, np.float32, data_types.FLOAT32_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_FLOAT32, data)
    self._add_parray_helper(data, data_types.ARRAY_FLOAT32, data_types.FLOAT32_ARRAY)
|
||||
|
||||
def add_float64_array(self, data):
    """Add ``data`` to this element as a float64 array property.

    :param data: an ``np.ndarray`` (expected to hold ``np.float64``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_FLOAT64`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, np.float64, data_types.FLOAT64_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_FLOAT64, data)
    self._add_parray_helper(data, data_types.ARRAY_FLOAT64, data_types.FLOAT64_ARRAY)
|
||||
|
||||
def add_bool_array(self, data):
    """Add ``data`` to this element as a bool array property.

    :param data: an ``np.ndarray`` (expected to hold ``bool``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_BOOL`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, bool, data_types.BOOL_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_BOOL, data)
    self._add_parray_helper(data, data_types.ARRAY_BOOL, data_types.BOOL_ARRAY)
|
||||
|
||||
def add_byte_array(self, data):
    """Add ``data`` to this element as a byte array property.

    :param data: an ``np.ndarray`` (expected to hold ``np.byte``), an ``array.array``, or any other initializer
        accepted by ``array.array`` for the ``data_types.ARRAY_BYTE`` typecode.
    """
    # NumPy arrays are dispatched first so they are never pushed through the array.array coercion below.
    if isinstance(data, np.ndarray):
        self._add_ndarray_helper(data, np.byte, data_types.BYTE_ARRAY)
        return

    if not isinstance(data, array.array):
        data = array.array(data_types.ARRAY_BYTE, data)
    self._add_parray_helper(data, data_types.ARRAY_BYTE, data_types.BYTE_ARRAY)
|
||||
|
||||
# -------------------------
|
||||
# internal helper functions
|
||||
|
@ -6,6 +6,7 @@
|
||||
import array
|
||||
import datetime
|
||||
import math
|
||||
import numpy as np
|
||||
import os
|
||||
import time
|
||||
|
||||
@ -46,9 +47,9 @@ from .fbx_utils import (
|
||||
# Miscellaneous utils.
|
||||
PerfMon,
|
||||
units_blender_to_fbx_factor, units_convertor, units_convertor_iter,
|
||||
matrix4_to_array, similar_values, similar_values_iter,
|
||||
matrix4_to_array, similar_values, similar_values_iter, astype_view_signedness, fast_first_axis_unique,
|
||||
# Mesh transform helpers.
|
||||
vcos_transformed_gen, nors_transformed_gen,
|
||||
vcos_transformed_gen, nors_transformed_gen, vcos_transformed, nors_transformed,
|
||||
# UUID from key.
|
||||
get_fbx_uuid_from_key,
|
||||
# Key generators.
|
||||
|
@ -9,6 +9,7 @@ import time
|
||||
from collections import namedtuple
|
||||
from collections.abc import Iterable
|
||||
from itertools import zip_longest, chain
|
||||
import numpy as np
|
||||
|
||||
import bpy
|
||||
import bpy_extras
|
||||
@ -272,6 +273,246 @@ def nors_transformed_gen(raw_nors, m=None):
|
||||
return gen if m is None else (m @ Vector(v) for v in gen)
|
||||
|
||||
|
||||
def _mat4_vec3_array_multiply(mat4, vec3_array, dtype=None, return_4d=False):
    """Apply a 4x4 matrix to every 3d vector of an array and return an array of 3d (default) or 4d vectors.

    The input is read as rows of three components (it is reshaped to (-1, 3)) and every vector is treated as having
    an implicit w component of 1.0.

    The reshaped input itself is returned, without a further copy, when return_4d=False, the returned dtype matches
    the input dtype and the matrix is either None or, looking at its first three rows only, has an identity 3x3 part
    and no translation:
        [1, 0, 0, 0]
        [0, 1, 0, 0]
        [0, 0, 1, 0]

    dtype is the dtype of the returned array; None means the dtype of the input array."""
    out_dtype = vec3_array.dtype if dtype is None else dtype
    vectors = vec3_array.reshape(-1, 3)

    # mathutils multiplies a 4d Matrix by a 3d Vector by implicitly appending w=1.0 to the Vector and truncating the
    # 4d result back to 3d. Numpy has no such implicit extension, but because w is always 1.0 the last matrix column
    # can be left out of the multiplication and simply added to each multiplied vector afterwards, which avoids
    # building a 4d copy of the input first:
    #   [a, b, c, d]   [x]   [a, b, c]   [x]   [d]
    #   [e, f, g, h] @ [y] = [e, f, g] @ [y] + [h]
    #   [i, j, k, l]   [z]   [i, j, k]   [z]   [l]
    #   [m, n, o, p]   [1]   [m, n, o]         [p]

    # column_vector_multiplication in mathutils_Vector.c casts the matrix values to double precision for
    # Matrix @ Vector and casts back to single precision when returning, so at least double precision math is always
    # used here to match standard Blender behaviour.
    calc_dtype = np.result_type(np.double, vectors)

    linear_part = None  # Matrix block to multiply by, or None when that block is the identity.
    offset = None  # Per-component amounts to add after multiplying, or None when they are all zero.
    w_value = 1.0

    if mat4 is not None:
        # The last matrix row only influences the w component, so it is dropped when 3d vectors are returned.
        row_count = 4 if return_4d else 3
        matrix = np.array(mat4, dtype=calc_dtype)[:row_count]

        # First three columns get multiplied, the last column gets added.
        candidate_linear = matrix[:, :3]
        last_column = matrix[:, 3]

        # Compare against the matching slice of an identity matrix to find out if multiplying is needed at all.
        if not np.array_equal(candidate_linear, np.eye(row_count, 3, dtype=calc_dtype)):
            linear_part = candidate_linear

        if return_4d and linear_part is None:
            # With nothing to multiply, mx+ny+oz+p is 0x+0y+0z+p, so w can be written directly into the result and
            # only the translation part of the column is left to add.
            w_value = last_column[3]
            last_column = last_column[:3]

        if last_column.any():
            offset = last_column

    if linear_part is not None:
        # (v,3) @ (3,d) -> (v,d). Transposing the matrix rather than the vectors keeps the result C contiguous, which
        # makes the later conversion to flattened bytes with .tobytes() faster.
        result = vectors @ linear_part.T
    elif return_4d:
        # Anything that is added must be added in the precision used for math.
        result = np.empty((len(vectors), 4), dtype=calc_dtype if offset is not None else out_dtype)
        result[:, :3] = vectors
        result[:, 3] = w_value
    else:
        # A copy is forced whenever something will be added in place, so the caller's array is never modified.
        must_copy = offset is not None
        result = vectors.astype(calc_dtype if must_copy else out_dtype, copy=must_copy)

    if offset is not None:
        for component, amount in enumerate(offset):
            if amount != 0:
                result[:, component] += amount

    # Cast to the requested type; this is a no-op when the dtype already matches.
    return result.astype(out_dtype, copy=False)
|
||||
|
||||
|
||||
def vcos_transformed(raw_cos, m=None, dtype=None):
    """Return the 3d vectors of raw_cos multiplied by the optional 4x4 matrix m, as an array of 3d vectors.

    dtype is the dtype of the returned array; None keeps the dtype of raw_cos."""
    return _mat4_vec3_array_multiply(mat4=m, vec3_array=raw_cos, dtype=dtype)
|
||||
|
||||
|
||||
def nors_transformed(raw_nors, m=None, dtype=None):
    """Return the 3d vectors of raw_nors multiplied by the optional 4x4 matrix m, as an array of 3d vectors.

    dtype is the dtype of the returned array; None keeps the dtype of raw_nors."""
    # XXX Normals are also expected to be 4D, but 3D normals are returned for now. The 4D variant would pass
    # return_4d=True to the helper.
    return _mat4_vec3_array_multiply(mat4=m, vec3_array=raw_nors, dtype=dtype)
|
||||
|
||||
|
||||
def astype_view_signedness(arr, new_dtype):
    """Convert arr to new_dtype, reinterpreting the data in place when only the integer signedness differs.

    When arr and new_dtype are integer types of opposite signedness with the same itemsize and byteorder, an unsafe
    view of arr as new_dtype is returned. In every other case the result of arr.astype(new_dtype, copy=False) is
    returned, which is a view when the array can be safely viewed as the new type and a converted copy otherwise.

    The special case exists because unsigned types can't be safely viewed as signed or vice-versa, so
    .astype(..., copy=False) alone would always copy them.

    This is intended for viewing uintc data (a common Blender C type with variable itemsize, though usually 4 bytes,
    so uint32) as int32 (a common FBX type), when the itemsizes match."""
    source = arr.dtype
    # Accepts a type, a string or an existing dtype; a dtype is needed to read itemsize, byteorder and kind.
    target = np.dtype(new_dtype)

    # Only signed ('i') and unsigned ('u') integers have signedness, so the two kinds must be exactly this pair.
    opposite_signedness = {source.kind, target.kind} == {'i', 'u'}
    same_layout = source.itemsize == target.itemsize and source.byteorder == target.byteorder

    if opposite_signedness and same_layout:
        return arr.view(target)
    # Everything else is left to .astype.
    return arr.astype(target, copy=False)
|
||||
|
||||
|
||||
def fast_first_axis_flat(ar):
    """Return a flat view (or a copy when a view is not possible) of ar in which every element holds the bytes of one
    whole entry along the first axis of ar, i.e. an entire row including any further dimensions.

    The element dtype is picked to be fast to sort and to sort according to individual bytes. Because that order can
    differ from the sort order of ar's own dtype, the result isn't typically useful for actual sorting, but it is
    useful for sorting-based uniqueness, such as np.unique."""
    # Number of bytes in one row. The product over an empty shape tail is 1, so when there are no rows each element
    # is viewed as the new dtype on its own.
    row_nbytes = ar.itemsize * math.prod(ar.shape[1:])

    # Integer types sort the fastest, but only exist for a few itemsizes. Signed/unsigned makes no noticeable speed
    # difference; unsigned is used so that ordering goes by individual bytes like the non-integer fallbacks.
    row_dtype = {1: np.uint8, 2: np.uint16, 4: np.uint32, 8: np.uint64}.get(row_nbytes)
    if row_dtype is None:
        # With kind='stable' numpy only uses radix sort for integer types, but sorting one item per row is still
        # significantly faster than sorting several row elements or structured fields, so fall back to a flexible
        # size dtype with matching itemsize.
        # Should always be 4 because each character in a unicode string is UCS4.
        char_nbytes = np.dtype((np.str_, 1)).itemsize
        char_count, leftover = divmod(row_nbytes, char_nbytes)
        if leftover == 0:
            # Unicode strings seem to be slightly faster to sort than bytes.
            row_dtype = np.dtype((np.str_, char_count))
        else:
            # Bytes seem to be slightly faster to sort than raw bytes (np.void).
            row_dtype = np.dtype((np.bytes_, row_nbytes))

    # Flatten first (a view where possible, otherwise a copy).
    flat = ar.reshape(-1)
    # Viewing as a dtype of a different size needs the last axis (the entire array in NumPy 1.22 and earlier) to be
    # C-contiguous.
    if row_nbytes != flat.itemsize and not flat.flags.c_contiguous:
        flat = np.ascontiguousarray(flat)
    return flat.view(row_dtype)
|
||||
|
||||
|
||||
def fast_first_axis_unique(ar, return_unique=True, return_index=False, return_inverse=False, return_counts=False):
    """np.unique along the first axis, optimised for arrays with multiple elements per row when the unique rows don't
    need to come back sorted.

    np.unique sorts multi-element rows by comparing one row-element at a time, like comparing tuples, which is
    costly. Here each entire row is viewed as a single non-structured element instead, which sorts much faster. As
    the values are sorted as a different type to their original, the returned unique rows may not be sorted
    according to their original type.

    return_index, return_inverse and return_counts are passed on to np.unique. Passing return_unique=False leaves
    the array of unique rows out of the result; whenever that leaves a single array, the array itself is returned
    instead of a one-element tuple.

    Float type caveats:
    All elements of -0.0 are replaced with 0.0 in the input array itself (it is modified in place) so that both
    values are collapsed into one.
    NaN values can have lots of different byte representations (e.g. signalling/quiet and custom payloads). Only the
    duplicates of each unique byte representation will be collapsed into one."""
    # At least something should always be returned.
    assert(return_unique or return_index or return_inverse or return_counts)
    # Only signed integer, unsigned integer and floating-point kinds of data are allowed. Other kinds of data have
    # not been tested.
    assert(ar.dtype.kind in "iuf")

    if ar.dtype.kind == 'f':
        # -0.0 and 0.0 compare equal but have different byte representations, so normalise them to 0.0.
        ar[ar == -0.0] = 0.0

    # The unique array is always computed by np.unique, even when it is not wanted; this is generally insignificant
    # compared to the cost of sorting.
    np_result = np.unique(fast_first_axis_flat(ar), return_index=return_index,
                          return_inverse=return_inverse, return_counts=return_counts)

    if not return_unique:
        # Drop the leading unique array and unpack single element tuples.
        extras = np_result[1:]
        return extras[0] if len(extras) == 1 else extras

    has_extras = isinstance(np_result, tuple)
    flat_unique = np_result[0] if has_extras else np_result
    # Back to the original dtype, with the same number of elements and extra dimensions per row as the input.
    unique_rows = flat_unique.view(ar.dtype).reshape(-1, *ar.shape[1:])

    if has_extras:
        return (unique_rows,) + np_result[1:]
    return unique_rows
|
||||
|
||||
|
||||
# ##### UIDs code. #####
|
||||
|
||||
# ID class (mere int).
|
||||
|
Loading…
Reference in New Issue
Block a user