FBX Export: Base patch for numpy speedup #104447

Merged
Bastien Montagne merged 1 commit from Mysteryem/blender-addons:fbx_numpy_base_patch_pr into main 2023-02-28 18:03:14 +01:00
3 changed files with 307 additions and 31 deletions

io_scene_fbx/encode_bin.py

@@ -9,6 +9,7 @@ except:
from struct import pack
import array
import numpy as np
import zlib
_BLOCK_SENTINEL_LENGTH = 13
@@ -112,17 +113,7 @@ class FBXElem:
self.props_type.append(data_types.STRING)
self.props.append(data)
def _add_array_helper(self, data, array_type, prop_type):
assert(isinstance(data, array.array))
assert(data.typecode == array_type)
length = len(data)
if _IS_BIG_ENDIAN:
data = data[:]
data.byteswap()
data = data.tobytes()
def _add_array_helper(self, data, prop_type, length):
# mimic behavior of fbxconverter (also common sense)
# we could make this configurable.
encoding = 0 if len(data) <= 128 else 1
@@ -138,35 +129,78 @@ class FBXElem:
self.props_type.append(prop_type)
self.props.append(data)
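With the refactor, _add_array_helper receives ready-made bytes plus the element count, so both input paths converge here. In the binary FBX format, encoding 1 means the payload is zlib-deflated before it is written (hence the zlib import above); a minimal sketch of that convention, using a hypothetical helper name:

import zlib

def _encode_payload(raw_bytes):
    # Hypothetical illustration: small arrays are written raw (encoding 0),
    # larger ones are deflate-compressed (encoding 1), mirroring the
    # 128-byte threshold used above.
    if len(raw_bytes) <= 128:
        return 0, raw_bytes
    return 1, zlib.compress(raw_bytes)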
def _add_parray_helper(self, data, array_type, prop_type):
assert (isinstance(data, array.array))
assert (data.typecode == array_type)
length = len(data)
if _IS_BIG_ENDIAN:
data = data[:]
data.byteswap()
data = data.tobytes()
self._add_array_helper(data, prop_type, length)
def _add_ndarray_helper(self, data, dtype, prop_type):
assert (isinstance(data, np.ndarray))
assert (data.dtype == dtype)
length = data.size
if _IS_BIG_ENDIAN and data.dtype.isnative:
data = data.byteswap()
data = data.tobytes()
self._add_array_helper(data, prop_type, length)
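A side note on the two helpers: array.array.byteswap() swaps in place (hence the data[:] copy), while np.ndarray.byteswap() returns a swapped copy and leaves its input untouched. The extra dtype.isnative check also skips ndarrays whose dtype is already explicitly little-endian, since binary FBX data is little-endian. A small illustration of the in-place/copy difference:

import array
import numpy as np

a = array.array('i', [1, 2, 3])
b = a[:]          # copy first; array.array.byteswap() mutates in place
b.byteswap()

n = np.array([1, 2, 3], dtype=np.int32)
m = n.byteswap()  # returns a byte-swapped copy; n is unchanged

# Wherever C int is 4 bytes (the usual case), both pairs hold identical bytes.
assert a.tobytes() == n.tobytes()
assert b.tobytes() == m.tobytes()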
def add_int32_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT32, data)
self._add_array_helper(data, data_types.ARRAY_INT32, data_types.INT32_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.int32, data_types.INT32_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT32, data)
self._add_parray_helper(data, data_types.ARRAY_INT32, data_types.INT32_ARRAY)
def add_int64_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT64, data)
self._add_array_helper(data, data_types.ARRAY_INT64, data_types.INT64_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.int64, data_types.INT64_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT64, data)
self._add_parray_helper(data, data_types.ARRAY_INT64, data_types.INT64_ARRAY)
def add_float32_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT32, data)
self._add_array_helper(data, data_types.ARRAY_FLOAT32, data_types.FLOAT32_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.float32, data_types.FLOAT32_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT32, data)
self._add_parray_helper(data, data_types.ARRAY_FLOAT32, data_types.FLOAT32_ARRAY)
def add_float64_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT64, data)
self._add_array_helper(data, data_types.ARRAY_FLOAT64, data_types.FLOAT64_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.float64, data_types.FLOAT64_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT64, data)
self._add_parray_helper(data, data_types.ARRAY_FLOAT64, data_types.FLOAT64_ARRAY)
def add_bool_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BOOL, data)
self._add_array_helper(data, data_types.ARRAY_BOOL, data_types.BOOL_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, bool, data_types.BOOL_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BOOL, data)
self._add_parray_helper(data, data_types.ARRAY_BOOL, data_types.BOOL_ARRAY)
def add_byte_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BYTE, data)
self._add_array_helper(data, data_types.ARRAY_BYTE, data_types.BYTE_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.byte, data_types.BYTE_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BYTE, data)
self._add_parray_helper(data, data_types.ARRAY_BYTE, data_types.BYTE_ARRAY)
# -------------------------
# internal helper functions
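Taken together, each add_*_array method now serializes an ndarray directly and falls back to the old array.array conversion for any other iterable. A standalone sketch of the int32 dispatch (the helper name is hypothetical; 'i' stands in for data_types.ARRAY_INT32):

import array
import numpy as np

def _int32_payload(data):
    # Hypothetical flattened version of add_int32_array's dispatch.
    if isinstance(data, np.ndarray):
        assert data.dtype == np.int32
        return data.tobytes()
    if not isinstance(data, array.array):
        data = array.array('i', data)
    return data.tobytes()

# Both paths produce identical bytes wherever C int is 4 bytes.
assert _int32_payload(np.arange(4, dtype=np.int32)) == _int32_payload(range(4))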

io_scene_fbx/export_fbx_bin.py

@@ -6,6 +6,7 @@
import array
import datetime
import math
import numpy as np
import os
import time
@@ -46,9 +47,9 @@ from .fbx_utils import (
# Miscellaneous utils.
PerfMon,
units_blender_to_fbx_factor, units_convertor, units_convertor_iter,
matrix4_to_array, similar_values, similar_values_iter,
matrix4_to_array, similar_values, similar_values_iter, astype_view_signedness, fast_first_axis_unique,
# Mesh transform helpers.
vcos_transformed_gen, nors_transformed_gen,
vcos_transformed_gen, nors_transformed_gen, vcos_transformed, nors_transformed,
# UUID from key.
get_fbx_uuid_from_key,
# Key generators.

io_scene_fbx/fbx_utils.py

@@ -9,6 +9,7 @@ import time
from collections import namedtuple
from collections.abc import Iterable
from itertools import zip_longest, chain
import numpy as np
import bpy
import bpy_extras
@@ -272,6 +273,246 @@ def nors_transformed_gen(raw_nors, m=None):
return gen if m is None else (m @ Vector(v) for v in gen)
def _mat4_vec3_array_multiply(mat4, vec3_array, dtype=None, return_4d=False):
"""Multiply a 4d matrix by each 3d vector in an array and return as an array of either 3d or 4d vectors.
A view of the input array is returned if return_4d=False, the dtype matches the input array, and either the matrix is
None or, ignoring the last row, it is a 3x3 identity matrix with no translation:
1, 0, 0, 0
0, 1, 0, 0
0, 0, 1, 0
When dtype=None, it defaults to the dtype of the input array."""
return_dtype = dtype if dtype is not None else vec3_array.dtype
vec3_array = vec3_array.reshape(-1, 3)
# Multiplying a 4d mathutils.Matrix by a 3d mathutils.Vector implicitly extends the Vector to 4d during the
# calculation by appending 1.0 to the Vector and then the 4d result is truncated back to 3d.
# Numpy does not do an implicit extension to 4d, so it would have to be done explicitly by extending the entire
# vec3_array to 4d.
# However, since the w component of the vectors is always 1.0, the last column can be excluded from the
# multiplication and then added to every multiplied vector afterwards, which avoids having to make a 4d copy of
# vec3_array beforehand.
# For a single column vector:
# ┌a, b, c, d┐ ┌x┐ ┌ax+by+cz+d┐
# │e, f, g, h│ @ │y│ = │ex+fy+gz+h│
# │i, j, k, l│ │z│ │ix+jy+kz+l│
# └m, n, o, p┘ └1┘ └mx+ny+oz+p┘
# ┌a, b, c┐ ┌x┐ ┌d┐ ┌ax+by+cz┐ ┌d┐ ┌ax+by+cz+d┐
# │e, f, g│ @ │y│ + │h│ = │ex+fy+gz│ + │h│ = │ex+fy+gz+h│
# │i, j, k│ └z┘ │l│ │ix+jy+kz│ │l│ │ix+jy+kz+l│
# └m, n, o┘ └p┘ └mx+ny+oz┘ └p┘ └mx+ny+oz+p┘
# column_vector_multiplication in mathutils_Vector.c uses double precision math for Matrix @ Vector by casting the
# matrix's values to double precision and then casts back to single precision when returning the result, so at least
# double precision math is always used here to match standard Blender behaviour.
math_precision = np.result_type(np.double, vec3_array)
to_multiply = None
to_add = None
w_to_set = 1.0
if mat4 is not None:
mat_np = np.array(mat4, dtype=math_precision)
# The matrix is compared against the identity matrix to check whether any multiplication is required at all.
identity = np.identity(4, dtype=math_precision)
if not return_4d:
# If returning 3d, the entire last row of the matrix can be ignored because it only affects the w component.
mat_np = mat_np[:3]
identity = identity[:3]
# Split mat_np into the columns to multiply and the column to add afterwards.
# First 3 columns
multiply_columns = mat_np[:, :3]
multiply_identity = identity[:, :3]
# Last column only
add_column = mat_np.T[3]
# Analyze the split parts of the matrix to figure out if there is anything to multiply and anything to add.
if not np.array_equal(multiply_columns, multiply_identity):
to_multiply = multiply_columns
if return_4d and to_multiply is None:
# When there's nothing to multiply, the w component of add_column can be set directly into the array because
# mx+ny+oz+p becomes 0x+0y+0z+p where p is add_column[3].
w_to_set = add_column[3]
# Replace add_column with a view of only the translation.
add_column = add_column[:3]
if add_column.any():
to_add = add_column
if to_multiply is None:
# If there's anything to add, ensure it's added using the precision being used for math.
array_dtype = math_precision if to_add is not None else return_dtype
if return_4d:
multiplied_vectors = np.empty((len(vec3_array), 4), dtype=array_dtype)
multiplied_vectors[:, :3] = vec3_array
multiplied_vectors[:, 3] = w_to_set
else:
# If there's anything to add, ensure a copy is made so that the input vec3_array isn't modified.
multiplied_vectors = vec3_array.astype(array_dtype, copy=to_add is not None)
else:
# Matrix multiplication has the signature (n,k) @ (k,m) -> (n,m).
# Where v is the number of vectors in vec3_array and d is the number of vector dimensions to return:
# to_multiply has shape (d,3), vec3_array has shape (v,3) and the result should have shape (v,d).
# Either vec3_array or to_multiply must be transposed:
# Can transpose vec3_array and then transpose the result:
# (v,3).T -> (3,v); (d,3) @ (3,v) -> (d,v); (d,v).T -> (v,d)
# Or transpose to_multiply and swap the order of multiplication:
# (d,3).T -> (3,d); (v,3) @ (3,d) -> (v,d)
# There's no, or negligible, performance difference between the two options; however, the result of the latter
# will be C contiguous in memory, making it faster to convert to flattened bytes with .tobytes().
multiplied_vectors = vec3_array @ to_multiply.T
if to_add is not None:
for axis, to_add_to_axis in zip(multiplied_vectors.T, to_add):
if to_add_to_axis != 0:
axis += to_add_to_axis
# Cast to the desired return type before returning.
return multiplied_vectors.astype(return_dtype, copy=False)
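The split into a 3x3 multiply plus a translation add can be sanity-checked against mathutils directly; a minimal check, assuming it runs inside Blender (or anywhere mathutils is importable):

import numpy as np
from mathutils import Matrix, Vector

m = Matrix.Translation((1.0, 2.0, 3.0)) @ Matrix.Rotation(0.5, 4, 'Z')
cos = np.random.rand(10, 3)

# Decomposed form used above: (v,3) @ (3,3) transposed, plus the last column.
mat = np.array(m, dtype=np.float64)
fast = cos @ mat[:3, :3].T + mat[:3, 3]

# Reference: per-vector mathutils multiplication (implicit w=1 extension).
ref = np.array([m @ Vector(v) for v in cos])
assert np.allclose(fast, ref)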
def vcos_transformed(raw_cos, m=None, dtype=None):
return _mat4_vec3_array_multiply(m, raw_cos, dtype)
def nors_transformed(raw_nors, m=None, dtype=None):
# Great, now normals are also expected 4D!
# XXX Back to 3D normals for now!
# return _mat4_vec3_array_multiply(m, raw_nors, dtype, return_4d=True)
return _mat4_vec3_array_multiply(m, raw_nors, dtype)
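A usage sketch for the array-based replacements, with made-up coordinates in the flat x0, y0, z0, x1, y1, z1, ... layout produced by foreach_get:

import numpy as np
from mathutils import Matrix

raw_cos = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float32)

# No matrix: returns a reshaped view of the input, with no copy made.
as_is = vcos_transformed(raw_cos)
# Translation only: the matrix multiply is skipped and 1.0 is added to the x
# axis, in double precision, before casting back to float32.
moved = vcos_transformed(raw_cos, Matrix.Translation((1.0, 0.0, 0.0)), np.float32)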
def astype_view_signedness(arr, new_dtype):
"""Unsafely views arr as new_dtype if the itemsize and byteorder of arr matches but the signedness does not,
otherwise calls np.ndarray.astype with copy=False.
The benefit of copy=False is that if the array can be safely viewed as the new type, then a view is made, instead of
a copy with the new type.
Unsigned types can't be viewed safely as signed or vice-versa, meaning that a copy would always be made by
.astype(..., copy=False).
This is intended for viewing uintc data (a common Blender C type with variable itemsize, though usually 4 bytes, so
uint32) as int32 (a common FBX type), when the itemsizes match."""
arr_dtype = arr.dtype
if not isinstance(new_dtype, np.dtype):
# new_dtype could be a type instance or a string, but it needs to be a dtype to compare its itemsize, byteorder
# and kind.
new_dtype = np.dtype(new_dtype)
# For simplicity, only dtypes of the same itemsize and byteorder, but opposite signedness, are handled. Everything
# else is left to .astype.
arr_kind = arr_dtype.kind
new_kind = new_dtype.kind
if (
# Signed and unsigned int are opposite in terms of signedness. Other types don't have signedness.
((arr_kind == 'i' and new_kind == 'u') or (arr_kind == 'u' and new_kind == 'i'))
and arr_dtype.itemsize == new_dtype.itemsize
and arr_dtype.byteorder == new_dtype.byteorder
):
# new_dtype has opposite signedness and matching itemsize and byteorder, so return a view of the new type.
return arr.view(new_dtype)
else:
return arr.astype(new_dtype, copy=False)
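The payoff of the view is that it aliases the same buffer, while .astype(..., copy=False) must still copy when only the signedness differs; a quick check:

import numpy as np

loops = np.arange(8, dtype=np.uintc)

viewed = astype_view_signedness(loops, np.int32)
copied = loops.astype(np.int32, copy=False)

# Where uintc is 4 bytes (the usual case), the view shares memory with the
# input, while astype had to make a copy because of the signedness change.
if np.dtype(np.uintc).itemsize == np.dtype(np.int32).itemsize:
    assert np.shares_memory(viewed, loops)
assert not np.shares_memory(copied, loops)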
def fast_first_axis_flat(ar):
"""Get a flat view (or a copy if a view is not possible) of the input array whereby each element is a single element
of a dtype that is fast to sort, sorts according to individual bytes and contains the data for an entire row (and
any further dimensions) of the input array.
Since the dtype of the view could sort in a different order to the dtype of the input array, this isn't typically
useful for actual sorting, but it is useful for sorting-based uniqueness, such as np.unique."""
# If the input array is 1d, there are no extra dimensions per row, so each element is viewed as the new dtype.
elements_per_row = math.prod(ar.shape[1:])
row_itemsize = ar.itemsize * elements_per_row
# Get a dtype with itemsize that equals row_itemsize.
# Integer types sort the fastest, but are only available for specific itemsizes.
uint_dtypes_by_itemsize = {1: np.uint8, 2: np.uint16, 4: np.uint32, 8: np.uint64}
# Signed/unsigned makes no noticeable speed difference, but using unsigned will result in ordering according to
# individual bytes like the other, non-integer types.
if row_itemsize in uint_dtypes_by_itemsize:
entire_row_dtype = uint_dtypes_by_itemsize[row_itemsize]
else:
# When using kind='stable' sorting, numpy only uses radix sort with integer types, but it's still
# significantly faster to sort by a single item per row instead of multiple row elements or multiple structured
# type fields.
# Construct a flexible size dtype with matching itemsize.
# Should always be 4 because each character in a unicode string is UCS4.
str_itemsize = np.dtype((np.str_, 1)).itemsize
if row_itemsize % str_itemsize == 0:
# Unicode strings seem to be slightly faster to sort than bytes.
entire_row_dtype = np.dtype((np.str_, row_itemsize // str_itemsize))
else:
# Byte strings (np.bytes_) seem to be slightly faster to sort than raw bytes (np.void).
entire_row_dtype = np.dtype((np.bytes_, row_itemsize))
# View each element along the first axis as a single element.
# View (or copy if a view is not possible) as flat
ar = ar.reshape(-1)
# To view as a dtype of different size, the last axis (entire array in NumPy 1.22 and earlier) must be C-contiguous.
if row_itemsize != ar.itemsize and not ar.flags.c_contiguous:
ar = np.ascontiguousarray(ar)
return ar.view(entire_row_dtype)
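The core of the trick: a row of two int32 values occupies exactly eight bytes, so it can be reinterpreted as a single uint64 scalar that sorts quickly; a standalone illustration with hypothetical edge data:

import numpy as np

edges = np.array([[0, 1], [2, 3], [0, 1]], dtype=np.int32)

# Each (int32, int32) row viewed as one uint64 scalar: 2 * 4 bytes -> 8 bytes.
flat = np.ascontiguousarray(edges).reshape(-1).view(np.uint64)
assert flat.shape == (3,)
assert flat[0] == flat[2]  # identical rows collapse to identical scalars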
def fast_first_axis_unique(ar, return_unique=True, return_index=False, return_inverse=False, return_counts=False):
"""np.unique with axis=0 but optimised for when the input array has multiple elements per row, and the returned
unique array doesn't need to be sorted.
Arrays with more than one element per row are more costly to sort in np.unique due to being compared one
row-element at a time, like comparing tuples.
By viewing each entire row as a single non-structured element, much faster sorting can be achieved. Since the values
are viewed as a different type than their original, the returned array of unique values may not be sorted according to
their original type.
The array of unique values can be excluded from the returned tuple by specifying return_unique=False.
Float type caveats:
All elements of -0.0 in the input array will be replaced with 0.0 to ensure that both values are collapsed into one.
NaN values can have lots of different byte representations (e.g. signalling/quiet and custom payloads). Only the
duplicates of each unique byte representation will be collapsed into one."""
# At least something should always be returned.
assert(return_unique or return_index or return_inverse or return_counts)
# Only signed integer, unsigned integer and floating-point kinds of data are allowed. Other kinds of data have not
# been tested.
assert(ar.dtype.kind in "iuf")
# Floating-point types have different byte representations for -0.0 and 0.0. Collapse them together by replacing all
# -0.0 in the input array with 0.0.
if ar.dtype.kind == 'f':
ar[ar == -0.0] = 0.0
# It's a bit annoying that the unique array is always calculated even when it might not be needed, but it is
# generally insignificant compared to the cost of sorting.
result = np.unique(fast_first_axis_flat(ar), return_index=return_index,
return_inverse=return_inverse, return_counts=return_counts)
if return_unique:
unique = result[0] if isinstance(result, tuple) else result
# View in the original dtype.
unique = unique.view(ar.dtype)
# Return the same number of elements per row and any extra dimensions per row as the input array.
unique.shape = (-1, *ar.shape[1:])
if isinstance(result, tuple):
return (unique,) + result[1:]
else:
return unique
else:
# Remove the first element, the unique array.
result = result[1:]
if len(result) == 1:
# Unpack single element tuples.
return result[0]
else:
return result
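The returned rows match np.unique(ar, axis=0) as a set, though not necessarily in the same order; a small check:

import numpy as np

ar = np.array([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0]])

fast = fast_first_axis_unique(ar)
slow = np.unique(ar, axis=0)

# Same set of unique rows; only the ordering may differ, because the sort ran
# on the reinterpreted dtype rather than on the original float64 rows.
assert {tuple(row) for row in fast} == {tuple(row) for row in slow}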
# ##### UIDs code. #####
# ID class (mere int).