FBX Export: Base patch for numpy speedup #104447

Merged
Bastien Montagne merged 1 commit from Mysteryem/blender-addons:fbx_numpy_base_patch_pr into main 2023-02-28 18:03:14 +01:00
3 changed files with 307 additions and 31 deletions

io_scene_fbx/encode_bin.py

@@ -9,6 +9,7 @@ except:
from struct import pack
import array
import numpy as np
import zlib
_BLOCK_SENTINEL_LENGTH = 13
@@ -112,17 +113,7 @@ class FBXElem:
self.props_type.append(data_types.STRING)
self.props.append(data)
def _add_array_helper(self, data, array_type, prop_type):
assert(isinstance(data, array.array))
assert(data.typecode == array_type)
length = len(data)
if _IS_BIG_ENDIAN:
data = data[:]
data.byteswap()
data = data.tobytes()
def _add_array_helper(self, data, prop_type, length):
# mimic behavior of fbxconverter (also common sense)
# we could make this configurable.
encoding = 0 if len(data) <= 128 else 1
@@ -138,35 +129,78 @@ class FBXElem:
self.props_type.append(prop_type)
self.props.append(data)
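With the refactor, _add_array_helper receives ready-made bytes plus the element count, so both input paths converge here. In the binary FBX format, encoding 1 means the payload is zlib-deflated before it is written (hence the zlib import above); a minimal sketch of that convention, using a hypothetical helper name:

import zlib

def _encode_payload(raw_bytes):
    # Hypothetical illustration: small arrays are written raw (encoding 0),
    # larger ones are deflate-compressed (encoding 1), mirroring the
    # 128-byte threshold used above.
    if len(raw_bytes) <= 128:
        return 0, raw_bytes
    return 1, zlib.compress(raw_bytes)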
def _add_parray_helper(self, data, array_type, prop_type):
assert (isinstance(data, array.array))
assert (data.typecode == array_type)
length = len(data)
if _IS_BIG_ENDIAN:
data = data[:]
data.byteswap()
data = data.tobytes()
self._add_array_helper(data, prop_type, length)
def _add_ndarray_helper(self, data, dtype, prop_type):
assert (isinstance(data, np.ndarray))
assert (data.dtype == dtype)
length = data.size
if _IS_BIG_ENDIAN and data.dtype.isnative:
data = data.byteswap()
data = data.tobytes()
self._add_array_helper(data, prop_type, length)
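A side note on the two helpers: array.array.byteswap() swaps in place (hence the data[:] copy), while np.ndarray.byteswap() returns a swapped copy and leaves its input untouched. The extra dtype.isnative check also skips ndarrays whose dtype is already explicitly little-endian, since binary FBX data is little-endian. A small illustration of the in-place/copy difference:

import array
import numpy as np

a = array.array('i', [1, 2, 3])
b = a[:]          # copy first; array.array.byteswap() mutates in place
b.byteswap()

n = np.array([1, 2, 3], dtype=np.int32)
m = n.byteswap()  # returns a byte-swapped copy; n is unchanged

# Wherever C int is 4 bytes (the usual case), both pairs hold identical bytes.
assert a.tobytes() == n.tobytes()
assert b.tobytes() == m.tobytes()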
def add_int32_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT32, data)
self._add_array_helper(data, data_types.ARRAY_INT32, data_types.INT32_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.int32, data_types.INT32_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT32, data)
self._add_parray_helper(data, data_types.ARRAY_INT32, data_types.INT32_ARRAY)
def add_int64_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT64, data)
self._add_array_helper(data, data_types.ARRAY_INT64, data_types.INT64_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.int64, data_types.INT64_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_INT64, data)
self._add_parray_helper(data, data_types.ARRAY_INT64, data_types.INT64_ARRAY)
def add_float32_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT32, data)
self._add_array_helper(data, data_types.ARRAY_FLOAT32, data_types.FLOAT32_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.float32, data_types.FLOAT32_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT32, data)
self._add_parray_helper(data, data_types.ARRAY_FLOAT32, data_types.FLOAT32_ARRAY)
def add_float64_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT64, data)
self._add_array_helper(data, data_types.ARRAY_FLOAT64, data_types.FLOAT64_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.float64, data_types.FLOAT64_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_FLOAT64, data)
self._add_parray_helper(data, data_types.ARRAY_FLOAT64, data_types.FLOAT64_ARRAY)
def add_bool_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BOOL, data)
self._add_array_helper(data, data_types.ARRAY_BOOL, data_types.BOOL_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, bool, data_types.BOOL_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BOOL, data)
self._add_parray_helper(data, data_types.ARRAY_BOOL, data_types.BOOL_ARRAY)
def add_byte_array(self, data):
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BYTE, data)
self._add_array_helper(data, data_types.ARRAY_BYTE, data_types.BYTE_ARRAY)
if isinstance(data, np.ndarray):
self._add_ndarray_helper(data, np.byte, data_types.BYTE_ARRAY)
else:
if not isinstance(data, array.array):
data = array.array(data_types.ARRAY_BYTE, data)
self._add_parray_helper(data, data_types.ARRAY_BYTE, data_types.BYTE_ARRAY)
# -------------------------
# internal helper functions
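Taken together, each add_*_array method now serializes an ndarray directly and falls back to the old array.array conversion for any other iterable. A standalone sketch of the int32 dispatch (the helper name is hypothetical; 'i' stands in for data_types.ARRAY_INT32):

import array
import numpy as np

def _int32_payload(data):
    # Hypothetical flattened version of add_int32_array's dispatch.
    if isinstance(data, np.ndarray):
        assert data.dtype == np.int32
        return data.tobytes()
    if not isinstance(data, array.array):
        data = array.array('i', data)
    return data.tobytes()

# Both paths produce identical bytes wherever C int is 4 bytes.
assert _int32_payload(np.arange(4, dtype=np.int32)) == _int32_payload(range(4))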

io_scene_fbx/export_fbx_bin.py

@@ -6,6 +6,7 @@
import array
import datetime
import math
import numpy as np
import os
import time
@@ -46,9 +47,9 @@ from .fbx_utils import (
# Miscellaneous utils.
PerfMon,
units_blender_to_fbx_factor, units_convertor, units_convertor_iter,
matrix4_to_array, similar_values, similar_values_iter,
matrix4_to_array, similar_values, similar_values_iter, astype_view_signedness, fast_first_axis_unique,
# Mesh transform helpers.
vcos_transformed_gen, nors_transformed_gen,
vcos_transformed_gen, nors_transformed_gen, vcos_transformed, nors_transformed,
# UUID from key.
get_fbx_uuid_from_key,
# Key generators.

io_scene_fbx/fbx_utils.py

@@ -9,6 +9,7 @@ import time
from collections import namedtuple
from collections.abc import Iterable
from itertools import zip_longest, chain
import numpy as np
import bpy
import bpy_extras
@@ -272,6 +273,246 @@ def nors_transformed_gen(raw_nors, m=None):
return gen if m is None else (m @ Vector(v) for v in gen)
def _mat4_vec3_array_multiply(mat4, vec3_array, dtype=None, return_4d=False):
"""Multiply a 4d matrix by each 3d vector in an array and return as an array of either 3d or 4d vectors.
A view of the input array is returned if return_4d=False, the dtype matches the input array, and either the matrix is
None or, ignoring the last row, it is a 3x3 identity matrix with no translation:
1, 0, 0, 0
0, 1, 0, 0
0, 0, 1, 0
When dtype=None, it defaults to the dtype of the input array."""
return_dtype = dtype if dtype is not None else vec3_array.dtype
vec3_array = vec3_array.reshape(-1, 3)
# Multiplying a 4d mathutils.Matrix by a 3d mathutils.Vector implicitly extends the Vector to 4d during the
# calculation by appending 1.0 to the Vector and then the 4d result is truncated back to 3d.
# Numpy does not do an implicit extension to 4d, so it would have to be done explicitly by extending the entire
# vec3_array to 4d.
# However, since the w component of the vectors is always 1.0, the last column can be excluded from the
# multiplication and then added to every multiplied vector afterwards, which avoids having to make a 4d copy of
# vec3_array beforehand.
# For a single column vector:
# ┌a, b, c, d┐ ┌x┐ ┌ax+by+cz+d┐
# │e, f, g, h│ @ │y│ = │ex+fy+gz+h│
# │i, j, k, l│ │z│ │ix+jy+kz+l│
# └m, n, o, p┘ └1┘ └mx+ny+oz+p┘
# ┌a, b, c┐ ┌x┐ ┌d┐ ┌ax+by+cz┐ ┌d┐ ┌ax+by+cz+d┐
# │e, f, g│ @ │y│ + │h│ = │ex+fy+gz│ + │h│ = │ex+fy+gz+h│
# │i, j, k│ └z┘ │l│ │ix+jy+kz│ │l│ │ix+jy+kz+l│
# └m, n, o┘ └p┘ └mx+ny+oz┘ └p┘ └mx+ny+oz+p┘
# column_vector_multiplication in mathutils_Vector.c uses double precision math for Matrix @ Vector by casting the
# matrix's values to double precision and then casts back to single precision when returning the result, so at least
# double precision math is always used here to match standard Blender behaviour.
math_precision = np.result_type(np.double, vec3_array)
to_multiply = None
to_add = None
w_to_set = 1.0
if mat4 is not None:
mat_np = np.array(mat4, dtype=math_precision)
# The matrix is compared against the identity matrix to check whether any multiplication is required at all.
identity = np.identity(4, dtype=math_precision)
if not return_4d:
# If returning 3d, the entire last row of the matrix can be ignored because it only affects the w component.
mat_np = mat_np[:3]
identity = identity[:3]
# Split mat_np into the columns to multiply and the column to add afterwards.
# First 3 columns
multiply_columns = mat_np[:, :3]
multiply_identity = identity[:, :3]
# Last column only
add_column = mat_np.T[3]
# Analyze the split parts of the matrix to figure out if there is anything to multiply and anything to add.
if not np.array_equal(multiply_columns, multiply_identity):
to_multiply = multiply_columns
if return_4d and to_multiply is None:
# When there's nothing to multiply, the w component of add_column can be set directly into the array because
# mx+ny+oz+p becomes 0x+0y+0z+p where p is add_column[3].
w_to_set = add_column[3]
# Replace add_column with a view of only the translation.
add_column = add_column[:3]
if add_column.any():
to_add = add_column
if to_multiply is None:
# If there's anything to add, ensure it's added using the precision being used for math.
array_dtype = math_precision if to_add is not None else return_dtype
if return_4d:
multiplied_vectors = np.empty((len(vec3_array), 4), dtype=array_dtype)
multiplied_vectors[:, :3] = vec3_array
multiplied_vectors[:, 3] = w_to_set
else:
# If there's anything to add, ensure a copy is made so that the input vec3_array isn't modified.
multiplied_vectors = vec3_array.astype(array_dtype, copy=to_add is not None)
else:
# Matrix multiplication has the signature (n,k) @ (k,m) -> (n,m).
# Where v is the number of vectors in vec3_array and d is the number of vector dimensions to return:
# to_multiply has shape (d,3), vec3_array has shape (v,3) and the result should have shape (v,d).
# Either vec3_array or to_multiply must be transposed:
# Can transpose vec3_array and then transpose the result:
# (v,3).T -> (3,v); (d,3) @ (3,v) -> (d,v); (d,v).T -> (v,d)
# Or transpose to_multiply and swap the order of multiplication:
# (d,3).T -> (3,d); (v,3) @ (3,d) -> (v,d)
# There's no, or negligible, performance difference between the two options; however, the result of the latter
# will be C contiguous in memory, making it faster to convert to flattened bytes with .tobytes().
multiplied_vectors = vec3_array @ to_multiply.T
if to_add is not None:
for axis, to_add_to_axis in zip(multiplied_vectors.T, to_add):
if to_add_to_axis != 0:
axis += to_add_to_axis
# Cast to the desired return type before returning.
return multiplied_vectors.astype(return_dtype, copy=False)
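The split into a 3x3 multiply plus a translation add can be sanity-checked against mathutils directly; a minimal check, assuming it runs inside Blender (or anywhere mathutils is importable):

import numpy as np
from mathutils import Matrix, Vector

m = Matrix.Translation((1.0, 2.0, 3.0)) @ Matrix.Rotation(0.5, 4, 'Z')
cos = np.random.rand(10, 3)

# Decomposed form used above: (v,3) @ (3,3) transposed, plus the last column.
mat = np.array(m, dtype=np.float64)
fast = cos @ mat[:3, :3].T + mat[:3, 3]

# Reference: per-vector mathutils multiplication (implicit w=1 extension).
ref = np.array([m @ Vector(v) for v in cos])
assert np.allclose(fast, ref)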
def vcos_transformed(raw_cos, m=None, dtype=None):
return _mat4_vec3_array_multiply(m, raw_cos, dtype)
def nors_transformed(raw_nors, m=None, dtype=None):
# Great, now normals are also expected 4D!
# XXX Back to 3D normals for now!
# return _mat4_vec3_array_multiply(m, raw_nors, dtype, return_4d=True)
return _mat4_vec3_array_multiply(m, raw_nors, dtype)
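A usage sketch for the array-based replacements, with made-up coordinates in the flat x0, y0, z0, x1, y1, z1, ... layout produced by foreach_get:

import numpy as np
from mathutils import Matrix

raw_cos = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float32)

# No matrix: returns a reshaped view of the input, with no copy made.
as_is = vcos_transformed(raw_cos)
# Translation only: the matrix multiply is skipped and 1.0 is added to the x
# axis, in double precision, before casting back to float32.
moved = vcos_transformed(raw_cos, Matrix.Translation((1.0, 0.0, 0.0)), np.float32)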
def astype_view_signedness(arr, new_dtype):
"""Unsafely views arr as new_dtype if the itemsize and byteorder of arr matches but the signedness does not,
otherwise calls np.ndarray.astype with copy=False.
The benefit of copy=False is that if the array can be safely viewed as the new type, then a view is made, instead of
a copy with the new type.
Unsigned types can't be viewed safely as signed or vice-versa, meaning that a copy would always be made by
.astype(..., copy=False).
This is intended for viewing uintc data (a common Blender C type with variable itemsize, though usually 4 bytes, so
uint32) as int32 (a common FBX type), when the itemsizes match."""
arr_dtype = arr.dtype
if not isinstance(new_dtype, np.dtype):
# new_dtype could be a type instance or a string, but it needs to be a dtype to compare its itemsize, byteorder
# and kind.
new_dtype = np.dtype(new_dtype)
# For simplicity, only dtypes of the same itemsize and byteorder, but opposite signedness, are handled. Everything
# else is left to .astype.
arr_kind = arr_dtype.kind
new_kind = new_dtype.kind
if (
# Signed and unsigned int are opposite in terms of signedness. Other types don't have signedness.
((arr_kind == 'i' and new_kind == 'u') or (arr_kind == 'u' and new_kind == 'i'))
and arr_dtype.itemsize == new_dtype.itemsize
and arr_dtype.byteorder == new_dtype.byteorder
):
# new_dtype has opposite signedness and matching itemsize and byteorder, so return a view of the new type.
return arr.view(new_dtype)
else:
return arr.astype(new_dtype, copy=False)
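The payoff of the view is that it aliases the same buffer, while .astype(..., copy=False) must still copy when only the signedness differs; a quick check:

import numpy as np

loops = np.arange(8, dtype=np.uintc)

viewed = astype_view_signedness(loops, np.int32)
copied = loops.astype(np.int32, copy=False)

# Where uintc is 4 bytes (the usual case), the view shares memory with the
# input, while astype had to make a copy because of the signedness change.
if np.dtype(np.uintc).itemsize == np.dtype(np.int32).itemsize:
    assert np.shares_memory(viewed, loops)
assert not np.shares_memory(copied, loops)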
def fast_first_axis_flat(ar):
"""Get a flat view (or a copy if a view is not possible) of the input array whereby each element is a single element
of a dtype that is fast to sort, sorts according to individual bytes and contains the data for an entire row (and
any further dimensions) of the input array.
Since the dtype of the view could sort in a different order to the dtype of the input array, this isn't typically
useful for actual sorting, but it is useful for sorting-based uniqueness, such as np.unique."""
# If the input array is 1d, there are no extra dimensions per row, so each element is viewed as the new dtype.
elements_per_row = math.prod(ar.shape[1:])
row_itemsize = ar.itemsize * elements_per_row
# Get a dtype with itemsize that equals row_itemsize.
# Integer types sort the fastest, but are only available for specific itemsizes.
uint_dtypes_by_itemsize = {1: np.uint8, 2: np.uint16, 4: np.uint32, 8: np.uint64}
# Signed/unsigned makes no noticeable speed difference, but using unsigned will result in ordering according to
# individual bytes like the other, non-integer types.
if row_itemsize in uint_dtypes_by_itemsize:
entire_row_dtype = uint_dtypes_by_itemsize[row_itemsize]
else:
# When using kind='stable' sorting, numpy only uses radix sort with integer types, but it's still
# significantly faster to sort by a single item per row instead of multiple row elements or multiple structured
# type fields.
# Construct a flexible size dtype with matching itemsize.
# Should always be 4 because each character in a unicode string is UCS4.
str_itemsize = np.dtype((np.str_, 1)).itemsize
if row_itemsize % str_itemsize == 0:
# Unicode strings seem to be slightly faster to sort than bytes.
entire_row_dtype = np.dtype((np.str_, row_itemsize // str_itemsize))
else:
# Byte strings (np.bytes_) seem to be slightly faster to sort than raw bytes (np.void).
entire_row_dtype = np.dtype((np.bytes_, row_itemsize))
# View each element along the first axis as a single element.
# View (or copy if a view is not possible) as flat
ar = ar.reshape(-1)
# To view as a dtype of different size, the last axis (entire array in NumPy 1.22 and earlier) must be C-contiguous.
if row_itemsize != ar.itemsize and not ar.flags.c_contiguous:
ar = np.ascontiguousarray(ar)
return ar.view(entire_row_dtype)
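The core of the trick: a row of two int32 values occupies exactly eight bytes, so it can be reinterpreted as a single uint64 scalar that sorts quickly; a standalone illustration with hypothetical edge data:

import numpy as np

edges = np.array([[0, 1], [2, 3], [0, 1]], dtype=np.int32)

# Each (int32, int32) row viewed as one uint64 scalar: 2 * 4 bytes -> 8 bytes.
flat = np.ascontiguousarray(edges).reshape(-1).view(np.uint64)
assert flat.shape == (3,)
assert flat[0] == flat[2]  # identical rows collapse to identical scalars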
def fast_first_axis_unique(ar, return_unique=True, return_index=False, return_inverse=False, return_counts=False):
"""np.unique with axis=0 but optimised for when the input array has multiple elements per row, and the returned
unique array doesn't need to be sorted.
Arrays with more than one element per row are more costly to sort in np.unique due to being compared one
row-element at a time, like comparing tuples.
By viewing each entire row as a single non-structured element, much faster sorting can be achieved. Since the values
are viewed as a different type than their original, the returned array of unique values may not be sorted according to
their original type.
The array of unique values can be excluded from the returned tuple by specifying return_unique=False.
Float type caveats:
All elements of -0.0 in the input array will be replaced with 0.0 to ensure that both values are collapsed into one.
NaN values can have lots of different byte representations (e.g. signalling/quiet and custom payloads). Only the
duplicates of each unique byte representation will be collapsed into one."""
# At least something should always be returned.
assert(return_unique or return_index or return_inverse or return_counts)
# Only signed integer, unsigned integer and floating-point kinds of data are allowed. Other kinds of data have not
# been tested.
assert(ar.dtype.kind in "iuf")
# Floating-point types have different byte representations for -0.0 and 0.0. Collapse them together by replacing all
# -0.0 in the input array with 0.0.
if ar.dtype.kind == 'f':
ar[ar == -0.0] = 0.0
# It's a bit annoying that the unique array is always calculated even when it might not be needed, but it is
# generally insignificant compared to the cost of sorting.
result = np.unique(fast_first_axis_flat(ar), return_index=return_index,
return_inverse=return_inverse, return_counts=return_counts)
if return_unique:
unique = result[0] if isinstance(result, tuple) else result
# View in the original dtype.
unique = unique.view(ar.dtype)
# Return the same number of elements per row and any extra dimensions per row as the input array.
unique.shape = (-1, *ar.shape[1:])
if isinstance(result, tuple):
return (unique,) + result[1:]
else:
return unique
else:
# Remove the first element, the unique array.
result = result[1:]
if len(result) == 1:
# Unpack single element tuples.
return result[0]
else:
return result
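The returned rows match np.unique(ar, axis=0) as a set, though not necessarily in the same order; a small check:

import numpy as np

ar = np.array([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0]])

fast = fast_first_axis_unique(ar)
slow = np.unique(ar, axis=0)

# Same set of unique rows; only the ordering may differ, because the sort ran
# on the reinterpreted dtype rather than on the original float64 rows.
assert {tuple(row) for row in fast} == {tuple(row) for row in slow}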
# ##### UIDs code. #####
# ID class (mere int).