From 0a68da4c8efd5d0793f2808618f7d8406f72b4e3 Mon Sep 17 00:00:00 2001 From: Thomas Barlow Date: Fri, 10 Feb 2023 04:17:12 +0000 Subject: [PATCH] Speed up FBX export of shape keys with numpy Use a buffer matching the C type of the "co" property in foreach_get to avoid having to iterate and cast every single element in the C foreach_getset function. Replace use of vcos_transformed_gen mesh transform helper with numpy version. Only get cos of shape keys that are needed, since shape keys that are relative to themselves and have no other shape keys relative to them can be skipped. ~7-12 times faster for shape keys that are entirely equal ~11-25 times faster for shape keys that are entirely different and not similar ~16-28 times faster for shape keys that are almost entirely different but similar This patch usually changes the exported file slightly because the math for calculating the difference between a shape key and its relative key is done with float64 precision, matching the fbx export type. Beforehand, the difference would be calculated using mathutils.Vector, which uses single-precision floats (usually float32). Because numpy.isclose is not symmetrical, this patch can also rarely result in a difference in which shape key cos are considered similar and are therefore not included in the export. Consider the relative difference between 0.5 and 1.0: If 1.0 is considered the reference value, there is a 50% difference, but if 0.5 is considered the reference value, there is a 100% difference. math.isclose and fbx_utils.similar_values_iter always pick the value with greater magnitude as the reference value, whereas numpy.isclose picks the second argument as the reference value. math.isclose(0.5, 1.0, rel_tol=0.5) => True math.isclose(1.0, 0.5, rel_tol=0.5) => True numpy.isclose(0.5, 1.0, rtol=0.5) => True numpy.isclose(1.0, 0.5, rtol=0.5) => False The relative key of each shape key is used as the reference value for numpy.isclose. 
--- io_scene_fbx/export_fbx_bin.py | 83 +++++++++++++++++++++------------- io_scene_fbx/fbx_utils.py | 35 ++++++++++++++ 2 files changed, 86 insertions(+), 32 deletions(-) diff --git a/io_scene_fbx/export_fbx_bin.py b/io_scene_fbx/export_fbx_bin.py index cb4a5d868..137de1c5e 100644 --- a/io_scene_fbx/export_fbx_bin.py +++ b/io_scene_fbx/export_fbx_bin.py @@ -11,6 +11,7 @@ import os import time from itertools import zip_longest, chain +from functools import cache if "bpy" in locals(): import importlib @@ -47,7 +48,7 @@ from .fbx_utils import ( # Miscellaneous utils. PerfMon, units_blender_to_fbx_factor, units_convertor, units_convertor_iter, - matrix4_to_array, similar_values, similar_values_iter, astype_view_signedness, fast_first_axis_unique, + matrix4_to_array, similar_values, shape_difference_exclude_similar, astype_view_signedness, fast_first_axis_unique, # Mesh transform helpers. vcos_transformed_gen, vcos_transformed, nors_transformed, # UUID from key. @@ -760,15 +761,19 @@ def fbx_data_mesh_shapes_elements(root, me_obj, me, scene_data, fbx_me_tmpl, fbx for shape, (channel_key, geom_key, shape_verts_co, shape_verts_idx) in shapes.items(): # Use vgroups as weights, if defined. 
if shape.vertex_group and shape.vertex_group in me_obj.bdata.vertex_groups: - shape_verts_weights = array.array(data_types.ARRAY_FLOAT64, [0.0]) * (len(shape_verts_co) // 3) + shape_verts_weights = np.zeros(len(shape_verts_idx), dtype=np.float64) + # It's slightly faster to iterate and index the underlying memoryview objects + mv_shape_verts_weights = shape_verts_weights.data + mv_shape_verts_idx = shape_verts_idx.data vg_idx = me_obj.bdata.vertex_groups[shape.vertex_group].index - for sk_idx, v_idx in enumerate(shape_verts_idx): + for sk_idx, v_idx in enumerate(mv_shape_verts_idx): for vg in vertices[v_idx].groups: if vg.group == vg_idx: - shape_verts_weights[sk_idx] = vg.weight * 100.0 + mv_shape_verts_weights[sk_idx] = vg.weight break + shape_verts_weights *= 100.0 else: - shape_verts_weights = array.array(data_types.ARRAY_FLOAT64, [100.0]) * (len(shape_verts_co) // 3) + shape_verts_weights = np.full(len(shape_verts_idx), 100.0, dtype=np.float64) channels.append((channel_key, shape, shape_verts_weights)) geom = elem_data_single_int64(root, b"Geometry", get_fbx_uuid_from_key(geom_key)) @@ -784,8 +789,7 @@ def fbx_data_mesh_shapes_elements(root, me_obj, me, scene_data, fbx_me_tmpl, fbx elem_data_single_int32_array(geom, b"Indexes", shape_verts_idx) elem_data_single_float64_array(geom, b"Vertices", shape_verts_co) if write_normals: - elem_data_single_float64_array(geom, b"Normals", - array.array(data_types.ARRAY_FLOAT64, [0.0]) * len(shape_verts_co)) + elem_data_single_float64_array(geom, b"Normals", np.zeros(len(shape_verts_idx) * 3, dtype=np.float64)) # Yiha! BindPose for shapekeys too! Dodecasigh... # XXX Not sure yet whether several bindposes on same mesh are allowed, or not... :/ @@ -2501,6 +2505,18 @@ def fbx_data_from_scene(scene, depsgraph, settings): # ShapeKeys. 
data_deformers_shape = {} geom_mat_co = settings.global_matrix if settings.bake_space_transform else None + co_bl_dtype = np.single + co_fbx_dtype = np.float64 + idx_fbx_dtype = np.int32 + + def empty_verts_fallbacks(): + """Create fallback arrays for when there are no verts""" + # FBX does not like empty shapes (makes Unity crash e.g.). + # To prevent this, we add a vertex that does nothing, but it keeps the shape key intact + single_vert_co = np.zeros((1, 3), dtype=co_fbx_dtype) + single_vert_idx = np.zeros(1, dtype=idx_fbx_dtype) + return single_vert_co, single_vert_idx + for me_key, me, _free in data_meshes.values(): if not (me.shape_keys and len(me.shape_keys.key_blocks) > 1): # We do not want basis-only relative skeys... continue @@ -2508,41 +2524,44 @@ def fbx_data_from_scene(scene, depsgraph, settings): continue shapes_key = get_blender_mesh_shape_key(me) - # We gather all vcos first, since some skeys may be based on others... - _cos = array.array(data_types.ARRAY_FLOAT64, (0.0,)) * len(me.vertices) * 3 - me.vertices.foreach_get("co", _cos) - v_cos = tuple(vcos_transformed_gen(_cos, geom_mat_co)) - sk_cos = {} - for shape in me.shape_keys.key_blocks[1:]: - shape.data.foreach_get("co", _cos) - sk_cos[shape] = tuple(vcos_transformed_gen(_cos, geom_mat_co)) + sk_base = me.shape_keys.key_blocks[0] + # Get and cache only the cos that we need + @cache + def sk_cos(shape_key): + _cos = np.empty(len(me.vertices) * 3, dtype=co_bl_dtype) + if shape_key == sk_base: + me.vertices.foreach_get("co", _cos) + else: + shape_key.data.foreach_get("co", _cos) + return vcos_transformed(_cos, geom_mat_co, co_fbx_dtype) + for shape in me.shape_keys.key_blocks[1:]: - # Only write vertices really different from org coordinates! - shape_verts_co = [] - shape_verts_idx = [] + # Only write vertices really different from base coordinates! 
+ relative_key = shape.relative_key + if shape == relative_key: + # Shape is its own relative key, so it does nothing + shape_verts_co, shape_verts_idx = empty_verts_fallbacks() + else: + sv_cos = sk_cos(shape) + ref_cos = sk_cos(shape.relative_key) - sv_cos = sk_cos[shape] - ref_cos = v_cos if shape.relative_key == sk_base else sk_cos[shape.relative_key] - for idx, (sv_co, ref_co) in enumerate(zip(sv_cos, ref_cos)): - if similar_values_iter(sv_co, ref_co): - # Note: Maybe this is a bit too simplistic, should we use real shape base here? Though FBX does not - # have this at all... Anyway, this should cover most common cases imho. - continue - shape_verts_co.extend(Vector(sv_co) - Vector(ref_co)) - shape_verts_idx.append(idx) + # Exclude cos similar to ref_cos and get the indices of the cos that remain + shape_verts_co, shape_verts_idx = shape_difference_exclude_similar(sv_cos, ref_cos) - # FBX does not like empty shapes (makes Unity crash e.g.). - # To prevent this, we add a vertex that does nothing, but it keeps the shape key intact - if not shape_verts_co: - shape_verts_co.extend((0, 0, 0)) - shape_verts_idx.append(0) + if not shape_verts_co.size: + shape_verts_co, shape_verts_idx = empty_verts_fallbacks() + else: + # Ensure the indices are of the correct type + shape_verts_idx = astype_view_signedness(shape_verts_idx, idx_fbx_dtype) channel_key, geom_key = get_blender_mesh_shape_channel_key(me, shape) data = (channel_key, geom_key, shape_verts_co, shape_verts_idx) data_deformers_shape.setdefault(me, (me_key, shapes_key, {}))[2][shape] = data + del sk_cos + perfmon.step("FBX export prepare: Wrapping Armatures...") # Armatures! 
diff --git a/io_scene_fbx/fbx_utils.py b/io_scene_fbx/fbx_utils.py index 816e6b731..327601534 100644 --- a/io_scene_fbx/fbx_utils.py +++ b/io_scene_fbx/fbx_utils.py @@ -260,6 +260,41 @@ def similar_values_iter(v1, v2, e=1e-6): return False return True + +def shape_difference_exclude_similar(sv_cos, ref_cos, e=1e-6): + """Return a tuple of: + the difference between the vertex cos in sv_cos and ref_cos, excluding any that are nearly the same, + and the indices of the vertices that are not nearly the same""" + assert(sv_cos.size == ref_cos.size) + + # Create views of 1 co per row of the arrays, only making copies if needed. + sv_cos = sv_cos.reshape(-1, 3) + ref_cos = ref_cos.reshape(-1, 3) + + # Quick check for equality + if np.array_equal(sv_cos, ref_cos): + # There's no difference between the two arrays. + empty_cos = np.empty((0, 3), dtype=sv_cos.dtype) + empty_indices = np.empty(0, dtype=np.int32) + return empty_cos, empty_indices + + # Note that unlike math.isclose(a,b), np.isclose(a,b) is not symmetrical and the second argument 'b', is + # considered to be the reference value. + # Note that atol=0 will mean that if only one co component being compared is zero, they won't be considered close. + similar_mask = np.isclose(sv_cos, ref_cos, atol=0, rtol=e) + + # A co is only similar if every component in it is similar. + co_similar_mask = np.all(similar_mask, axis=1) + + # Get the indices of cos that are not similar. + not_similar_verts_idx = np.flatnonzero(~co_similar_mask) + + # Subtracting first over the entire arrays and then indexing seems faster than indexing both arrays first and then + # subtracting, until less than about 3% of the cos are being indexed. + difference_cos = (sv_cos - ref_cos)[not_similar_verts_idx] + return difference_cos, not_similar_verts_idx + + def vcos_transformed_gen(raw_cos, m=None): # Note: we could most likely get much better performances with numpy, but will leave this as TODO for now. gen = zip(*(iter(raw_cos),) * 3) -- 2.30.2