From 5e45e449cd35822f5b8824412f25e7f1c3564658 Mon Sep 17 00:00:00 2001
From: Thomas Barlow
Date: Fri, 30 Jun 2023 17:04:19 +0100
Subject: [PATCH] FBX Import: Speed up parsing by reading entire subtrees into
 BytesIO

Python's default BufferedReader used with open() has to get the raw
stream position from the OS on every tell() call. This is about 10
times slower than the tell() function of BytesIO objects.

Because the number of bytes in each FBX element's subtree is known
ahead of time, entire subtrees of bytes can be read at once and exposed
through the same IO API via BytesIO for a performance increase.

The "Objects" element's subtree is an exception and is not read all at
once because it usually makes up the vast majority of the file size.
Reading what could be a huge number of bytes into memory at once could
impact performance on systems with low free memory, especially because
those bytes won't be freed until they have been entirely parsed.

A small amount of refactoring has also been done to reduce the number
of required tell() calls.

This reduces the time taken to parse FBX files to about 88% to 90% of
the original time in most cases.

Array decompression currently makes up about half of the parsing time,
but it could be multithreaded, in which case this patch would instead
reduce the time to about 75% to 80%.

Imported files containing lots of very small or non-nested subtrees
within the "Objects" element's subtree, which can happen with bone
animations that animate many bones, won't see much effect, if any, from
this patch.
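A rough sketch of the kind of micro-benchmark behind the ~10x figure
(illustrative only; "example.fbx" is a placeholder path, and the exact
ratio varies by OS and Python build):

    import timeit
    from io import BytesIO

    # Time tell() on a buffered file object, then on an in-memory
    # BytesIO holding the same bytes.
    with open("example.fbx", "rb") as f:  # placeholder path
        buffered_tell_time = timeit.timeit(f.tell, number=1_000_000)
        data = f.read()

    bytesio_tell_time = timeit.timeit(BytesIO(data).tell, number=1_000_000)

    # Expected to print roughly 10 on the systems described above.
    print(buffered_tell_time / bytesio_tell_time)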
---
 io_scene_fbx/parse_fbx.py | 54 +++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/io_scene_fbx/parse_fbx.py b/io_scene_fbx/parse_fbx.py
index 154bbf494..587cbc8a5 100644
--- a/io_scene_fbx/parse_fbx.py
+++ b/io_scene_fbx/parse_fbx.py
@@ -13,6 +13,7 @@ __all__ = (
 from struct import unpack
 import array
 import zlib
+from io import BytesIO
 
 from . import data_types
 
@@ -104,7 +105,7 @@ def init_version(fbx_version):
 _BLOCK_SENTINEL_DATA = (b'\0' * _BLOCK_SENTINEL_LENGTH)
 
 
-def read_elem(read, tell, use_namedtuple):
+def read_elem(read, tell, use_namedtuple, tell_file_offset=0):
     # [0] the offset at which this block ends
     # [1] the number of properties in the scope
     # [2] the length of the property list
@@ -125,15 +126,58 @@ def read_elem(read, tell, use_namedtuple):
         elem_props_data[i] = read_data_dict[data_type](read)
         elem_props_type[i] = data_type
 
-    if tell() < end_offset:
-        while tell() < (end_offset - _BLOCK_SENTINEL_LENGTH):
-            elem_subtree.append(read_elem(read, tell, use_namedtuple))
+    pos = tell()
+    local_end_offset = end_offset - tell_file_offset
+    if pos < local_end_offset:
+        # The default BufferedReader used when `open()`-ing files in 'rb' mode has to get the raw stream position from
+        # the OS every time its tell() function is called. This is about 10 times slower than the tell() function of
+        # BytesIO objects, so reading chunks of bytes from the file into memory at once and exposing them through
+        # BytesIO can give better performance. We know the total size of each element's subtree so can read entire
+        # subtrees into memory at a time.
+        # The "Objects" element's subtree, however, usually makes up most of the file, so we specifically avoid reading
+        # all its sub-elements into memory at once to reduce memory requirements at the cost of slightly worse
+        # performance when memory is not a concern.
+        # If we're currently reading directly from the opened file, then tell_file_offset will be zero.
+        if tell_file_offset == 0 and elem_id != b"Objects":
+            block_bytes_remaining = local_end_offset - pos
+
+            # Read the entire subtree
+            sub_elem_bytes = read(block_bytes_remaining)
+            num_bytes_read = len(sub_elem_bytes)
+            if num_bytes_read != block_bytes_remaining:
+                raise IOError("failed to read complete nested block, expected %i bytes, but only got %i"
+                              % (block_bytes_remaining, num_bytes_read))
+
+            # BytesIO provides IO API for reading bytes in memory, so we can use the same code as reading bytes directly
+            # from a file.
+            f = BytesIO(sub_elem_bytes)
+            tell = f.tell
+            read = f.read
+            # The new `tell` function starts at zero and is offset by `pos` bytes from the start of the file.
+            start_sub_pos = 0
+            tell_file_offset = pos
+            sub_tree_end = block_bytes_remaining - _BLOCK_SENTINEL_LENGTH
+        else:
+            # The `tell` function is unchanged, so starts at the value returned by `tell()`, which is still `pos`
+            # because no reads have been made since then.
+            start_sub_pos = pos
+            sub_tree_end = local_end_offset - _BLOCK_SENTINEL_LENGTH
+
+        sub_pos = start_sub_pos
+        while sub_pos < sub_tree_end:
+            elem_subtree.append(read_elem(read, tell, use_namedtuple, tell_file_offset))
+            sub_pos = tell()
+
 
+        # At the end of each subtree there should be a sentinel (an empty element with all bytes set to zero).
         if read(_BLOCK_SENTINEL_LENGTH) != _BLOCK_SENTINEL_DATA:
             raise IOError("failed to read nested block sentinel, "
                           "expected all bytes to be 0")
 
-    if tell() != end_offset:
+        # Update `pos` for the number of bytes that have been read.
+        pos += (sub_pos - start_sub_pos) + _BLOCK_SENTINEL_LENGTH
+
+    if pos != local_end_offset:
         raise IOError("scope length not reached, something is wrong")
 
     args = (elem_id, elem_props_data, elem_props_type, elem_subtree)

-- 
2.30.2