FBX Import: Speed up parsing by reading entire subtrees into BytesIO #104822

Merged
Bastien Montagne merged 1 commits from Mysteryem/blender-addons:fbx_subtrees_bytesio_pr into main 2023-08-09 10:43:56 +02:00
Showing only changes of commit 5e45e449cd - Show all commits

View File

@ -13,6 +13,7 @@ __all__ = (
from struct import unpack
import array
import zlib
from io import BytesIO
from . import data_types
@ -104,7 +105,7 @@ def init_version(fbx_version):
_BLOCK_SENTINEL_DATA = (b'\0' * _BLOCK_SENTINEL_LENGTH)
def read_elem(read, tell, use_namedtuple):
def read_elem(read, tell, use_namedtuple, tell_file_offset=0):
# [0] the offset at which this block ends
# [1] the number of properties in the scope
# [2] the length of the property list
@ -125,15 +126,58 @@ def read_elem(read, tell, use_namedtuple):
elem_props_data[i] = read_data_dict[data_type](read)
elem_props_type[i] = data_type
if tell() < end_offset:
while tell() < (end_offset - _BLOCK_SENTINEL_LENGTH):
elem_subtree.append(read_elem(read, tell, use_namedtuple))
pos = tell()
local_end_offset = end_offset - tell_file_offset
if pos < local_end_offset:
# The default BufferedReader used when `open()`-ing files in 'rb' mode has to get the raw stream position from
# the OS every time its tell() function is called. This is about 10 times slower than the tell() function of
# BytesIO objects, so reading chunks of bytes from the file into memory at once and exposing them through

Typo: ...into memory _at_ once... ;)

Typo: `...into memory _at_ once...` ;)
# BytesIO can give better performance. We know the total size of each element's subtree so can read entire
# subtrees into memory at a time.
# The "Objects" element's subtree, however, usually makes up most of the file, so we specifically avoid reading
# all its sub-elements into memory at once to reduce memory requirements at the cost of slightly worse
# performance when memory is not a concern.
# If we're currently reading directly from the opened file, then tell_file_offset will be zero.
if tell_file_offset == 0 and elem_id != b"Objects":
block_bytes_remaining = local_end_offset - pos
# Read the entire subtree
sub_elem_bytes = read(block_bytes_remaining)
num_bytes_read = len(sub_elem_bytes)
if num_bytes_read != block_bytes_remaining:
raise IOError("failed to read complete nested block, expected %i bytes, but only got %i"
% (block_bytes_remaining, num_bytes_read))
# BytesIO provides IO API for reading bytes in memory, so we can use the same code as reading bytes directly
# from a file.
f = BytesIO(sub_elem_bytes)
tell = f.tell
read = f.read
# The new `tell` function starts at zero and is offset by `pos` bytes from the start of the file.
start_sub_pos = 0
tell_file_offset = pos
sub_tree_end = block_bytes_remaining - _BLOCK_SENTINEL_LENGTH
else:
# The `tell` function is unchanged, so starts at the value returned by `tell()`, which is still `pos`
# because no reads have been made since then.
start_sub_pos = pos
sub_tree_end = local_end_offset - _BLOCK_SENTINEL_LENGTH
sub_pos = start_sub_pos
while sub_pos < sub_tree_end:
elem_subtree.append(read_elem(read, tell, use_namedtuple, tell_file_offset))
sub_pos = tell()
# At the end of each subtree there should be a sentinel (an empty element with all bytes set to zero).
if read(_BLOCK_SENTINEL_LENGTH) != _BLOCK_SENTINEL_DATA:
raise IOError("failed to read nested block sentinel, "
"expected all bytes to be 0")
if tell() != end_offset:
# Update `pos` for the number of bytes that have been read.
pos += (sub_pos - start_sub_pos) + _BLOCK_SENTINEL_LENGTH
if pos != local_end_offset:
raise IOError("scope length not reached, something is wrong")
args = (elem_id, elem_props_data, elem_props_type, elem_subtree)