FBX IO: Speed up parsing by multithreading array decompression #104739
@@ -14,9 +14,9 @@ from struct import unpack
 import array
 import zlib
 from io import BytesIO
-from contextlib import contextmanager, nullcontext

 from . import data_types
+from .fbx_utils_threading import MultiThreadedTaskConsumer

 # at the end of each nested block, there is a NUL record to indicate
 # that the sub-scope exists (i.e. to distinguish between P: and P : {})
@@ -28,15 +28,6 @@ _HEAD_MAGIC = b'Kaydara FBX Binary\x20\x20\x00\x1a\x00'
 from collections import namedtuple
 FBXElem = namedtuple("FBXElem", ("id", "props", "props_type", "elems"))
 del namedtuple
-# The maximum number of threads that can be started when decompressing arrays is dynamic and based on the number of
-# CPUs, but has a hard max to limit resource costs. This hard max matches ThreadPoolExecutor's default behaviour.
-HARD_MAX_ARRAY_DECOMPRESSION_THREADS = 32
-# The maximum size of the task queue, per array decompression thread, before another thread is started. This magic
-# has been determined experimentally to usually keep the queue quite small without starting an excessive number of
-# threads when the file being parsed is small or when many tasks are added in quick succession. This tends to start more
-# threads than would be most optimal, but ensures that the queue is close to empty by the time the main thread finishes
-# parsing, so there will be little to no waiting for decompression tasks to finish.
-MAX_QUEUE_PER_DECOMPRESSION_THREAD = 5


 def read_uint(read):
@@ -81,6 +72,20 @@ def _create_array(data, length, array_type, array_stride, array_byteswap):
     return data_array


+def _decompress_and_insert_array(elem_props_data, index_to_set, compressed_array_args):
+    """Decompress array data and insert the created array into the FBX tree being parsed.
+
+    This is usually called from a separate thread to the main thread."""
+    compressed_data, length, array_type, array_stride, array_byteswap = compressed_array_args
+
+    # zlib.decompress releases the Global Interpreter Lock, so another thread can run code while waiting for the
+    # decompression to complete.
+    data = zlib.decompress(compressed_data, bufsize=length * array_stride)
+
+    # Create and insert the array into the parsed FBX hierarchy.
+    elem_props_data[index_to_set] = _create_array(data, length, array_type, array_stride, array_byteswap)
+
+
 def unpack_array(read, array_type, array_stride, array_byteswap):
     """Unpack an array from an FBX file being parsed.

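The speed-up in this hunk hinges on `zlib.decompress` releasing the GIL, so several compressed arrays can be decompressed at once on worker threads while the main thread keeps reading the file. A minimal standalone sketch of that effect (illustrative only, not part of the patch; the helper name, worker count and buffer sizes are made up):

```python
# Illustrative sketch only: decompress several zlib blobs concurrently.
# zlib.decompress releases the GIL, so threads give a real speed-up here.
import zlib
from concurrent.futures import ThreadPoolExecutor

def decompress_all(blobs_and_sizes, max_workers=4):
    """Decompress (compressed_bytes, expected_size) pairs, preserving order."""
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(
            lambda pair: zlib.decompress(pair[0], bufsize=pair[1]), blobs_and_sizes))

# e.g. decompress_all([(zlib.compress(bytes(1 << 20)), 1 << 20)] * 8)
```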
@@ -124,176 +129,6 @@ read_data_dict = {
 }


-class _MultiThreadedArrayDecompressor:
-    """Helper class that encapsulates everything needed to decompress array data on separate threads and then insert
-    the arrays into the FBX hierarchy, with a single-threaded fallback if multithreading is not available."""
-    # A special task value used to signal array decompression threads to shut down.
-    _SHUT_DOWN_THREADS = object()
-
-    __slots__ = "_shared_task_queue", "_worker_futures", "_executor", "_max_workers", "_shutting_down"
-
-    def __init__(self, thread_pool_executor_cls, max_workers):
-        from queue import SimpleQueue
-        # All the threads share a single queue.
-        self._shared_task_queue = SimpleQueue()
-        # Reference to each thread is kept through the returned Future objects. This is used as part of determining when
-        # new threads should be started and is used to be able to receive and handle exceptions from the threads.
-        self._worker_futures = []
-        # ThreadPoolExecutor might not be available on the current system. To ensure this class is only instantiated
-        # in cases where ThreadPoolExecutor is available, the ThreadPoolExecutor class must be provided as an argument.
-        self._executor = thread_pool_executor_cls(max_workers=max_workers)
-        # Technically the max workers of the executor is accessible through its `._max_workers`, but since it's private
-        # we'll store the max workers ourselves.
-        self._max_workers = max_workers
-        # When shutting down the threads, this is set to True as an extra safeguard to prevent new array decompression
-        # tasks being scheduled.
-        self._shutting_down = False
-
-    @classmethod
-    def new_cm(cls):
-        """Return a context manager that, when entered, returns a function to schedule array decompression tasks on
-        separate threads.
-
-        If the system can't use multithreading, then the context manager's returned function will instead immediately
-        perform array decompression on the calling thread.
-
-        When exiting the context manager, it waits for all scheduled decompression tasks to complete."""
-        max_threads = cls._get_max_threads()
-
-        # The concurrent.futures module does not work or is not available on WebAssembly platforms wasm32-emscripten
-        # and wasm32-wasi.
-        # wasm32-emscripten raises ModuleNotFoundError, not sure about wasm32-wasi.
-        try:
-            from concurrent.futures import ThreadPoolExecutor
-            thread_pool_executor_cls = ThreadPoolExecutor
-        except ModuleNotFoundError:
-            thread_pool_executor_cls = None
-
-        # max_threads should always be greater than zero, but it can be useful for debugging and profiling to be able to
-        # disable array decompression multithreading by setting MAX_ARRAY_DECOMPRESSION_THREADS to zero.
-        if thread_pool_executor_cls and max_threads > 0:
-            return cls(thread_pool_executor_cls, max_threads)._wrap_executor_cm()
-        else:
-            # Fall back to single-threaded.
-            return nullcontext(cls._decompress_and_insert_array)
-
-    @staticmethod
-    def _get_max_threads():
-        """Decompressing arrays is entirely CPU work that releases the GIL, so there shouldn't be any benefit in using
-        more threads than there are CPUs.
-
-        The current (main) thread is not counted when considering the maximum number of threads because it can spend
-        some of its time waiting for File IO, so even a system with only a single CPU can see a benefit from having a
-        separate thread for decompressing arrays."""
-        import os
-
-        # os.sched_getaffinity(0) gets the set of CPUs available to the current process, but is only available on some
-        # Unix platforms.
-        sched_getaffinity = getattr(os, "sched_getaffinity", None)
-        if sched_getaffinity is not None:
-            max_threads = len(sched_getaffinity(0))
-        else:
-            # Without sched_getaffinity being available, assume all CPUs are available to the current process.
-            max_threads = os.cpu_count() or 1  # assume 1 if cpu_count is indeterminable
-
-        # Cap the maximum number of threads to limit resource costs.
-        return min(HARD_MAX_ARRAY_DECOMPRESSION_THREADS, max_threads)
-
-    @staticmethod
-    def _decompress_and_insert_array(elem_props_data, index_to_set, compressed_array_args):
-        """Decompress array data and insert the created array into the FBX tree being parsed.
-
-        This is usually called from a separate thread to the main thread."""
-        compressed_data, length, array_type, array_stride, array_byteswap = compressed_array_args
-
-        # zlib.decompress releases the Global Interpreter Lock, so another thread can run code while waiting for the
-        # decompression to complete.
-        data = zlib.decompress(compressed_data, bufsize=length * array_stride)
-
-        # Create and insert the array into the parsed FBX hierarchy.
-        elem_props_data[index_to_set] = _create_array(data, length, array_type, array_stride, array_byteswap)
-
-    def _worker_callable(self):
-        """Callable that is run by each worker thread.
-        Signals the other worker threads to stop when stopped intentionally or when an exception occurs."""
-        try:
-            while True:
-                # Blocks until it can get a task.
-                task_args = self._shared_task_queue.get()
-
-                if task_args is self._SHUT_DOWN_THREADS:
-                    # This special value signals that it's time for all the threads to stop.
-                    break
-                else:
-                    # Decompress the array data, create the array and insert it into the FBX hierarchy.
-                    self._decompress_and_insert_array(*task_args)
-        finally:
-            # Either the thread has been told to shut down because it received _SHUT_DOWN_THREADS or an exception has
-            # occurred.
-            # Add _SHUT_DOWN_THREADS to the queue so that the other worker threads will also shut down.
-            self._shared_task_queue.put(self._SHUT_DOWN_THREADS)
-
-    def _schedule_array_decompression(self, elem_props_data, index_to_set, compressed_array_args):
-        """Some FBX files might not have any compressed arrays, so worker threads are only started as compressed arrays
-        are found.
-
-        Note that the signature of this function must be the same as, or otherwise be compatible with,
-        _decompress_and_insert_array, which is used instead of this function when multithreading is not available.
-
-        This function is a slight misuse of ThreadPoolExecutor. Normally, each task to be scheduled would be submitted
-        through ThreadPoolExecutor.submit, but doing so is noticeably slower for these array decompression tasks,
-        perhaps because each task can be quick and there can be a lot of them. An alternative would be starting new
-        Thread instances manually, but then we would have to implement our own functions that can wait for threads to
-        finish and handle exceptions."""
-        if self._shutting_down:
-            # Shouldn't occur through normal usage.
-            raise RuntimeError("Cannot schedule new tasks after shutdown")
-        # Schedule the task by adding it to the task queue.
-        self._shared_task_queue.put((elem_props_data, index_to_set, compressed_array_args))
-
-        # Check if more worker threads need to be added to account for the rate at which tasks are being scheduled
-        # compared to the rate at which tasks are being consumed.
-        current_worker_count = len(self._worker_futures)
-        if current_worker_count < self._max_workers:
-            # Increasing the max queue size whenever a new thread is started gives some time for new threads to start up
-            # and begin consuming tasks from the queue before it's determined that another new thread is needed. This
-            # helps account for lots of compressed arrays being read in quick succession.
-            max_queue_size_for_current_workers = MAX_QUEUE_PER_DECOMPRESSION_THREAD * current_worker_count
-
-            if self._shared_task_queue.qsize() > max_queue_size_for_current_workers:
-                # Add a new worker thread because the queue has grown too large.
-                self._worker_futures.append(self._executor.submit(self._worker_callable))
-
-    @contextmanager
-    def _wrap_executor_cm(self):
-        """Wrap the executor's context manager to instead return _schedule_array_decompression and such that the threads
-        automatically start shutting down before the executor starts shutting down."""
-        # .__enter__()
-        # Exiting the context manager of the executor will wait for all threads to finish and prevent new threads from
-        # being created, as if its shutdown() method had been called.
-        with self._executor:
-            try:
-                yield self._schedule_array_decompression
-            finally:
-                # .__exit__()
-                self._shutting_down = True
-                # Signal all worker threads to finish up and shut down so that the executor can shut down.
-                # Because this is run on the main thread and because decompression tasks are only scheduled from the
-                # main thread, it is guaranteed that no more decompression tasks will be scheduled after the worker
-                # threads start to shut down.
-                self._shared_task_queue.put(self._SHUT_DOWN_THREADS)
-
-                # Because `self._executor` was entered with a context manager, it will wait for all the worker threads
-                # to finish even if an exception is propagated from one of the threads.
-                for future in self._worker_futures:
-                    # .exception() waits for the future to finish and returns its raised exception or None.
-                    ex = future.exception()
-                    if ex is not None:
-                        # If one of the threads raised an exception, propagate it to the main thread.
-                        # Only the first exception will be propagated if there were multiple.
-                        raise ex
-
-
 # FBX 7500 (aka FBX2016) introduces incompatible changes at binary level:
 # * The NULL block marking end of nested stuff switches from 13 bytes long to 25 bytes long.
 # * The FBX element metadata (end_offset, prop_count and prop_length) switch from uint32 to uint64.
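For readers skimming the removed class above: its scheduling heuristic only spins up worker threads as the shared queue grows, so files with few compressed arrays never pay for extra threads. The same idea presumably carries over to MultiThreadedTaskConsumer, although fbx_utils_threading.py is not shown in this diff. Reduced to its core, with illustrative names (not the add-on's code), the heuristic is roughly:

```python
# Illustrative only: start another worker only when the shared queue outgrows
# a per-worker threshold, and cascade a sentinel to shut all workers down.
from concurrent.futures import ThreadPoolExecutor
from queue import SimpleQueue

MAX_QUEUE_PER_WORKER = 5   # plays the role of MAX_QUEUE_PER_DECOMPRESSION_THREAD
_SHUT_DOWN = object()      # sentinel telling workers to stop

def run_all(tasks, do_work, max_workers=4):
    """Run do_work(*args) for every args tuple in tasks, scaling workers on demand."""
    queue = SimpleQueue()
    workers = []

    def worker():
        try:
            while (args := queue.get()) is not _SHUT_DOWN:
                do_work(*args)
        finally:
            # Re-post the sentinel (also after an exception) so the other workers stop too.
            queue.put(_SHUT_DOWN)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for args in tasks:
            queue.put(args)
            if len(workers) < max_workers and queue.qsize() > MAX_QUEUE_PER_WORKER * len(workers):
                workers.append(executor.submit(worker))
        queue.put(_SHUT_DOWN)  # all tasks queued; let the workers drain and stop
    # The executor has now joined its threads; surface the first worker exception, if any.
    for future in workers:
        if (exc := future.exception()) is not None:
            raise exc
```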
@@ -418,7 +253,8 @@ def parse_version(fn):
 def parse(fn, use_namedtuple=True):
     root_elems = []

-    with open(fn, 'rb') as f, _MultiThreadedArrayDecompressor.new_cm() as decompress_array_func:
+    multithread_decompress_array_cm = MultiThreadedTaskConsumer.new_cpu_bound_cm(_decompress_and_insert_array)
+    with open(fn, 'rb') as f, multithread_decompress_array_cm as decompress_array_func:
         read = f.read
         tell = f.tell

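The new call site relies on MultiThreadedTaskConsumer.new_cpu_bound_cm() (defined in fbx_utils_threading.py, not shown in this diff) returning a context manager that yields a callable with the same signature as _decompress_and_insert_array: with threads available the callable schedules the work on a worker, otherwise it simply runs the wrapped function on the calling thread. Purely as a mental model, a much-simplified stand-in could look like this (illustrative, not the add-on's implementation):

```python
# Simplified stand-in for the pattern the call site relies on; the real
# implementation is MultiThreadedTaskConsumer in fbx_utils_threading.py.
from contextlib import contextmanager, nullcontext

def new_cpu_bound_cm(wrapped_func):
    try:
        from concurrent.futures import ThreadPoolExecutor
    except ModuleNotFoundError:
        # No threading support (e.g. WebAssembly builds): run tasks inline instead.
        return nullcontext(wrapped_func)

    @contextmanager
    def cm():
        futures = []
        with ThreadPoolExecutor() as executor:
            # The yielded callable has the same signature as wrapped_func.
            yield lambda *args: futures.append(executor.submit(wrapped_func, *args))
        # Exiting the executor waited for all workers; re-raise the first exception, if any.
        for future in futures:
            if (exc := future.exception()) is not None:
                raise exc

    return cm()
```

Either way, parse() can call decompress_array_func(...) without caring whether the decompression happens immediately or on a worker thread.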