1
1

BLI_task: nano-optimizations to BLI_task_parallel_range feature.

This commit makes use of the new taskpool feature (instead of allocating its own tasks),
and removes the spinlock used to generate chunks (using atomic operations instead).

In the best cases (a dynamically scheduled loop with a light processing callback), we
get a few percent of speedup; in most cases there is no noticeable improvement.
This commit is contained in:
2016-05-10 17:49:27 +02:00
parent fdd999042b
commit 98123ae916

View File

@@ -777,23 +777,29 @@ typedef struct ParallelRangeState {
int iter;
int chunk_size;
SpinLock lock;
} ParallelRangeState;
BLI_INLINE bool parallel_range_next_iter_get(
ParallelRangeState * __restrict state,
int * __restrict iter, int * __restrict count)
{
bool result = false;
BLI_spin_lock(&state->lock);
if (state->iter < state->stop) {
*count = min_ii(state->chunk_size, state->stop - state->iter);
*iter = state->iter;
state->iter += *count;
result = true;
uint32_t n, olditer, previter, newiter;
if (state->iter >= state->stop) {
return false;
}
BLI_spin_unlock(&state->lock);
return result;
do {
olditer = state->iter;
n = min_ii(state->chunk_size, state->stop - state->iter);
newiter = olditer + n;
previter = atomic_cas_uint32((uint32_t *)&state->iter, olditer, newiter);
} while (UNLIKELY(previter != olditer));
*iter = previter;
*count = n;
return (n != 0);
}
static void parallel_range_func(
@@ -898,7 +904,6 @@ static void task_parallel_range_ex(
*/
num_tasks = num_threads * 2;
BLI_spin_init(&state.lock);
state.start = start;
state.stop = stop;
state.userdata = userdata;
@@ -917,16 +922,15 @@ static void task_parallel_range_ex(
num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size);
for (i = 0; i < num_tasks; i++) {
BLI_task_pool_push(task_pool,
parallel_range_func,
NULL, false,
TASK_PRIORITY_HIGH);
/* Use this pool's pre-allocated tasks. */
BLI_task_pool_push_from_thread(task_pool,
parallel_range_func,
NULL, false,
TASK_PRIORITY_HIGH, 0);
}
BLI_task_pool_work_and_wait(task_pool);
BLI_task_pool_free(task_pool);
BLI_spin_end(&state.lock);
}
/**