This patch enables TBB as the default task scheduler. TBB stands for Threading Building Blocks and is developed by Intel. The library contains several threading patterns. This patch maps Blender's BLI_task_* functions to their TBB counterparts. After this patch we can add more patterns. A promising one is TBB:graph, which can be used for the depsgraph, draw manager and compositor. Performance changes depend on the actual hardware. It was tested on different hardware, from laptops to workstations, and we didn't detect any performance regression. * Linux Xeon E5-2699 v4 got an FPS boost from 12 to 17 using Spring's 04_010_A.anim.blend. * AMD Ryzen Threadripper 2990WX 32-Core: animation playback goes from 9.5-10.5 FPS to 13.0-14.0 FPS on Agent 327, 10_03_B.anim.blend. Reviewed By: brecht, sergey Differential Revision: https://developer.blender.org/D7475
503 lines
14 KiB
C
503 lines
14 KiB
C
/*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
|
|
* All rights reserved.
|
|
*/
|
|
|
|
/** \file
|
|
* \ingroup imbuf
|
|
*
|
|
* This file was moved here from the src/ directory. It is meant to
|
|
* deal with endianness. It resided in a general blending lib. The
|
|
* other functions were only used during rendering. This single
|
|
* function remained. It should probably move to imbuf/intern/util.c,
|
|
* but we'll keep it here for the time being. (nzc)
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
#include "BLI_math.h"
|
|
#include "BLI_task.h"
|
|
#include "BLI_utildefines.h"
|
|
|
|
#include "IMB_colormanagement.h"
|
|
#include "IMB_imbuf.h"
|
|
#include "IMB_imbuf_types.h"
|
|
#include <math.h>
|
|
|
|
/* Only this one is used liberally here, and in imbuf */
|
|
void IMB_convert_rgba_to_abgr(struct ImBuf *ibuf)
|
|
{
|
|
size_t size;
|
|
unsigned char rt, *cp = (unsigned char *)ibuf->rect;
|
|
float rtf, *cpf = ibuf->rect_float;
|
|
|
|
if (ibuf->rect) {
|
|
size = ibuf->x * ibuf->y;
|
|
|
|
while (size-- > 0) {
|
|
rt = cp[0];
|
|
cp[0] = cp[3];
|
|
cp[3] = rt;
|
|
rt = cp[1];
|
|
cp[1] = cp[2];
|
|
cp[2] = rt;
|
|
cp += 4;
|
|
}
|
|
}
|
|
|
|
if (ibuf->rect_float) {
|
|
size = ibuf->x * ibuf->y;
|
|
|
|
while (size-- > 0) {
|
|
rtf = cpf[0];
|
|
cpf[0] = cpf[3];
|
|
cpf[3] = rtf;
|
|
rtf = cpf[1];
|
|
cpf[1] = cpf[2];
|
|
cpf[2] = rtf;
|
|
cpf += 4;
|
|
}
|
|
}
|
|
}
|
|
static void pixel_from_buffer(struct ImBuf *ibuf, unsigned char **outI, float **outF, int x, int y)
|
|
|
|
{
|
|
size_t offset = ((size_t)ibuf->x) * y * 4 + 4 * x;
|
|
|
|
if (ibuf->rect) {
|
|
*outI = (unsigned char *)ibuf->rect + offset;
|
|
}
|
|
|
|
if (ibuf->rect_float) {
|
|
*outF = ibuf->rect_float + offset;
|
|
}
|
|
}
|
|
|
|
/* BICUBIC Interpolation */
|
|
|
|
void bicubic_interpolation_color(
|
|
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v)
|
|
{
|
|
if (outF) {
|
|
BLI_bicubic_interpolation_fl(in->rect_float, outF, in->x, in->y, 4, u, v);
|
|
}
|
|
else {
|
|
BLI_bicubic_interpolation_char((unsigned char *)in->rect, outI, in->x, in->y, 4, u, v);
|
|
}
|
|
}
|
|
|
|
/**
 * Bicubic sample of \a in at (\a u, \a v), written to pixel (\a xout, \a yout) of \a out.
 *
 * Does nothing when \a in is NULL or has neither a byte nor a float buffer.
 */
void bicubic_interpolation(ImBuf *in, ImBuf *out, float u, float v, int xout, int yout)
{
  const bool has_source = (in != NULL) && (in->rect != NULL || in->rect_float != NULL);
  if (!has_source) {
    return;
  }

  /* Each destination stays NULL when `out` lacks the matching buffer. */
  unsigned char *dst_byte = NULL;
  float *dst_float = NULL;
  pixel_from_buffer(out, &dst_byte, &dst_float, xout, yout);

  bicubic_interpolation_color(in, dst_byte, dst_float, u, v);
}
|
|
|
|
/* BILINEAR INTERPOLATION */
|
|
void bilinear_interpolation_color(
|
|
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v)
|
|
{
|
|
if (outF) {
|
|
BLI_bilinear_interpolation_fl(in->rect_float, outF, in->x, in->y, 4, u, v);
|
|
}
|
|
else {
|
|
BLI_bilinear_interpolation_char((unsigned char *)in->rect, outI, in->x, in->y, 4, u, v);
|
|
}
|
|
}
|
|
|
|
/* function assumes out to be zero'ed, only does RGBA */
|
|
/* BILINEAR INTERPOLATION */
|
|
|
|
/* Note about wrapping, the u/v still needs to be within the image bounds,
|
|
* just the interpolation is wrapped.
|
|
* This is the same as bilinear_interpolation_color except it wraps
|
|
* rather than using empty and emptyI. */
|
|
void bilinear_interpolation_color_wrap(
|
|
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v)
|
|
{
|
|
float *row1, *row2, *row3, *row4, a, b;
|
|
unsigned char *row1I, *row2I, *row3I, *row4I;
|
|
float a_b, ma_b, a_mb, ma_mb;
|
|
int y1, y2, x1, x2;
|
|
|
|
/* ImBuf in must have a valid rect or rect_float, assume this is already checked */
|
|
|
|
x1 = (int)floor(u);
|
|
x2 = (int)ceil(u);
|
|
y1 = (int)floor(v);
|
|
y2 = (int)ceil(v);
|
|
|
|
/* sample area entirely outside image? */
|
|
if (x2 < 0 || x1 > in->x - 1 || y2 < 0 || y1 > in->y - 1) {
|
|
return;
|
|
}
|
|
|
|
/* wrap interpolation pixels - main difference from bilinear_interpolation_color */
|
|
if (x1 < 0) {
|
|
x1 = in->x + x1;
|
|
}
|
|
if (y1 < 0) {
|
|
y1 = in->y + y1;
|
|
}
|
|
|
|
if (x2 >= in->x) {
|
|
x2 = x2 - in->x;
|
|
}
|
|
if (y2 >= in->y) {
|
|
y2 = y2 - in->y;
|
|
}
|
|
|
|
a = u - floorf(u);
|
|
b = v - floorf(v);
|
|
a_b = a * b;
|
|
ma_b = (1.0f - a) * b;
|
|
a_mb = a * (1.0f - b);
|
|
ma_mb = (1.0f - a) * (1.0f - b);
|
|
|
|
if (outF) {
|
|
/* sample including outside of edges of image */
|
|
row1 = in->rect_float + ((size_t)in->x) * y1 * 4 + 4 * x1;
|
|
row2 = in->rect_float + ((size_t)in->x) * y2 * 4 + 4 * x1;
|
|
row3 = in->rect_float + ((size_t)in->x) * y1 * 4 + 4 * x2;
|
|
row4 = in->rect_float + ((size_t)in->x) * y2 * 4 + 4 * x2;
|
|
|
|
outF[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
|
|
outF[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
|
|
outF[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
|
|
outF[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
|
|
|
|
/* clamp here or else we can easily get off-range */
|
|
clamp_v4(outF, 0.0f, 1.0f);
|
|
}
|
|
if (outI) {
|
|
/* sample including outside of edges of image */
|
|
row1I = (unsigned char *)in->rect + ((size_t)in->x) * y1 * 4 + 4 * x1;
|
|
row2I = (unsigned char *)in->rect + ((size_t)in->x) * y2 * 4 + 4 * x1;
|
|
row3I = (unsigned char *)in->rect + ((size_t)in->x) * y1 * 4 + 4 * x2;
|
|
row4I = (unsigned char *)in->rect + ((size_t)in->x) * y2 * 4 + 4 * x2;
|
|
|
|
/* need to add 0.5 to avoid rounding down (causes darken with the smear brush)
|
|
* tested with white images and this should not wrap back to zero */
|
|
outI[0] = (ma_mb * row1I[0] + a_mb * row3I[0] + ma_b * row2I[0] + a_b * row4I[0]) + 0.5f;
|
|
outI[1] = (ma_mb * row1I[1] + a_mb * row3I[1] + ma_b * row2I[1] + a_b * row4I[1]) + 0.5f;
|
|
outI[2] = (ma_mb * row1I[2] + a_mb * row3I[2] + ma_b * row2I[2] + a_b * row4I[2]) + 0.5f;
|
|
outI[3] = (ma_mb * row1I[3] + a_mb * row3I[3] + ma_b * row2I[3] + a_b * row4I[3]) + 0.5f;
|
|
}
|
|
}
|
|
|
|
/**
 * Bilinear sample of \a in at (\a u, \a v), written to pixel (\a xout, \a yout) of \a out.
 *
 * Does nothing when \a in is NULL or has neither a byte nor a float buffer.
 */
void bilinear_interpolation(ImBuf *in, ImBuf *out, float u, float v, int xout, int yout)
{
  const bool has_source = (in != NULL) && (in->rect != NULL || in->rect_float != NULL);
  if (!has_source) {
    return;
  }

  /* Each destination stays NULL when `out` lacks the matching buffer. */
  unsigned char *dst_byte = NULL;
  float *dst_float = NULL;
  pixel_from_buffer(out, &dst_byte, &dst_float, xout, yout);

  bilinear_interpolation_color(in, dst_byte, dst_float, u, v);
}
|
|
|
|
/* function assumes out to be zero'ed, only does RGBA */
|
|
/* NEAREST INTERPOLATION */
|
|
void nearest_interpolation_color(
|
|
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v)
|
|
{
|
|
const float *dataF;
|
|
unsigned char *dataI;
|
|
int y1, x1;
|
|
|
|
/* ImBuf in must have a valid rect or rect_float, assume this is already checked */
|
|
|
|
x1 = (int)(u);
|
|
y1 = (int)(v);
|
|
|
|
/* sample area entirely outside image? */
|
|
if (x1 < 0 || x1 > in->x - 1 || y1 < 0 || y1 > in->y - 1) {
|
|
if (outI) {
|
|
outI[0] = outI[1] = outI[2] = outI[3] = 0;
|
|
}
|
|
if (outF) {
|
|
outF[0] = outF[1] = outF[2] = outF[3] = 0.0f;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/* sample including outside of edges of image */
|
|
if (x1 < 0 || y1 < 0) {
|
|
if (outI) {
|
|
outI[0] = 0;
|
|
outI[1] = 0;
|
|
outI[2] = 0;
|
|
outI[3] = 0;
|
|
}
|
|
if (outF) {
|
|
outF[0] = 0.0f;
|
|
outF[1] = 0.0f;
|
|
outF[2] = 0.0f;
|
|
outF[3] = 0.0f;
|
|
}
|
|
}
|
|
else {
|
|
dataI = (unsigned char *)in->rect + ((size_t)in->x) * y1 * 4 + 4 * x1;
|
|
if (outI) {
|
|
outI[0] = dataI[0];
|
|
outI[1] = dataI[1];
|
|
outI[2] = dataI[2];
|
|
outI[3] = dataI[3];
|
|
}
|
|
dataF = in->rect_float + ((size_t)in->x) * y1 * 4 + 4 * x1;
|
|
if (outF) {
|
|
outF[0] = dataF[0];
|
|
outF[1] = dataF[1];
|
|
outF[2] = dataF[2];
|
|
outF[3] = dataF[3];
|
|
}
|
|
}
|
|
}
|
|
|
|
void nearest_interpolation_color_wrap(
|
|
struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v)
|
|
{
|
|
const float *dataF;
|
|
unsigned char *dataI;
|
|
int y, x;
|
|
|
|
/* ImBuf in must have a valid rect or rect_float, assume this is already checked */
|
|
|
|
x = (int)floor(u);
|
|
y = (int)floor(v);
|
|
|
|
x = x % in->x;
|
|
y = y % in->y;
|
|
|
|
/* wrap interpolation pixels - main difference from nearest_interpolation_color */
|
|
if (x < 0) {
|
|
x += in->x;
|
|
}
|
|
if (y < 0) {
|
|
y += in->y;
|
|
}
|
|
|
|
dataI = (unsigned char *)in->rect + ((size_t)in->x) * y * 4 + 4 * x;
|
|
if (outI) {
|
|
outI[0] = dataI[0];
|
|
outI[1] = dataI[1];
|
|
outI[2] = dataI[2];
|
|
outI[3] = dataI[3];
|
|
}
|
|
dataF = in->rect_float + ((size_t)in->x) * y * 4 + 4 * x;
|
|
if (outF) {
|
|
outF[0] = dataF[0];
|
|
outF[1] = dataF[1];
|
|
outF[2] = dataF[2];
|
|
outF[3] = dataF[3];
|
|
}
|
|
}
|
|
|
|
/**
 * Nearest-pixel sample of \a in at (\a x, \a y), written to pixel (\a xout, \a yout) of
 * \a out.
 *
 * Does nothing when \a in is NULL or has neither a byte nor a float buffer.
 */
void nearest_interpolation(ImBuf *in, ImBuf *out, float x, float y, int xout, int yout)
{
  const bool has_source = (in != NULL) && (in->rect != NULL || in->rect_float != NULL);
  if (!has_source) {
    return;
  }

  /* Each destination stays NULL when `out` lacks the matching buffer. */
  unsigned char *dst_byte = NULL;
  float *dst_float = NULL;
  pixel_from_buffer(out, &dst_byte, &dst_float, xout, yout);

  nearest_interpolation_color(in, dst_byte, dst_float, x, y);
}
|
|
|
|
/*********************** Threaded image processing *************************/
|
|
|
|
/**
 * Task-pool callback: run the user's thread function on one handle.
 *
 * The pool's user data is the `do_thread` callback that IMB_processor_apply_threaded
 * registered, whose type is `void *(*)(void *)`. It is called through that exact type
 * because calling a function through an incompatible pointer type is undefined behavior.
 * The returned pointer is not used.
 *
 * \param pool: Pool whose user data holds the callback.
 * \param taskdata: Handle passed through to the callback.
 */
static void processor_apply_func(TaskPool *__restrict pool, void *taskdata)
{
  void *(*do_thread)(void *) = (void *(*)(void *))BLI_task_pool_user_data(pool);
  (void)do_thread(taskdata);
}
|
|
|
|
void IMB_processor_apply_threaded(
|
|
int buffer_lines,
|
|
int handle_size,
|
|
void *init_customdata,
|
|
void(init_handle)(void *handle, int start_line, int tot_line, void *customdata),
|
|
void *(do_thread)(void *))
|
|
{
|
|
const int lines_per_task = 64;
|
|
|
|
TaskPool *task_pool;
|
|
|
|
void *handles;
|
|
int total_tasks = (buffer_lines + lines_per_task - 1) / lines_per_task;
|
|
int i, start_line;
|
|
|
|
task_pool = BLI_task_pool_create(do_thread, TASK_PRIORITY_LOW);
|
|
|
|
handles = MEM_callocN(handle_size * total_tasks, "processor apply threaded handles");
|
|
|
|
start_line = 0;
|
|
|
|
for (i = 0; i < total_tasks; i++) {
|
|
int lines_per_current_task;
|
|
void *handle = ((char *)handles) + handle_size * i;
|
|
|
|
if (i < total_tasks - 1) {
|
|
lines_per_current_task = lines_per_task;
|
|
}
|
|
else {
|
|
lines_per_current_task = buffer_lines - start_line;
|
|
}
|
|
|
|
init_handle(handle, start_line, lines_per_current_task, init_customdata);
|
|
|
|
BLI_task_pool_push(task_pool, processor_apply_func, handle, false, NULL);
|
|
|
|
start_line += lines_per_task;
|
|
}
|
|
|
|
/* work and wait until tasks are done */
|
|
BLI_task_pool_work_and_wait(task_pool);
|
|
|
|
/* Free memory. */
|
|
MEM_freeN(handles);
|
|
BLI_task_pool_free(task_pool);
|
|
}
|
|
|
|
typedef struct ScanlineGlobalData {
|
|
void *custom_data;
|
|
ScanlineThreadFunc do_thread;
|
|
int scanlines_per_task;
|
|
int total_scanlines;
|
|
} ScanlineGlobalData;
|
|
|
|
/**
 * Task-pool callback: process the scanline range that starts at the line encoded in
 * \a taskdata.
 *
 * The range length is the per-task chunk size, shortened so it never runs past the total
 * number of scanlines stored in the pool's ScanlineGlobalData.
 */
static void processor_apply_scanline_func(TaskPool *__restrict pool, void *taskdata)
{
  const ScanlineGlobalData *shared = BLI_task_pool_user_data(pool);

  /* The start line travels inside the pointer value itself. */
  const int first_scanline = POINTER_AS_INT(taskdata);
  const int remaining = shared->total_scanlines - first_scanline;
  const int count = min_ii(shared->scanlines_per_task, remaining);

  shared->do_thread(shared->custom_data, first_scanline, count);
}
|
|
|
|
void IMB_processor_apply_threaded_scanlines(int total_scanlines,
|
|
ScanlineThreadFunc do_thread,
|
|
void *custom_data)
|
|
{
|
|
const int scanlines_per_task = 64;
|
|
ScanlineGlobalData data;
|
|
data.custom_data = custom_data;
|
|
data.do_thread = do_thread;
|
|
data.scanlines_per_task = scanlines_per_task;
|
|
data.total_scanlines = total_scanlines;
|
|
const int total_tasks = (total_scanlines + scanlines_per_task - 1) / scanlines_per_task;
|
|
TaskPool *task_pool = BLI_task_pool_create(&data, TASK_PRIORITY_LOW);
|
|
for (int i = 0, start_line = 0; i < total_tasks; i++) {
|
|
BLI_task_pool_push(
|
|
task_pool, processor_apply_scanline_func, POINTER_FROM_INT(start_line), false, NULL);
|
|
start_line += scanlines_per_task;
|
|
}
|
|
|
|
/* work and wait until tasks are done */
|
|
BLI_task_pool_work_and_wait(task_pool);
|
|
|
|
/* Free memory. */
|
|
BLI_task_pool_free(task_pool);
|
|
}
|
|
|
|
/* Alpha-under */
|
|
|
|
/**
 * Composite a float RGBA buffer over a solid background color, in place.
 *
 * Pixels with zero alpha are replaced by \a backcol; for all others the background is added
 * to the existing color weighted by (1 - alpha). Every pixel ends with alpha 1.
 *
 * \param rect_float: Buffer of \a x * \a y pixels, four floats each.
 * \param backcol: Background RGB.
 */
void IMB_alpha_under_color_float(float *rect_float, int x, int y, float backcol[3])
{
  const size_t pixel_count = ((size_t)x) * y;

  for (size_t i = 0; i < pixel_count; i++) {
    float *pixel = rect_float + i * 4;
    const float alpha = pixel[3];

    if (alpha == 0.0f) {
      /* Fully transparent: the background shows through unchanged. */
      pixel[0] = backcol[0];
      pixel[1] = backcol[1];
      pixel[2] = backcol[2];
    }
    else {
      const float background_weight = 1.0f - alpha;

      pixel[0] += background_weight * backcol[0];
      pixel[1] += background_weight * backcol[1];
      pixel[2] += background_weight * backcol[2];
    }

    pixel[3] = 1.0f;
  }
}
|
|
|
|
/* Convert a 0..255 float channel to a byte: clamp to the valid range, then truncate.
 * Clamping avoids the undefined out-of-range float to integer conversion. */
static unsigned char alpha_under_channel_to_byte(float value)
{
  if (!(value > 0.0f)) {
    /* Also catches NaN. */
    return 0;
  }
  if (value >= 255.0f) {
    return 255;
  }
  return (unsigned char)value;
}

/**
 * Composite a byte RGBA buffer over a solid background color, in place.
 *
 * Fully opaque pixels are left as they are, fully transparent pixels are replaced by the
 * background, and partially transparent pixels become
 * `color * alpha + background * (1 - alpha)`. Every pixel ends with alpha 255.
 *
 * \param rect: Buffer of \a x * \a y pixels, four bytes each.
 * \param backcol: Background RGB as floats; scaled by 255 to byte range before blending.
 */
void IMB_alpha_under_color_byte(unsigned char *rect, int x, int y, float backcol[3])
{
  /* Bring the background into byte range once, so that both the transparent and the blended
   * branch use the same scale as the byte channels. */
  const float back_byte[3] = {backcol[0] * 255.0f, backcol[1] * 255.0f, backcol[2] * 255.0f};

  const size_t pixel_count = ((size_t)x) * y;
  unsigned char *cp = rect;

  for (size_t i = 0; i < pixel_count; i++, cp += 4) {
    if (cp[3] == 255) {
      /* Opaque: nothing shows through. */
      continue;
    }

    if (cp[3] == 0) {
      cp[0] = alpha_under_channel_to_byte(back_byte[0]);
      cp[1] = alpha_under_channel_to_byte(back_byte[1]);
      cp[2] = alpha_under_channel_to_byte(back_byte[2]);
    }
    else {
      const float alpha = cp[3] / 255.0;
      const float mul = 1.0f - alpha;

      cp[0] = alpha_under_channel_to_byte((cp[0] * alpha) + mul * back_byte[0]);
      cp[1] = alpha_under_channel_to_byte((cp[1] * alpha) + mul * back_byte[1]);
      cp[2] = alpha_under_channel_to_byte((cp[2] * alpha) + mul * back_byte[2]);
    }

    cp[3] = 255;
  }
}
|
|
|
|
/* Sample pixel of image using NEAREST method. */
|
|
/**
 * Sample \a ibuf at (\a x, \a y) with nearest-pixel lookup and return the result as floats.
 *
 * When the image has a float buffer it is sampled directly and \a make_linear_rgb is not
 * consulted. Otherwise the byte buffer is sampled, converted to float, and, if
 * \a make_linear_rgb is set, converted from the image's byte colorspace to scene linear.
 *
 * \param color: Receives the sampled RGBA.
 */
void IMB_sampleImageAtLocation(ImBuf *ibuf, float x, float y, bool make_linear_rgb, float color[4])
{
  if (ibuf->rect_float) {
    nearest_interpolation_color(ibuf, NULL, color, x, y);
    return;
  }

  unsigned char sampled_bytes[4];
  nearest_interpolation_color(ibuf, sampled_bytes, NULL, x, y);
  rgba_uchar_to_float(color, sampled_bytes);

  if (!make_linear_rgb) {
    return;
  }

  IMB_colormanagement_colorspace_to_scene_linear_v4(color, false, ibuf->rect_colorspace);
}
|