ImBuf: optimize IMB_transform #115653

Merged
Aras Pranckevicius merged 15 commits from aras_p/blender:imb_transform_opt into main 2023-12-14 15:10:41 +01:00
8 changed files with 425 additions and 358 deletions

View File

@ -15,24 +15,14 @@ extern "C" {
void BLI_bicubic_interpolation_fl(
const float *buffer, float *output, int width, int height, int components, float u, float v);
void BLI_bicubic_interpolation_char(const unsigned char *buffer,
unsigned char *output,
int width,
int height,
int components,
float u,
float v);
void BLI_bicubic_interpolation_char(
const unsigned char *buffer, unsigned char *output, int width, int height, float u, float v);
void BLI_bilinear_interpolation_fl(
const float *buffer, float *output, int width, int height, int components, float u, float v);
void BLI_bilinear_interpolation_char(const unsigned char *buffer,
unsigned char *output,
int width,
int height,
int components,
float u,
float v);
void BLI_bilinear_interpolation_char(
const unsigned char *buffer, unsigned char *output, int width, int height, float u, float v);
void BLI_bilinear_interpolation_wrap_fl(const float *buffer,
float *output,
@ -44,16 +34,6 @@ void BLI_bilinear_interpolation_wrap_fl(const float *buffer,
bool wrap_x,
bool wrap_y);
void BLI_bilinear_interpolation_wrap_char(const unsigned char *buffer,
unsigned char *output,
int width,
int height,
int components,
float u,
float v,
bool wrap_x,
bool wrap_y);
#define EWA_MAXIDX 255
extern const float EWA_WTS[EWA_MAXIDX + 1];

View File

@ -524,6 +524,7 @@ if(WITH_GTESTS)
tests/BLI_math_bits_test.cc
tests/BLI_math_color_test.cc
tests/BLI_math_geom_test.cc
tests/BLI_math_interp_test.cc
tests/BLI_math_matrix_test.cc
tests/BLI_math_matrix_types_test.cc
tests/BLI_math_rotation_test.cc

View File

@ -7,12 +7,18 @@
*/
#include <math.h>
#include <string.h>
#include "BLI_math_base.h"
#include "BLI_math_interp.h"
#include "BLI_math_vector.h"
#include "BLI_simd.h"
#include "BLI_strict_flags.h"
#if BLI_HAVE_SSE2 && defined(__SSE4_1__)
# include <smmintrin.h> /* _mm_floor_ps */
#endif
/**************************************************************************
* INTERPOLATIONS
*
@ -236,221 +242,298 @@ void BLI_bicubic_interpolation_fl(
}
void BLI_bicubic_interpolation_char(
const uchar *buffer, uchar *output, int width, int height, int components, float u, float v)
const uchar *buffer, uchar *output, int width, int height, float u, float v)
{
bicubic_interpolation(buffer, NULL, output, NULL, width, height, components, u, v);
bicubic_interpolation(buffer, NULL, output, NULL, width, height, 4, u, v);
}
/* BILINEAR INTERPOLATION */
BLI_INLINE void bilinear_interpolation(const uchar *byte_buffer,
const float *float_buffer,
uchar *byte_output,
float *float_output,
int width,
int height,
int components,
float u,
float v,
bool wrap_x,
bool wrap_y)
BLI_INLINE void bilinear_interpolation_fl(const float *float_buffer,
float *float_output,
int width,
int height,
int components,
float u,
float v,
bool wrap_x,
bool wrap_y)
{
float a, b;
float a_b, ma_b, a_mb, ma_mb;
int y1, y2, x1, x2;
/* ImBuf in must have a valid rect or rect_float, assume this is already checked */
float uf = floorf(u);
float vf = floorf(v);
x1 = (int)floor(u);
x2 = (int)ceil(u);
y1 = (int)floor(v);
y2 = (int)ceil(v);
x1 = (int)uf;
x2 = x1 + 1;
y1 = (int)vf;
y2 = y1 + 1;
if (float_output) {
const float *row1, *row2, *row3, *row4;
const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f};
const float *row1, *row2, *row3, *row4;
const float empty[4] = {0.0f, 0.0f, 0.0f, 0.0f};
/* pixel value must be already wrapped, however values at boundaries may flip */
if (wrap_x) {
if (x1 < 0) {
x1 = width - 1;
}
if (x2 >= width) {
x2 = 0;
}
/* pixel value must be already wrapped, however values at boundaries may flip */
if (wrap_x) {
if (x1 < 0) {
x1 = width - 1;
}
else if (x2 < 0 || x1 >= width) {
copy_vn_fl(float_output, components, 0.0f);
return;
if (x2 >= width) {
x2 = 0;
}
}
else if (x2 < 0 || x1 >= width) {
copy_vn_fl(float_output, components, 0.0f);
return;
}
if (wrap_y) {
if (y1 < 0) {
y1 = height - 1;
}
if (y2 >= height) {
y2 = 0;
}
if (wrap_y) {
if (y1 < 0) {
y1 = height - 1;
}
else if (y2 < 0 || y1 >= height) {
copy_vn_fl(float_output, components, 0.0f);
return;
if (y2 >= height) {
y2 = 0;
}
}
else if (y2 < 0 || y1 >= height) {
copy_vn_fl(float_output, components, 0.0f);
return;
}
/* sample including outside of edges of image */
if (x1 < 0 || y1 < 0) {
row1 = empty;
}
else {
row1 = float_buffer + width * y1 * components + components * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = float_buffer + width * y2 * components + components * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = float_buffer + width * y1 * components + components * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = float_buffer + width * y2 * components + components * x2;
}
a = u - floorf(u);
b = v - floorf(v);
a_b = a * b;
ma_b = (1.0f - a) * b;
a_mb = a * (1.0f - b);
ma_mb = (1.0f - a) * (1.0f - b);
if (components == 1) {
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
}
else if (components == 3) {
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
}
else {
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
float_output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
}
/* sample including outside of edges of image */
if (x1 < 0 || y1 < 0) {
row1 = empty;
}
else {
const uchar *row1, *row2, *row3, *row4;
uchar empty[4] = {0, 0, 0, 0};
/* pixel value must be already wrapped, however values at boundaries may flip */
if (wrap_x) {
if (x1 < 0) {
x1 = width - 1;
}
if (x2 >= width) {
x2 = 0;
}
}
else if (x2 < 0 || x1 >= width) {
copy_vn_uchar(byte_output, components, 0);
return;
}
if (wrap_y) {
if (y1 < 0) {
y1 = height - 1;
}
if (y2 >= height) {
y2 = 0;
}
}
else if (y2 < 0 || y1 >= height) {
copy_vn_uchar(byte_output, components, 0);
return;
}
/* sample including outside of edges of image */
if (x1 < 0 || y1 < 0) {
row1 = empty;
}
else {
row1 = byte_buffer + width * y1 * components + components * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = byte_buffer + width * y2 * components + components * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = byte_buffer + width * y1 * components + components * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = byte_buffer + width * y2 * components + components * x2;
}
a = u - floorf(u);
b = v - floorf(v);
a_b = a * b;
ma_b = (1.0f - a) * b;
a_mb = a * (1.0f - b);
ma_mb = (1.0f - a) * (1.0f - b);
if (components == 1) {
byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] +
0.5f);
}
else if (components == 3) {
byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] +
0.5f);
byte_output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] +
0.5f);
byte_output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] +
0.5f);
}
else {
byte_output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] +
0.5f);
byte_output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] +
0.5f);
byte_output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] +
0.5f);
byte_output[3] = (uchar)(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] +
0.5f);
}
row1 = float_buffer + width * y1 * components + components * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = float_buffer + width * y2 * components + components * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = float_buffer + width * y1 * components + components * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = float_buffer + width * y2 * components + components * x2;
}
a = u - uf;
b = v - vf;
a_b = a * b;
ma_b = (1.0f - a) * b;
a_mb = a * (1.0f - b);
ma_mb = (1.0f - a) * (1.0f - b);
if (components == 1) {
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
}
else if (components == 3) {
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
}
else {
#if BLI_HAVE_SSE2
__m128 rgba1 = _mm_loadu_ps(row1);
__m128 rgba2 = _mm_loadu_ps(row2);
__m128 rgba3 = _mm_loadu_ps(row3);
__m128 rgba4 = _mm_loadu_ps(row4);
rgba1 = _mm_mul_ps(_mm_set1_ps(ma_mb), rgba1);
rgba2 = _mm_mul_ps(_mm_set1_ps(ma_b), rgba2);
rgba3 = _mm_mul_ps(_mm_set1_ps(a_mb), rgba3);
rgba4 = _mm_mul_ps(_mm_set1_ps(a_b), rgba4);
__m128 rgba13 = _mm_add_ps(rgba1, rgba3);
__m128 rgba24 = _mm_add_ps(rgba2, rgba4);
__m128 rgba = _mm_add_ps(rgba13, rgba24);
_mm_storeu_ps(float_output, rgba);
#else
float_output[0] = ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0];
float_output[1] = ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1];
float_output[2] = ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2];
float_output[3] = ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3];
#endif
}
}
void BLI_bilinear_interpolation_char(
const uchar *buffer, uchar *output, int width, int height, float u, float v)
{
#if BLI_HAVE_SSE2
/* Bilinear interpolation needs to read and blend four image pixels, while
* also handling conditions of sample coordinate being outside of the
* image, in which case black (all zeroes) should be used as the sample
* contribution.
*
* Code below does all that without any branches, by making outside the
* image sample locations still read the first pixel of the image, but
* later making sure that the result is set to zero for that sample. */
__m128 uvuv = _mm_set_ps(v, u, v, u);
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
/* If we're on SSE4 or ARM NEON, just use the simple floor() way. */
__m128 uvuv_floor = _mm_floor_ps(uvuv);
# else
/* The hard way: truncate, for negative inputs this will round towards zero.
* Then compare with input UV, and subtract 1 for the inputs that were
* negative. */
__m128 uv_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(uvuv));
__m128 uv_neg = _mm_cmplt_ps(uvuv, uv_trunc);
__m128 uvuv_floor = _mm_sub_ps(uv_trunc, _mm_and_ps(uv_neg, _mm_set1_ps(1.0f)));
# endif
/* x1, y1, x2, y2 */
__m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0));
/* Check whether any of the coordinates are outside of the image. */
__m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
_mm_set1_epi32(1));
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
__m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
__m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
__m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
/* Set x & y to zero for invalid samples. */
x1234 = _mm_andnot_si128(invalid_1234, x1234);
y1234 = _mm_andnot_si128(invalid_1234, y1234);
/* Read the four sample values. Do address calculations in C, since SSE
* before 4.1 makes it very cumbersome to do full integer multiplies. */
int xcoord[4];
int ycoord[4];
_mm_storeu_ps((float *)xcoord, _mm_castsi128_ps(x1234));
_mm_storeu_ps((float *)ycoord, _mm_castsi128_ps(y1234));
int sample1 = ((const int *)buffer)[ycoord[0] * (int64_t)width + xcoord[0]];
int sample2 = ((const int *)buffer)[ycoord[1] * (int64_t)width + xcoord[1]];
int sample3 = ((const int *)buffer)[ycoord[2] * (int64_t)width + xcoord[2]];
int sample4 = ((const int *)buffer)[ycoord[3] * (int64_t)width + xcoord[3]];
__m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
/* Set samples to black for the ones that were actually invalid. */
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
/* Expand samples from packed 8-bit RGBA to full floats:
* spread to 16 bit values. */
__m128i rgba16_12 = _mm_unpacklo_epi8(samples1234, _mm_setzero_si128());
__m128i rgba16_34 = _mm_unpackhi_epi8(samples1234, _mm_setzero_si128());
/* Spread to 32 bit values and convert to float. */
__m128 rgba1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_12, _mm_setzero_si128()));
__m128 rgba2 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_12, _mm_setzero_si128()));
__m128 rgba3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(rgba16_34, _mm_setzero_si128()));
__m128 rgba4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(rgba16_34, _mm_setzero_si128()));
/* Calculate interpolation factors: (1-a)*(1-b), (1-a)*b, a*(1-b), a*b */
__m128 abab = _mm_sub_ps(uvuv, uvuv_floor);
__m128 m_abab = _mm_sub_ps(_mm_set1_ps(1.0f), abab);
__m128 ab_mab = _mm_shuffle_ps(abab, m_abab, _MM_SHUFFLE(3, 2, 1, 0));
__m128 factors = _mm_mul_ps(_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(0, 0, 2, 2)),
_mm_shuffle_ps(ab_mab, ab_mab, _MM_SHUFFLE(1, 3, 1, 3)));
/* Blend the samples. */
rgba1 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(0, 0, 0, 0)), rgba1);
rgba2 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(1, 1, 1, 1)), rgba2);
rgba3 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(2, 2, 2, 2)), rgba3);
rgba4 = _mm_mul_ps(_mm_shuffle_ps(factors, factors, _MM_SHUFFLE(3, 3, 3, 3)), rgba4);
__m128 rgba13 = _mm_add_ps(rgba1, rgba3);
__m128 rgba24 = _mm_add_ps(rgba2, rgba4);
__m128 rgba = _mm_add_ps(rgba13, rgba24);
rgba = _mm_add_ps(rgba, _mm_set1_ps(0.5f));
/* Pack and write to destination: pack to 16 bit signed, then to 8 bit
* unsigned, then write resulting 32-bit value. */
__m128i rgba32 = _mm_cvttps_epi32(rgba);
__m128i rgba16 = _mm_packs_epi32(rgba32, _mm_setzero_si128());
__m128i rgba8 = _mm_packus_epi16(rgba16, _mm_setzero_si128());
_mm_store_ss((float *)output, _mm_castsi128_ps(rgba8));
#else
float a, b;
float a_b, ma_b, a_mb, ma_mb;
int y1, y2, x1, x2;
float uf = floorf(u);
float vf = floorf(v);
x1 = (int)uf;
x2 = x1 + 1;
y1 = (int)vf;
y2 = y1 + 1;
const uchar *row1, *row2, *row3, *row4;
uchar empty[4] = {0, 0, 0, 0};
/* completely outside of the image? */
if (x2 < 0 || x1 >= width) {
copy_vn_uchar(output, 4, 0);
return;
}
if (y2 < 0 || y1 >= height) {
copy_vn_uchar(output, 4, 0);
return;
}
/* sample including outside of edges of image */
if (x1 < 0 || y1 < 0) {
row1 = empty;
}
else {
row1 = buffer + width * y1 * 4 + 4 * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = buffer + width * y2 * 4 + 4 * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = buffer + width * y1 * 4 + 4 * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = buffer + width * y2 * 4 + 4 * x2;
}
a = u - uf;
b = v - vf;
a_b = a * b;
ma_b = (1.0f - a) * b;
a_mb = a * (1.0f - b);
ma_mb = (1.0f - a) * (1.0f - b);
output[0] = (uchar)(ma_mb * row1[0] + a_mb * row3[0] + ma_b * row2[0] + a_b * row4[0] + 0.5f);
output[1] = (uchar)(ma_mb * row1[1] + a_mb * row3[1] + ma_b * row2[1] + a_b * row4[1] + 0.5f);
output[2] = (uchar)(ma_mb * row1[2] + a_mb * row3[2] + ma_b * row2[2] + a_b * row4[2] + 0.5f);
output[3] = (uchar)(ma_mb * row1[3] + a_mb * row3[3] + ma_b * row2[3] + a_b * row4[3] + 0.5f);
#endif
}
void BLI_bilinear_interpolation_fl(
const float *buffer, float *output, int width, int height, int components, float u, float v)
{
bilinear_interpolation(
NULL, buffer, NULL, output, width, height, components, u, v, false, false);
}
void BLI_bilinear_interpolation_char(
const uchar *buffer, uchar *output, int width, int height, int components, float u, float v)
{
bilinear_interpolation(
buffer, NULL, output, NULL, width, height, components, u, v, false, false);
bilinear_interpolation_fl(buffer, output, width, height, components, u, v, false, false);
}
void BLI_bilinear_interpolation_wrap_fl(const float *buffer,
@ -463,22 +546,7 @@ void BLI_bilinear_interpolation_wrap_fl(const float *buffer,
bool wrap_x,
bool wrap_y)
{
bilinear_interpolation(
NULL, buffer, NULL, output, width, height, components, u, v, wrap_x, wrap_y);
}
void BLI_bilinear_interpolation_wrap_char(const uchar *buffer,
uchar *output,
int width,
int height,
int components,
float u,
float v,
bool wrap_x,
bool wrap_y)
{
bilinear_interpolation(
buffer, NULL, output, NULL, width, height, components, u, v, wrap_x, wrap_y);
bilinear_interpolation_fl(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
}
/**************************************************************************

View File

@ -0,0 +1,98 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: Apache-2.0 */
#include "testing/testing.h"
#include "BLI_math_interp.h"
static constexpr int image_width = 3;
static constexpr int image_height = 3;
static constexpr unsigned char image_char[image_height][image_width][4] = {
{{255, 254, 217, 216}, {230, 230, 230, 230}, {240, 160, 90, 20}},
{{0, 1, 2, 3}, {62, 72, 82, 92}, {126, 127, 128, 129}},
{{1, 2, 3, 4}, {73, 108, 153, 251}, {128, 129, 130, 131}},
};
TEST(math_interp, BilinearCharExactSamples)
{
unsigned char res[4];
unsigned char exp1[4] = {73, 108, 153, 251};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.0f, 2.0f);
EXPECT_EQ_ARRAY(exp1, res, 4);
unsigned char exp2[4] = {240, 160, 90, 20};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.0f, 0.0f);
EXPECT_EQ_ARRAY(exp2, res, 4);
}
TEST(math_interp, BilinearCharHalfwayUSamples)
{
unsigned char res[4];
unsigned char exp1[4] = {31, 37, 42, 48};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.5f, 1.0f);
EXPECT_EQ_ARRAY(exp1, res, 4);
unsigned char exp2[4] = {243, 242, 224, 223};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.5f, 0.0f);
EXPECT_EQ_ARRAY(exp2, res, 4);
}
TEST(math_interp, BilinearCharHalfwayVSamples)
{
unsigned char res[4];
unsigned char exp1[4] = {1, 2, 3, 4};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0.0f, 1.5f);
EXPECT_EQ_ARRAY(exp1, res, 4);
unsigned char exp2[4] = {127, 128, 129, 130};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.0f, 1.5f);
EXPECT_EQ_ARRAY(exp2, res, 4);
}
TEST(math_interp, BilinearCharSamples)
{
unsigned char res[4];
unsigned char exp1[4] = {136, 133, 132, 130};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.25f, 0.625f);
EXPECT_EQ_ARRAY(exp1, res, 4);
unsigned char exp2[4] = {219, 191, 167, 142};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.4f, 0.1f);
EXPECT_EQ_ARRAY(exp2, res, 4);
}
TEST(math_interp, BilinearCharPartiallyOutsideImage)
{
unsigned char res[4];
unsigned char exp1[4] = {1, 1, 2, 2};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -0.5f, 2.0f);
EXPECT_EQ_ARRAY(exp1, res, 4);
unsigned char exp2[4] = {9, 11, 15, 22};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 1.25f, 2.9f);
EXPECT_EQ_ARRAY(exp2, res, 4);
unsigned char exp3[4] = {173, 115, 65, 14};
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 2.2f, -0.1f);
EXPECT_EQ_ARRAY(exp3, res, 4);
}
TEST(math_interp, BilinearCharFullyOutsideImage)
{
unsigned char res[4];
unsigned char exp[4] = {0, 0, 0, 0};
/* Out of range on U */
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -1.5f, 0);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, -1.1f, 0);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 3, 0);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 5, 0);
EXPECT_EQ_ARRAY(exp, res, 4);
/* Out of range on V */
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, -3.2f);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, -1.5f);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, 3.1f);
EXPECT_EQ_ARRAY(exp, res, 4);
BLI_bilinear_interpolation_char(image_char[0][0], res, image_width, image_height, 0, 500.0f);
EXPECT_EQ_ARRAY(exp, res, 4);
}

View File

@ -684,10 +684,11 @@ void nearest_interpolation_color_wrap(
const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
void bilinear_interpolation_color(
const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
void bilinear_interpolation_color_char(
const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
void bilinear_interpolation_color_fl(
const struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
void bilinear_interpolation_color_char(const struct ImBuf *in,
unsigned char outI[4],
float u,
float v);
void bilinear_interpolation_color_fl(const struct ImBuf *in, float outF[4], float u, float v);
/**
* Note about wrapping, the u/v still needs to be within the image bounds,
* just the interpolation is wrapped.

View File

@ -83,7 +83,7 @@ void bicubic_interpolation_color(const ImBuf *in, uchar outI[4], float outF[4],
BLI_bicubic_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v);
}
else {
BLI_bicubic_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v);
BLI_bicubic_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v);
}
}
@ -108,20 +108,18 @@ void bicubic_interpolation(const ImBuf *in, ImBuf *out, float u, float v, int xo
/** \name Bi-Linear Interpolation
* \{ */
void bilinear_interpolation_color_fl(
const ImBuf *in, uchar /*outI*/[4], float outF[4], float u, float v)
void bilinear_interpolation_color_fl(const ImBuf *in, float outF[4], float u, float v)
{
BLI_assert(outF);
BLI_assert(in->float_buffer.data);
BLI_bilinear_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v);
}
void bilinear_interpolation_color_char(
const ImBuf *in, uchar outI[4], float /*outF*/[4], float u, float v)
void bilinear_interpolation_color_char(const ImBuf *in, uchar outI[4], float u, float v)
{
BLI_assert(outI);
BLI_assert(in->byte_buffer.data);
BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v);
BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v);
}
void bilinear_interpolation_color(const ImBuf *in, uchar outI[4], float outF[4], float u, float v)
@ -130,7 +128,7 @@ void bilinear_interpolation_color(const ImBuf *in, uchar outI[4], float outF[4],
BLI_bilinear_interpolation_fl(in->float_buffer.data, outF, in->x, in->y, 4, u, v);
}
else {
BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, 4, u, v);
BLI_bilinear_interpolation_char(in->byte_buffer.data, outI, in->x, in->y, u, v);
}
}

View File

@ -1762,7 +1762,7 @@ static void *do_scale_thread(void *data_v)
if (data->byte_buffer) {
uchar *pixel = data->byte_buffer + 4 * offset;
BLI_bilinear_interpolation_char(ibuf->byte_buffer.data, pixel, ibuf->x, ibuf->y, 4, u, v);
BLI_bilinear_interpolation_char(ibuf->byte_buffer.data, pixel, ibuf->x, ibuf->y, u, v);
}
if (data->float_buffer) {

View File

@ -145,34 +145,16 @@ struct TransformUserData {
}
};
/**
* \brief Base class for source discarding.
*
* The class decides if a specific uv coordinate from the source buffer should be ignored.
* This is used to mix multiple images over a single output buffer. Discarded pixels will
* not change the output buffer.
*/
class BaseDiscard {
public:
virtual ~BaseDiscard() = default;
/**
* \brief Should the source pixel at the given uv coordinate be discarded.
*/
virtual bool should_discard(const TransformUserData &user_data, const double2 &uv) = 0;
};
/**
* \brief Crop uv-coordinates that are outside the user data src_crop rect.
*/
class CropSource : public BaseDiscard {
public:
struct CropSource {
/**
* \brief Should the source pixel at the given uv coordinate be discarded.
*
* Uses user_data.src_crop to determine if the uv coordinate should be skipped.
*/
bool should_discard(const TransformUserData &user_data, const double2 &uv) override
static bool should_discard(const TransformUserData &user_data, const double2 &uv)
{
return uv.x < user_data.src_crop.xmin || uv.x >= user_data.src_crop.xmax ||
uv.y < user_data.src_crop.ymin || uv.y >= user_data.src_crop.ymax;
@ -182,14 +164,13 @@ class CropSource : public BaseDiscard {
/**
* \brief Discard that does not discard anything.
*/
class NoDiscard : public BaseDiscard {
public:
struct NoDiscard {
/**
* \brief Should the source pixel at the given uv coordinate be discarded.
*
* Will never discard any pixels.
*/
bool should_discard(const TransformUserData & /*user_data*/, const double2 & /*uv*/) override
static bool should_discard(const TransformUserData & /*user_data*/, const double2 & /*uv*/)
{
return false;
}
@ -250,73 +231,19 @@ class PixelPointer {
};
/**
* \brief Wrapping mode for the uv coordinates.
*
* Subclasses have the ability to change the UV coordinates when sampling the source buffer.
* \brief Repeats UV coordinate.
*/
class BaseUVWrapping {
public:
/**
* \brief modify the given u coordinate.
*/
virtual double modify_u(const ImBuf *source_buffer, double u) = 0;
/**
* \brief modify the given v coordinate.
*/
virtual double modify_v(const ImBuf *source_buffer, double v) = 0;
/**
* \brief modify the given uv coordinate.
*/
double2 modify_uv(const ImBuf *source_buffer, const double2 &uv)
{
return double2(modify_u(source_buffer, uv.x), modify_v(source_buffer, uv.y));
}
};
/**
* \brief UVWrapping method that does not modify the UV coordinates.
*/
class PassThroughUV : public BaseUVWrapping {
public:
double modify_u(const ImBuf * /*source_buffer*/, double u) override
{
return u;
}
double modify_v(const ImBuf * /*source_buffer*/, double v) override
{
return v;
}
};
/**
* \brief UVWrapping method that wrap repeats the UV coordinates.
*/
class WrapRepeatUV : public BaseUVWrapping {
public:
double modify_u(const ImBuf *source_buffer, double u) override
{
int x = int(floor(u));
x = x % source_buffer->x;
static float wrap_uv(float value, int size)
{
int x = int(floorf(value));
if (UNLIKELY(x < 0 || x >= size)) {
x %= size;
if (x < 0) {
x += source_buffer->x;
x += size;
}
return x;
}
double modify_v(const ImBuf *source_buffer, double v) override
{
int y = int(floor(v));
y = y % source_buffer->y;
if (y < 0) {
y += source_buffer->y;
}
return y;
}
};
return x;
}
/* TODO: should we use math_vectors for this. */
template<typename StorageType, int NumChannels>
@ -369,14 +296,10 @@ template<
*/
int NumChannels,
/**
* \brief Wrapping method to perform
*
* Should be a subclass of BaseUVWrapper
* \brief Should UVs wrap
*/
typename UVWrapping>
bool UVWrapping>
class Sampler {
UVWrapping uv_wrapper;
public:
using ChannelType = StorageType;
static const int ChannelLen = NumChannels;
@ -384,26 +307,29 @@ class Sampler {
void sample(const ImBuf *source, const double2 &uv, SampleType &r_sample)
{
float u = float(uv.x);
float v = float(uv.y);
if constexpr (UVWrapping) {
u = wrap_uv(u, source->x);
v = wrap_uv(v, source->y);
}
if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, float> &&
NumChannels == 4)
{
const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv);
bilinear_interpolation_color_fl(source, nullptr, r_sample.data(), UNPACK2(wrapped_uv));
bilinear_interpolation_color_fl(source, r_sample.data(), u, v);
}
else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<StorageType, uchar> &&
NumChannels == 4)
{
const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv);
nearest_interpolation_color_char(source, r_sample.data(), nullptr, UNPACK2(wrapped_uv));
nearest_interpolation_color_char(source, r_sample.data(), nullptr, u, v);
}
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, uchar> &&
NumChannels == 4)
{
const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv);
bilinear_interpolation_color_char(source, r_sample.data(), nullptr, UNPACK2(wrapped_uv));
bilinear_interpolation_color_char(source, r_sample.data(), u, v);
}
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<StorageType, float>) {
if constexpr (std::is_same_v<UVWrapping, WrapRepeatUV>) {
if constexpr (UVWrapping) {
BLI_bilinear_interpolation_wrap_fl(source->float_buffer.data,
r_sample.data(),
source->x,
@ -414,18 +340,12 @@ class Sampler {
true);
}
else {
const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv);
BLI_bilinear_interpolation_fl(source->float_buffer.data,
r_sample.data(),
source->x,
source->y,
NumChannels,
UNPACK2(wrapped_uv));
BLI_bilinear_interpolation_fl(
source->float_buffer.data, r_sample.data(), source->x, source->y, NumChannels, u, v);
}
}
else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<StorageType, float>) {
const double2 wrapped_uv = uv_wrapper.modify_uv(source, uv);
sample_nearest_float(source, wrapped_uv, r_sample);
sample_nearest_float(source, u, v, r_sample);
}
else {
/* Unsupported sampler. */
@ -434,13 +354,16 @@ class Sampler {
}
private:
void sample_nearest_float(const ImBuf *source, const double2 &uv, SampleType &r_sample)
void sample_nearest_float(const ImBuf *source,
const float u,
const float v,
SampleType &r_sample)
{
BLI_STATIC_ASSERT(std::is_same_v<StorageType, float>);
/* ImBuf in must have a valid rect or rect_float, assume this is already checked */
int x1 = int(uv.x);
int y1 = int(uv.y);
int x1 = int(u);
int y1 = int(v);
/* Break when sample outside image is requested. */
if (x1 < 0 || x1 >= source->x || y1 < 0 || y1 >= source->y) {
@ -537,9 +460,7 @@ class ChannelConverter {
*/
template<
/**
* \brief Discard function to use.
*
* \attention Should be a subclass of BaseDiscard.
* \brief Discard functor that implements `should_discard`.
*/
typename Discard,
@ -659,17 +580,17 @@ ScanlineThreadFunc get_scanline_function(const eIMBTransformMode mode)
case IMB_TRANSFORM_MODE_REGULAR:
return transform_scanline_function<
ScanlineProcessor<NoDiscard,
Sampler<Filter, StorageType, SourceNumChannels, PassThroughUV>,
Sampler<Filter, StorageType, SourceNumChannels, false>,
PixelPointer<StorageType, DestinationNumChannels>>>;
case IMB_TRANSFORM_MODE_CROP_SRC:
return transform_scanline_function<
ScanlineProcessor<CropSource,
Sampler<Filter, StorageType, SourceNumChannels, PassThroughUV>,
Sampler<Filter, StorageType, SourceNumChannels, false>,
PixelPointer<StorageType, DestinationNumChannels>>>;
case IMB_TRANSFORM_MODE_WRAP_REPEAT:
return transform_scanline_function<
ScanlineProcessor<NoDiscard,
Sampler<Filter, StorageType, SourceNumChannels, WrapRepeatUV>,
Sampler<Filter, StorageType, SourceNumChannels, true>,
PixelPointer<StorageType, DestinationNumChannels>>>;
}