VSE: bilinear upscaling no longer adds transparent border around the image #117717
|
@ -142,7 +142,7 @@ inline void interpolate_nearest_wrap_fl(
|
|||
}
|
||||
|
||||
/**
|
||||
* Bilinear sampling.
|
||||
* Bilinear sampling (with black border).
|
||||
*
|
||||
* Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
|
||||
* based on fractional parts of u,v. Samples outside the image are turned
|
||||
|
@ -161,6 +161,26 @@ inline void interpolate_nearest_wrap_fl(
|
|||
void interpolate_bilinear_border_fl(
|
||||
const float *buffer, float *output, int width, int height, int components, float u, float v);
|
||||
|
||||
/**
|
||||
* Bilinear sampling.
|
||||
*
|
||||
* Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
|
||||
* based on fractional parts of u,v.
|
||||
* Samples outside the image are clamped to texels at image edge.
|
||||
*
|
||||
* Note that you probably want to subtract 0.5 from u,v before this function,
|
||||
* to get proper filtering.
|
||||
*/
|
||||
|
||||
[[nodiscard]] uchar4 interpolate_bilinear_byte(
|
||||
const uchar *buffer, int width, int height, float u, float v);
|
||||
|
||||
[[nodiscard]] float4 interpolate_bilinear_fl(
|
||||
const float *buffer, int width, int height, float u, float v);
|
||||
|
||||
void interpolate_bilinear_fl(
|
||||
const float *buffer, float *output, int width, int height, int components, float u, float v);
|
||||
|
||||
/**
|
||||
* Wrapped bilinear sampling. (u,v) is repeated to be inside the image size,
|
||||
* including properly wrapping samples that are right on the edges.
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include "BLI_simd.h"
|
||||
#include "BLI_strict_flags.h"
|
||||
|
||||
namespace blender::math {
|
||||
|
||||
enum class eCubicFilter {
|
||||
BSpline,
|
||||
Mitchell,
|
||||
|
@ -24,7 +26,7 @@ enum class eCubicFilter {
|
|||
|
||||
/* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
|
||||
* f is 0..1 offset from texel center in pixel space. */
|
||||
template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficients(float f)
|
||||
template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
|
||||
{
|
||||
float f2 = f * f;
|
||||
float f3 = f2 * f;
|
||||
|
@ -35,7 +37,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
|
|||
float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
|
||||
float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
|
||||
float w2 = 1.0f - w0 - w1 - w3;
|
||||
return blender::float4(w0, w1, w2, w3);
|
||||
return float4(w0, w1, w2, w3);
|
||||
}
|
||||
else if constexpr (filter == eCubicFilter::Mitchell) {
|
||||
/* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
|
||||
|
@ -43,7 +45,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
|
|||
float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
|
||||
float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
|
||||
float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
|
||||
return blender::float4(w0, w1, w2, w3);
|
||||
return float4(w0, w1, w2, w3);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,15 +54,17 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
|
|||
# include <smmintrin.h> /* _mm_floor_ps */
|
||||
# endif
|
||||
|
||||
/* Functions below are hard to express before SSE4. If compiling to that
|
||||
* or NEON via sse2neon, just use the simple forms. On SSE2, do it the
|
||||
* hard way. */
|
||||
|
||||
BLI_INLINE __m128 floor_simd(__m128 v)
|
||||
{
|
||||
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
|
||||
/* If we're on SSE4 or ARM NEON, just use the simple floor() way. */
|
||||
__m128 v_floor = _mm_floor_ps(v);
|
||||
# else
|
||||
/* The hard way: truncate, for negative inputs this will round towards zero.
|
||||
* Then compare with input, and subtract 1 for the inputs that were
|
||||
* negative. */
|
||||
/* Truncate, for negative inputs this will round towards zero. Then compare
|
||||
* with input, and subtract 1 for the inputs that were negative. */
|
||||
__m128 v_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
|
||||
__m128 v_neg = _mm_cmplt_ps(v, v_trunc);
|
||||
__m128 v_floor = _mm_sub_ps(v_trunc, _mm_and_ps(v_neg, _mm_set1_ps(1.0f)));
|
||||
|
@ -68,6 +72,30 @@ BLI_INLINE __m128 floor_simd(__m128 v)
|
|||
return v_floor;
|
||||
}
|
||||
|
||||
BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b)
|
||||
{
|
||||
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
|
||||
aras_p marked this conversation as resolved
|
||||
return _mm_min_epi32(a, b);
|
||||
# else
|
||||
__m128i cmp = _mm_cmplt_epi32(a, b);
|
||||
a = _mm_and_si128(cmp, a);
|
||||
b = _mm_andnot_si128(cmp, b);
|
||||
return _mm_or_si128(a, b);
|
||||
# endif
|
||||
}
|
||||
|
||||
BLI_INLINE __m128i max_i_simd(__m128i a, __m128i b)
|
||||
{
|
||||
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
|
||||
return _mm_max_epi32(a, b);
|
||||
# else
|
||||
__m128i cmp = _mm_cmplt_epi32(b, a);
|
||||
a = _mm_and_si128(cmp, a);
|
||||
b = _mm_andnot_si128(cmp, b);
|
||||
return _mm_or_si128(a, b);
|
||||
# endif
|
||||
}
|
||||
|
||||
template<eCubicFilter filter>
|
||||
BLI_INLINE void bicubic_interpolation_uchar_simd(
|
||||
const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
|
||||
|
@ -90,8 +118,8 @@ BLI_INLINE void bicubic_interpolation_uchar_simd(
|
|||
__m128 frac_uv = _mm_sub_ps(uv, uv_floor);
|
||||
|
||||
/* Calculate pixel weights. */
|
||||
blender::float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
|
||||
blender::float4 wy = cubic_filter_coefficients<filter>(
|
||||
float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
|
||||
float4 wy = cubic_filter_coefficients<filter>(
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));
|
||||
|
||||
/* Read 4x4 source pixels and blend them. */
|
||||
|
@ -134,8 +162,6 @@ template<typename T, eCubicFilter filter>
|
|||
static void bicubic_interpolation(
|
||||
const T *src_buffer, T *output, int width, int height, int components, float u, float v)
|
||||
{
|
||||
using namespace blender;
|
||||
|
||||
BLI_assert(src_buffer && output);
|
||||
|
||||
#if BLI_HAVE_SSE2
|
||||
|
@ -234,6 +260,7 @@ static void bicubic_interpolation(
|
|||
}
|
||||
}
|
||||
|
||||
template<bool border>
|
||||
BLI_INLINE void bilinear_fl_impl(const float *buffer,
|
||||
float *output,
|
||||
int width,
|
||||
|
@ -288,33 +315,23 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Sample including outside of edges of image. */
|
||||
if (x1 < 0 || y1 < 0) {
|
||||
row1 = empty;
|
||||
/* Sample locations. */
|
||||
if constexpr (border) {
|
||||
row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (size_t(width) * y1 + x1) * components;
|
||||
row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (size_t(width) * y2 + x1) * components;
|
||||
row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (size_t(width) * y1 + x2) * components;
|
||||
row4 = (x2 > width - 1 || y2 > height - 1) ? empty :
|
||||
buffer + (size_t(width) * y2 + x2) * components;
|
||||
}
|
||||
else {
|
||||
row1 = buffer + width * y1 * components + components * x1;
|
||||
}
|
||||
|
||||
if (x1 < 0 || y2 > height - 1) {
|
||||
row2 = empty;
|
||||
}
|
||||
else {
|
||||
row2 = buffer + width * y2 * components + components * x1;
|
||||
}
|
||||
|
||||
if (x2 > width - 1 || y1 < 0) {
|
||||
row3 = empty;
|
||||
}
|
||||
else {
|
||||
row3 = buffer + width * y1 * components + components * x2;
|
||||
}
|
||||
|
||||
if (x2 > width - 1 || y2 > height - 1) {
|
||||
row4 = empty;
|
||||
}
|
||||
else {
|
||||
row4 = buffer + width * y2 * components + components * x2;
|
||||
x1 = blender::math::clamp(x1, 0, width - 1);
|
||||
x2 = blender::math::clamp(x2, 0, width - 1);
|
||||
y1 = blender::math::clamp(y1, 0, height - 1);
|
||||
y2 = blender::math::clamp(y2, 0, height - 1);
|
||||
row1 = buffer + (size_t(width) * y1 + x1) * components;
|
||||
row2 = buffer + (size_t(width) * y2 + x1) * components;
|
||||
row3 = buffer + (size_t(width) * y1 + x2) * components;
|
||||
row4 = buffer + (size_t(width) * y2 + x2) * components;
|
||||
}
|
||||
|
||||
a = u - uf;
|
||||
|
@ -355,24 +372,13 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
|
|||
}
|
||||
}
|
||||
|
||||
namespace blender::math {
|
||||
|
||||
uchar4 interpolate_bilinear_border_byte(
|
||||
const uchar *buffer, int width, int height, float u, float v)
|
||||
template<bool border>
|
||||
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
BLI_assert(buffer);
|
||||
uchar4 res;
|
||||
|
||||
#if BLI_HAVE_SSE2
|
||||
/* Bilinear interpolation needs to read and blend four image pixels, while
|
||||
* also handling conditions of sample coordinate being outside of the
|
||||
* image, in which case black (all zeroes) should be used as the sample
|
||||
* contribution.
|
||||
*
|
||||
* Code below does all that without any branches, by making outside the
|
||||
* image sample locations still read the first pixel of the image, but
|
||||
* later making sure that the result is set to zero for that sample. */
|
||||
|
||||
__m128 uvuv = _mm_set_ps(v, u, v, u);
|
||||
__m128 uvuv_floor = floor_simd(uvuv);
|
||||
|
||||
|
@ -381,18 +387,42 @@ uchar4 interpolate_bilinear_border_byte(
|
|||
/* Check whether any of the coordinates are outside of the image. */
|
||||
__m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
|
||||
_mm_set1_epi32(1));
|
||||
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
|
||||
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
|
||||
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
|
||||
|
||||
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
|
||||
__m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
__m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
__m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
|
||||
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
|
||||
/* Set x & y to zero for invalid samples. */
|
||||
x1234 = _mm_andnot_si128(invalid_1234, x1234);
|
||||
y1234 = _mm_andnot_si128(invalid_1234, y1234);
|
||||
/* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
|
||||
__m128i x1234, y1234, invalid_1234;
|
||||
|
||||
if constexpr (border) {
|
||||
/* Blend black colors for samples right outside the image: figure out
|
||||
* which of the 4 samples were outside, set their coordinates to zero
|
||||
* and later on put black color into their place. */
|
||||
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
|
||||
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
|
||||
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
|
||||
|
||||
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
|
||||
x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
|
||||
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
|
||||
/* Set x & y to zero for invalid samples. */
|
||||
x1234 = _mm_andnot_si128(invalid_1234, x1234);
|
||||
y1234 = _mm_andnot_si128(invalid_1234, y1234);
|
||||
}
|
||||
else {
|
||||
/* Clamp samples to image edges, unless all four of them are outside
|
||||
* in which case return black. */
|
||||
__m128i xy12_clamped = max_i_simd(xy12, _mm_setzero_si128());
|
||||
xy12_clamped = min_i_simd(xy12_clamped, size_minus_1);
|
||||
__m128i valid_xy12 = _mm_cmpeq_epi32(xy12, xy12_clamped);
|
||||
__m128i valid_pairs = _mm_and_si128(valid_xy12,
|
||||
_mm_shuffle_epi32(valid_xy12, _MM_SHUFFLE(0, 3, 2, 1)));
|
||||
if (_mm_movemask_ps(_mm_castsi128_ps(valid_pairs)) == 0) {
|
||||
return uchar4(0);
|
||||
}
|
||||
|
||||
x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
}
|
||||
|
||||
/* Read the four sample values. Do address calculations in C, since SSE
|
||||
* before 4.1 makes it very cumbersome to do full integer multiplies. */
|
||||
|
@ -405,8 +435,10 @@ uchar4 interpolate_bilinear_border_byte(
|
|||
int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
|
||||
int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
|
||||
__m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
|
||||
/* Set samples to black for the ones that were actually invalid. */
|
||||
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
|
||||
if constexpr (border) {
|
||||
/* Set samples to black for the ones that were actually invalid. */
|
||||
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
|
||||
}
|
||||
|
||||
/* Expand samples from packed 8-bit RGBA to full floats:
|
||||
* spread to 16 bit values. */
|
||||
|
@ -503,17 +535,41 @@ uchar4 interpolate_bilinear_border_byte(
|
|||
return res;
|
||||
}
|
||||
|
||||
uchar4 interpolate_bilinear_border_byte(
|
||||
const uchar *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
return bilinear_byte_impl<true>(buffer, width, height, u, v);
|
||||
}
|
||||
|
||||
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
return bilinear_byte_impl<false>(buffer, width, height, u, v);
|
||||
}
|
||||
|
||||
float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
float4 res;
|
||||
bilinear_fl_impl(buffer, res, width, height, 4, u, v);
|
||||
bilinear_fl_impl<true>(buffer, res, width, height, 4, u, v);
|
||||
return res;
|
||||
}
|
||||
|
||||
void interpolate_bilinear_border_fl(
|
||||
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
||||
{
|
||||
bilinear_fl_impl(buffer, output, width, height, components, u, v);
|
||||
bilinear_fl_impl<true>(buffer, output, width, height, components, u, v);
|
||||
}
|
||||
|
||||
float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
float4 res;
|
||||
bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v);
|
||||
return res;
|
||||
}
|
||||
|
||||
void interpolate_bilinear_fl(
|
||||
const float *buffer, float *output, int width, int height, int components, float u, float v)
|
||||
{
|
||||
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v);
|
||||
}
|
||||
|
||||
void interpolate_bilinear_wrap_fl(const float *buffer,
|
||||
|
@ -526,7 +582,7 @@ void interpolate_bilinear_wrap_fl(const float *buffer,
|
|||
bool wrap_x,
|
||||
bool wrap_y)
|
||||
{
|
||||
bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
|
||||
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
|
||||
}
|
||||
|
||||
uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
|
||||
|
@ -574,7 +630,7 @@ uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height
|
|||
float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
|
||||
{
|
||||
float4 res;
|
||||
bilinear_fl_impl(buffer, res, width, height, 4, u, v, true, true);
|
||||
bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v, true, true);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ TEST(math_interp, BilinearFloatSamples)
|
|||
EXPECT_V4_NEAR(exp2, res, float_tolerance);
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearCharPartiallyOutsideImage)
|
||||
TEST(math_interp, BilinearCharPartiallyOutsideImageBorder)
|
||||
{
|
||||
uchar4 res;
|
||||
uchar4 exp1 = {1, 1, 2, 2};
|
||||
|
@ -94,6 +94,20 @@ TEST(math_interp, BilinearCharPartiallyOutsideImage)
|
|||
EXPECT_EQ(exp3, res);
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearCharPartiallyOutsideImage)
|
||||
{
|
||||
uchar4 res;
|
||||
uint4 exp1 = {1, 2, 3, 4};
|
||||
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f);
|
||||
EXPECT_EQ(exp1, uint4(res));
|
||||
uint4 exp2 = {87, 113, 147, 221};
|
||||
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f);
|
||||
EXPECT_EQ(exp2, uint4(res));
|
||||
uint4 exp3 = {240, 160, 90, 20};
|
||||
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f);
|
||||
EXPECT_EQ(exp3, uint4(res));
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
|
||||
{
|
||||
uchar4 res;
|
||||
|
@ -108,7 +122,7 @@ TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
|
|||
EXPECT_EQ(exp3, res);
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearFloatPartiallyOutsideImage)
|
||||
TEST(math_interp, BilinearFloatPartiallyOutsideImageBorder)
|
||||
{
|
||||
float4 res;
|
||||
float4 exp1 = {0.5f, 1, 1.5f, 2};
|
||||
|
@ -122,6 +136,20 @@ TEST(math_interp, BilinearFloatPartiallyOutsideImage)
|
|||
EXPECT_V4_NEAR(exp3, res, float_tolerance);
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearFloatPartiallyOutsideImage)
|
||||
{
|
||||
float4 res;
|
||||
float4 exp1 = {1.0f, 2.0f, 3.0f, 4.0f};
|
||||
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
|
||||
EXPECT_V4_NEAR(exp1, res, float_tolerance);
|
||||
float4 exp2 = {86.75f, 113.25f, 147.25f, 221.0f};
|
||||
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
|
||||
EXPECT_V4_NEAR(exp2, res, float_tolerance);
|
||||
float4 exp3 = {240.0f, 160.0f, 90.0f, 20.0f};
|
||||
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f);
|
||||
EXPECT_V4_NEAR(exp3, res, float_tolerance);
|
||||
}
|
||||
|
||||
TEST(math_interp, BilinearFloatPartiallyOutsideImageWrap)
|
||||
{
|
||||
float4 res;
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
namespace blender::imbuf {
|
||||
|
||||
/* Nearest sampling. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_nearest_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_nearest_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
@ -35,6 +37,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
|
|||
math::interpolate_nearest_fl(in->float_buffer.data, output, in->x, in->y, 4, u, v);
|
||||
}
|
||||
|
||||
/* Nearest sampling with UV wrapping. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_nearest_wrap_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_nearest_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
@ -44,6 +48,29 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
|
|||
return math::interpolate_nearest_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
|
||||
}
|
||||
|
||||
/* Bilinear sampling. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_bilinear_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_bilinear_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
}
|
||||
[[nodiscard]] inline float4 interpolate_bilinear_fl(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_bilinear_fl(in->float_buffer.data, in->x, in->y, u, v);
|
||||
}
|
||||
inline void interpolate_bilinear_byte(const ImBuf *in, uchar output[4], float u, float v)
|
||||
{
|
||||
uchar4 col = math::interpolate_bilinear_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
memcpy(output, &col, sizeof(col));
|
||||
}
|
||||
inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, float v)
|
||||
{
|
||||
float4 col = math::interpolate_bilinear_fl(in->float_buffer.data, in->x, in->y, u, v);
|
||||
memcpy(output, &col, sizeof(col));
|
||||
}
|
||||
|
||||
/* Bilinear sampling, samples near edge blend into transparency. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_bilinear_border_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
@ -63,6 +90,8 @@ inline void interpolate_bilinear_border_fl(const ImBuf *in, float output[4], flo
|
|||
memcpy(output, &col, sizeof(col));
|
||||
}
|
||||
|
||||
/* Bilinear sampling with UV wrapping. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_bilinear_wrap_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_bilinear_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
@ -72,6 +101,8 @@ inline void interpolate_bilinear_border_fl(const ImBuf *in, float output[4], flo
|
|||
return math::interpolate_bilinear_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
|
||||
}
|
||||
|
||||
/* Cubic B-Spline sampling. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_cubic_bspline_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_cubic_bspline_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
@ -91,6 +122,8 @@ inline void interpolate_cubic_bspline_fl(const ImBuf *in, float output[4], float
|
|||
memcpy(output, &col, sizeof(col));
|
||||
}
|
||||
|
||||
/* Cubic Mitchell sampling. */
|
||||
|
||||
[[nodiscard]] inline uchar4 interpolate_cubic_mitchell_byte(const ImBuf *in, float u, float v)
|
||||
{
|
||||
return math::interpolate_cubic_mitchell_byte(in->byte_buffer.data, in->x, in->y, u, v);
|
||||
|
|
|
@ -136,7 +136,7 @@ static void sample_image(const ImBuf *source, float u, float v, T *r_sample)
|
|||
v -= 0.5f;
|
||||
}
|
||||
if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float> && NumChannels == 4) {
|
||||
interpolate_bilinear_border_fl(source, r_sample, u, v);
|
||||
interpolate_bilinear_fl(source, r_sample, u, v);
|
||||
}
|
||||
else if constexpr (Filter == IMB_FILTER_NEAREST && std::is_same_v<T, uchar> && NumChannels == 4)
|
||||
{
|
||||
|
@ -144,7 +144,7 @@ static void sample_image(const ImBuf *source, float u, float v, T *r_sample)
|
|||
}
|
||||
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, uchar> && NumChannels == 4)
|
||||
{
|
||||
interpolate_bilinear_border_byte(source, r_sample, u, v);
|
||||
interpolate_bilinear_byte(source, r_sample, u, v);
|
||||
}
|
||||
else if constexpr (Filter == IMB_FILTER_BILINEAR && std::is_same_v<T, float>) {
|
||||
if constexpr (WrapUV) {
|
||||
|
@ -159,7 +159,7 @@ static void sample_image(const ImBuf *source, float u, float v, T *r_sample)
|
|||
true);
|
||||
}
|
||||
else {
|
||||
math::interpolate_bilinear_border_fl(
|
||||
math::interpolate_bilinear_fl(
|
||||
source->float_buffer.data, r_sample, source->x, source->y, NumChannels, u, v);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
This check is coming from some older code, but it seems to be spreading out in a lot of other cases. The confusing part of it is the order of operations. Can we make them explicit, like
defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON))
?Added
BLI_HAVE_SSE4
toBLI_simd.h
and use that.