VSE: bilinear upscaling no longer adds transparent border around the image #117717

Merged
Aras Pranckevicius merged 8 commits from aras_p/blender:vse_filter_aa into main 2024-02-02 16:29:01 +01:00
16 changed files with 393 additions and 155 deletions

View File

@ -1316,7 +1316,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata,
for (int x = 0; x < tmpibuf->x; x++, dst++) {
vec[0] = float(x);
mul_v3_m4v3(rvec, mat, vec);
*dst = imbuf::interpolate_bilinear_fl(ibuf, rvec[0], rvec[1]);
*dst = imbuf::interpolate_bilinear_border_fl(ibuf, rvec[0], rvec[1]);
}
}
else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) {
@ -1342,7 +1342,7 @@ static void tracking_stabilize_frame_interpolation_cb(void *__restrict userdata,
for (int x = 0; x < tmpibuf->x; x++, dst++) {
vec[0] = float(x);
mul_v3_m4v3(rvec, mat, vec);
*dst = imbuf::interpolate_bilinear_byte(ibuf, rvec[0], rvec[1]);
*dst = imbuf::interpolate_bilinear_border_byte(ibuf, rvec[0], rvec[1]);
}
}
else if (data->tracking_filter == TRACKING_FILTER_BICUBIC) {

View File

@ -142,7 +142,7 @@ inline void interpolate_nearest_wrap_fl(
}
/**
* Bilinear sampling.
* Bilinear sampling (with black border).
*
* Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
* based on fractional parts of u,v. Samples outside the image are turned
@ -152,6 +152,26 @@ inline void interpolate_nearest_wrap_fl(
* to get proper filtering.
*/
[[nodiscard]] uchar4 interpolate_bilinear_border_byte(
const uchar *buffer, int width, int height, float u, float v);
[[nodiscard]] float4 interpolate_bilinear_border_fl(
const float *buffer, int width, int height, float u, float v);
void interpolate_bilinear_border_fl(
const float *buffer, float *output, int width, int height, int components, float u, float v);
/**
* Bilinear sampling.
*
* Takes four image samples at floor(u,v) and floor(u,v)+1, and blends them
* based on fractional parts of u,v.
* Samples outside the image are clamped to texels at image edge.
*
* Note that you probably want to subtract 0.5 from u,v before this function,
* to get proper filtering.
*/
[[nodiscard]] uchar4 interpolate_bilinear_byte(
const uchar *buffer, int width, int height, float u, float v);

View File

@ -30,3 +30,9 @@
#else
# define BLI_HAVE_SSE2 0
#endif
/* BLI_HAVE_SSE4 is 1 when the compiler targets SSE4.1 (`__SSE4_1__` defined),
 * or when building for ARM NEON with `WITH_SSE2NEON` defined; otherwise 0.
 * The parentheses make the `||` / `&&` grouping explicit. */
#if defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON))
# define BLI_HAVE_SSE4 1
#else
# define BLI_HAVE_SSE4 0
#endif

View File

@ -17,6 +17,8 @@
#include "BLI_simd.h"
#include "BLI_strict_flags.h"
namespace blender::math {
enum class eCubicFilter {
BSpline,
Mitchell,
@ -24,7 +26,7 @@ enum class eCubicFilter {
/* Calculate cubic filter coefficients, for samples at -1,0,+1,+2.
* f is 0..1 offset from texel center in pixel space. */
template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficients(float f)
template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float f)
{
float f2 = f * f;
float f3 = f2 * f;
@ -35,7 +37,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
float w0 = -w3 + f2 * 0.5f - f * 0.5f + 1.0f / 6.0f;
float w1 = f3 * 0.5f - f2 * 1.0f + 2.0f / 3.0f;
float w2 = 1.0f - w0 - w1 - w3;
return blender::float4(w0, w1, w2, w3);
return float4(w0, w1, w2, w3);
}
else if constexpr (filter == eCubicFilter::Mitchell) {
/* Cubic Mitchell-Netravali filter with B=1/3, C=1/3 parameters. */
@ -43,7 +45,7 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
float w1 = 7.0f / 6.0f * f3 - 2.0f * f2 + 8.0f / 9.0f;
float w2 = -7.0f / 6.0f * f3 + 3.0f / 2.0f * f2 + 0.5f * f + 1.0f / 18.0f;
float w3 = 7.0f / 18.0f * f3 - 1.0f / 3.0f * f2;
return blender::float4(w0, w1, w2, w3);
return float4(w0, w1, w2, w3);
}
}
@ -54,13 +56,11 @@ template<enum eCubicFilter filter> static blender::float4 cubic_filter_coefficie
BLI_INLINE __m128 floor_simd(__m128 v)
{
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
/* If we're on SSE4 or ARM NEON, just use the simple floor() way. */
# if BLI_HAVE_SSE4
__m128 v_floor = _mm_floor_ps(v);
# else
/* The hard way: truncate, for negative inputs this will round towards zero.
* Then compare with input, and subtract 1 for the inputs that were
* negative. */
/* Truncate, for negative inputs this will round towards zero. Then compare
* with input, and subtract 1 for the inputs that were negative. */
__m128 v_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(v));
__m128 v_neg = _mm_cmplt_ps(v, v_trunc);
__m128 v_floor = _mm_sub_ps(v_trunc, _mm_and_ps(v_neg, _mm_set1_ps(1.0f)));
@ -68,6 +68,30 @@ BLI_INLINE __m128 floor_simd(__m128 v)
return v_floor;
}
BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b)
{
# if BLI_HAVE_SSE4
return _mm_min_epi32(a, b);
# else
__m128i cmp = _mm_cmplt_epi32(a, b);
a = _mm_and_si128(cmp, a);
aras_p marked this conversation as resolved
Review

This check comes from older code, but it seems to be spreading to a lot of other places. The confusing part is the order of operations. Can we make the grouping explicit, like defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON)) ?

This check comes from older code, but it seems to be spreading to a lot of other places. The confusing part is the order of operations. Can we make the grouping explicit, like `defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON))` ?

Added BLI_HAVE_SSE4 to BLI_simd.h and use that.

Added `BLI_HAVE_SSE4` to `BLI_simd.h` and use that.
b = _mm_andnot_si128(cmp, b);
return _mm_or_si128(a, b);
# endif
}
/* Per-lane maximum of the four signed 32-bit integers in `a` and `b`. */
BLI_INLINE __m128i max_i_simd(__m128i a, __m128i b)
{
# if BLI_HAVE_SSE4
  return _mm_max_epi32(a, b);
# else
  /* Without the SSE4 instruction: build a mask of the lanes where `b < a`,
   * then take `a` in those lanes and `b` in all the others. */
  const __m128i a_wins = _mm_cmplt_epi32(b, a);
  return _mm_or_si128(_mm_and_si128(a_wins, a), _mm_andnot_si128(a_wins, b));
# endif
}
template<eCubicFilter filter>
BLI_INLINE void bicubic_interpolation_uchar_simd(
const uchar *src_buffer, uchar *output, int width, int height, float u, float v)
@ -90,8 +114,8 @@ BLI_INLINE void bicubic_interpolation_uchar_simd(
__m128 frac_uv = _mm_sub_ps(uv, uv_floor);
/* Calculate pixel weights. */
blender::float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
blender::float4 wy = cubic_filter_coefficients<filter>(
float4 wx = cubic_filter_coefficients<filter>(_mm_cvtss_f32(frac_uv));
float4 wy = cubic_filter_coefficients<filter>(
_mm_cvtss_f32(_mm_shuffle_ps(frac_uv, frac_uv, 1)));
/* Read 4x4 source pixels and blend them. */
@ -134,8 +158,6 @@ template<typename T, eCubicFilter filter>
static void bicubic_interpolation(
const T *src_buffer, T *output, int width, int height, int components, float u, float v)
{
using namespace blender;
BLI_assert(src_buffer && output);
#if BLI_HAVE_SSE2
@ -234,6 +256,7 @@ static void bicubic_interpolation(
}
}
template<bool border>
BLI_INLINE void bilinear_fl_impl(const float *buffer,
float *output,
int width,
@ -288,33 +311,23 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
return;
}
/* Sample including outside of edges of image. */
if (x1 < 0 || y1 < 0) {
row1 = empty;
/* Sample locations. */
if constexpr (border) {
row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * components;
row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * components;
row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * components;
row4 = (x2 > width - 1 || y2 > height - 1) ? empty :
buffer + (int64_t(width) * y2 + x2) * components;
}
else {
row1 = buffer + width * y1 * components + components * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = buffer + width * y2 * components + components * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = buffer + width * y1 * components + components * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = buffer + width * y2 * components + components * x2;
x1 = blender::math::clamp(x1, 0, width - 1);
x2 = blender::math::clamp(x2, 0, width - 1);
y1 = blender::math::clamp(y1, 0, height - 1);
y2 = blender::math::clamp(y2, 0, height - 1);
row1 = buffer + (int64_t(width) * y1 + x1) * components;
row2 = buffer + (int64_t(width) * y2 + x1) * components;
row3 = buffer + (int64_t(width) * y1 + x2) * components;
row4 = buffer + (int64_t(width) * y2 + x2) * components;
}
a = u - uf;
@ -355,23 +368,13 @@ BLI_INLINE void bilinear_fl_impl(const float *buffer,
}
}
namespace blender::math {
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
template<bool border>
BLI_INLINE uchar4 bilinear_byte_impl(const uchar *buffer, int width, int height, float u, float v)
{
BLI_assert(buffer);
uchar4 res;
#if BLI_HAVE_SSE2
/* Bilinear interpolation needs to read and blend four image pixels, while
* also handling conditions of sample coordinate being outside of the
* image, in which case black (all zeroes) should be used as the sample
* contribution.
*
* Code below does all that without any branches, by making outside the
* image sample locations still read the first pixel of the image, but
* later making sure that the result is set to zero for that sample. */
__m128 uvuv = _mm_set_ps(v, u, v, u);
__m128 uvuv_floor = floor_simd(uvuv);
@ -380,18 +383,42 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
/* Check whether any of the coordinates are outside of the image. */
__m128i size_minus_1 = _mm_sub_epi32(_mm_set_epi32(height, width, height, width),
_mm_set1_epi32(1));
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
__m128i x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
__m128i y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
__m128i invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
/* Set x & y to zero for invalid samples. */
x1234 = _mm_andnot_si128(invalid_1234, x1234);
y1234 = _mm_andnot_si128(invalid_1234, y1234);
/* Samples 1,2,3,4 will be in this order: x1y1, x1y2, x2y1, x2y2. */
__m128i x1234, y1234, invalid_1234;
if constexpr (border) {
/* Blend black colors for samples right outside the image: figure out
* which of the 4 samples were outside, set their coordinates to zero
* and later on put black color into their place. */
__m128i too_lo_xy12 = _mm_cmplt_epi32(xy12, _mm_setzero_si128());
__m128i too_hi_xy12 = _mm_cmplt_epi32(size_minus_1, xy12);
__m128i invalid_xy12 = _mm_or_si128(too_lo_xy12, too_hi_xy12);
/* Samples 1,2,3,4 are in this order: x1y1, x1y2, x2y1, x2y2 */
x1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(2, 2, 0, 0));
y1234 = _mm_shuffle_epi32(xy12, _MM_SHUFFLE(3, 1, 3, 1));
invalid_1234 = _mm_or_si128(_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(2, 2, 0, 0)),
_mm_shuffle_epi32(invalid_xy12, _MM_SHUFFLE(3, 1, 3, 1)));
/* Set x & y to zero for invalid samples. */
x1234 = _mm_andnot_si128(invalid_1234, x1234);
y1234 = _mm_andnot_si128(invalid_1234, y1234);
}
else {
/* Clamp samples to image edges, unless all four of them are outside
* in which case return black. */
__m128i xy12_clamped = max_i_simd(xy12, _mm_setzero_si128());
xy12_clamped = min_i_simd(xy12_clamped, size_minus_1);
__m128i valid_xy12 = _mm_cmpeq_epi32(xy12, xy12_clamped);
__m128i valid_pairs = _mm_and_si128(valid_xy12,
_mm_shuffle_epi32(valid_xy12, _MM_SHUFFLE(0, 3, 2, 1)));
if (_mm_movemask_ps(_mm_castsi128_ps(valid_pairs)) == 0) {
return uchar4(0);
}
x1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(2, 2, 0, 0));
y1234 = _mm_shuffle_epi32(xy12_clamped, _MM_SHUFFLE(3, 1, 3, 1));
}
/* Read the four sample values. Do address calculations in C, since SSE
* before 4.1 makes it very cumbersome to do full integer multiplies. */
@ -404,8 +431,10 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
int sample3 = ((const int *)buffer)[ycoord[2] * int64_t(width) + xcoord[2]];
int sample4 = ((const int *)buffer)[ycoord[3] * int64_t(width) + xcoord[3]];
__m128i samples1234 = _mm_set_epi32(sample4, sample3, sample2, sample1);
/* Set samples to black for the ones that were actually invalid. */
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
if constexpr (border) {
/* Set samples to black for the ones that were actually invalid. */
samples1234 = _mm_andnot_si128(invalid_1234, samples1234);
}
/* Expand samples from packed 8-bit RGBA to full floats:
* spread to 16 bit values. */
@ -455,35 +484,24 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
return uchar4(0);
}
/* Sample including outside of edges of image. */
/* Sample locations. */
const uchar *row1, *row2, *row3, *row4;
uchar empty[4] = {0, 0, 0, 0};
if (x1 < 0 || y1 < 0) {
row1 = empty;
if constexpr (border) {
row1 = (x1 < 0 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x1) * 4;
row2 = (x1 < 0 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x1) * 4;
row3 = (x2 > width - 1 || y1 < 0) ? empty : buffer + (int64_t(width) * y1 + x2) * 4;
row4 = (x2 > width - 1 || y2 > height - 1) ? empty : buffer + (int64_t(width) * y2 + x2) * 4;
}
else {
row1 = buffer + width * y1 * 4 + 4 * x1;
}
if (x1 < 0 || y2 > height - 1) {
row2 = empty;
}
else {
row2 = buffer + width * y2 * 4 + 4 * x1;
}
if (x2 > width - 1 || y1 < 0) {
row3 = empty;
}
else {
row3 = buffer + width * y1 * 4 + 4 * x2;
}
if (x2 > width - 1 || y2 > height - 1) {
row4 = empty;
}
else {
row4 = buffer + width * y2 * 4 + 4 * x2;
x1 = blender::math::clamp(x1, 0, width - 1);
x2 = blender::math::clamp(x2, 0, width - 1);
y1 = blender::math::clamp(y1, 0, height - 1);
y2 = blender::math::clamp(y2, 0, height - 1);
row1 = buffer + (int64_t(width) * y1 + x1) * 4;
row2 = buffer + (int64_t(width) * y2 + x1) * 4;
row3 = buffer + (int64_t(width) * y1 + x2) * 4;
row4 = buffer + (int64_t(width) * y2 + x2) * 4;
}
float a = u - uf;
@ -502,17 +520,41 @@ uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, flo
return res;
}
/* Bilinear sampling of a byte image, using the `border` variant of the shared
 * implementation. */
uchar4 interpolate_bilinear_border_byte(
    const uchar *buffer, int width, int height, float u, float v)
{
  constexpr bool with_border = true;
  return bilinear_byte_impl<with_border>(buffer, width, height, u, v);
}
/* Bilinear sampling of a byte image, using the non-`border` variant of the
 * shared implementation. */
uchar4 interpolate_bilinear_byte(const uchar *buffer, int width, int height, float u, float v)
{
  constexpr bool with_border = false;
  return bilinear_byte_impl<with_border>(buffer, width, height, u, v);
}
/* Bilinear sampling of a 4-component float image, using the `border` variant
 * of the shared implementation. The sampled color is returned by value. */
float4 interpolate_bilinear_border_fl(const float *buffer, int width, int height, float u, float v)
{
  constexpr bool with_border = true;
  constexpr int num_components = 4;
  float4 color;
  bilinear_fl_impl<with_border>(buffer, color, width, height, num_components, u, v);
  return color;
}
/* Bilinear sampling of a float image with `components` values per pixel, using
 * the `border` variant of the shared implementation. The result is written to
 * `output`. */
void interpolate_bilinear_border_fl(
    const float *buffer, float *output, int width, int height, int components, float u, float v)
{
  constexpr bool with_border = true;
  bilinear_fl_impl<with_border>(buffer, output, width, height, components, u, v);
}
float4 interpolate_bilinear_fl(const float *buffer, int width, int height, float u, float v)
{
float4 res;
bilinear_fl_impl(buffer, res, width, height, 4, u, v);
bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v);
return res;
}
void interpolate_bilinear_fl(
const float *buffer, float *output, int width, int height, int components, float u, float v)
{
bilinear_fl_impl(buffer, output, width, height, components, u, v);
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v);
}
void interpolate_bilinear_wrap_fl(const float *buffer,
@ -525,7 +567,7 @@ void interpolate_bilinear_wrap_fl(const float *buffer,
bool wrap_x,
bool wrap_y)
{
bilinear_fl_impl(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
bilinear_fl_impl<false>(buffer, output, width, height, components, u, v, wrap_x, wrap_y);
}
uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height, float u, float v)
@ -573,7 +615,7 @@ uchar4 interpolate_bilinear_wrap_byte(const uchar *buffer, int width, int height
float4 interpolate_bilinear_wrap_fl(const float *buffer, int width, int height, float u, float v)
{
float4 res;
bilinear_fl_impl(buffer, res, width, height, 4, u, v, true, true);
bilinear_fl_impl<false>(buffer, res, width, height, 4, u, v, true, true);
return res;
}

View File

@ -28,10 +28,10 @@ TEST(math_interp, BilinearCharExactSamples)
{
uchar4 res;
uchar4 exp1 = {73, 108, 153, 251};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.0f, 2.0f);
EXPECT_EQ(exp1, res);
uchar4 exp2 = {240, 160, 90, 20};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 0.0f);
EXPECT_EQ(exp2, res);
}
@ -39,10 +39,10 @@ TEST(math_interp, BilinearCharHalfwayUSamples)
{
uchar4 res;
uchar4 exp1 = {31, 37, 42, 48};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 1.0f);
EXPECT_EQ(exp1, res);
uchar4 exp2 = {243, 242, 224, 223};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.5f, 0.0f);
EXPECT_EQ(exp2, res);
}
@ -50,10 +50,10 @@ TEST(math_interp, BilinearCharHalfwayVSamples)
{
uchar4 res;
uchar4 exp1 = {1, 2, 3, 4};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0.0f, 1.5f);
EXPECT_EQ(exp1, res);
uchar4 exp2 = {127, 128, 129, 130};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.0f, 1.5f);
EXPECT_EQ(exp2, res);
}
@ -61,10 +61,11 @@ TEST(math_interp, BilinearCharSamples)
{
uchar4 res;
uchar4 exp1 = {136, 133, 132, 130};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 0.625f);
res = interpolate_bilinear_border_byte(
image_char[0][0], image_width, image_height, 1.25f, 0.625f);
EXPECT_EQ(exp1, res);
uchar4 exp2 = {219, 191, 167, 142};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.4f, 0.1f);
EXPECT_EQ(exp2, res);
}
@ -72,25 +73,39 @@ TEST(math_interp, BilinearFloatSamples)
{
float4 res;
float4 exp1 = {135.9375f, 133.28125f, 131.5625f, 129.84375f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f);
res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 0.625f);
EXPECT_V4_NEAR(exp1, res, float_tolerance);
float4 exp2 = {219.36f, 191.2f, 166.64f, 142.08f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f);
res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.4f, 0.1f);
EXPECT_V4_NEAR(exp2, res, float_tolerance);
}
/* Byte pixels, border variant: sample at positions where only some of the
 * bilinear taps land inside the image. Expected values depend on the
 * `image_char` fixture, which is defined elsewhere in the test file. */
TEST(math_interp, BilinearCharPartiallyOutsideImageBorder)
{
uchar4 res;
/* Negative U: the tap at floor(u) is left of the image. */
uchar4 exp1 = {1, 1, 2, 2};
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f);
EXPECT_EQ(exp1, res);
/* Large V: presumably the tap at floor(v)+1 is past the last row. */
uchar4 exp2 = {9, 11, 15, 22};
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f);
EXPECT_EQ(exp2, res);
/* Negative V (tap at floor(v) is above the image) and large U. */
uchar4 exp3 = {173, 115, 65, 14};
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f);
EXPECT_EQ(exp3, res);
}
TEST(math_interp, BilinearCharPartiallyOutsideImage)
{
uchar4 res;
uchar4 exp1 = {1, 1, 2, 2};
uint4 exp1 = {1, 2, 3, 4};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -0.5f, 2.0f);
EXPECT_EQ(exp1, res);
uchar4 exp2 = {9, 11, 15, 22};
EXPECT_EQ(exp1, uint4(res));
uint4 exp2 = {87, 113, 147, 221};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 1.25f, 2.9f);
EXPECT_EQ(exp2, res);
uchar4 exp3 = {173, 115, 65, 14};
EXPECT_EQ(exp2, uint4(res));
uint4 exp3 = {240, 160, 90, 20};
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 2.2f, -0.1f);
EXPECT_EQ(exp3, res);
EXPECT_EQ(exp3, uint4(res));
}
TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
@ -107,16 +122,30 @@ TEST(math_interp, BilinearCharPartiallyOutsideImageWrap)
EXPECT_EQ(exp3, res);
}
TEST(math_interp, BilinearFloatPartiallyOutsideImage)
TEST(math_interp, BilinearFloatPartiallyOutsideImageBorder)
{
float4 res;
float4 exp1 = {0.5f, 1, 1.5f, 2};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
EXPECT_V4_NEAR(exp1, res, float_tolerance);
float4 exp2 = {8.675f, 11.325f, 14.725f, 22.1f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
EXPECT_V4_NEAR(exp2, res, float_tolerance);
float4 exp3 = {172.8f, 115.2f, 64.8f, 14.4f};
res = interpolate_bilinear_border_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f);
EXPECT_V4_NEAR(exp3, res, float_tolerance);
}
/* Float pixels, non-border variant: same sample positions as the border
 * test, but with different expected values. Those values depend on the
 * `image_fl` fixture, which is defined elsewhere in the test file. */
TEST(math_interp, BilinearFloatPartiallyOutsideImage)
{
float4 res;
/* Negative U: the tap at floor(u) is left of the image. */
float4 exp1 = {1.0f, 2.0f, 3.0f, 4.0f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, -0.5f, 2.0f);
EXPECT_V4_NEAR(exp1, res, float_tolerance);
/* Large V: presumably the tap at floor(v)+1 is past the last row. */
float4 exp2 = {86.75f, 113.25f, 147.25f, 221.0f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 1.25f, 2.9f);
EXPECT_V4_NEAR(exp2, res, float_tolerance);
/* Negative V (tap at floor(v) is above the image) and large U. */
float4 exp3 = {240.0f, 160.0f, 90.0f, 20.0f};
res = interpolate_bilinear_fl(image_fl[0][0], image_width, image_height, 2.2f, -0.1f);
EXPECT_V4_NEAR(exp3, res, float_tolerance);
}
@ -151,23 +180,23 @@ TEST(math_interp, BilinearCharFullyOutsideImage)
uchar4 res;
uchar4 exp = {0, 0, 0, 0};
/* Out of range on U */
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.5f, 0);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.5f, 0);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, -1.1f, 0);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, -1.1f, 0);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 3, 0);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 3, 0);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 5, 0);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 5, 0);
EXPECT_EQ(exp, res);
/* Out of range on V */
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -3.2f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -3.2f);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, -1.5f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, -1.5f);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 3.1f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 3.1f);
EXPECT_EQ(exp, res);
res = interpolate_bilinear_byte(image_char[0][0], image_width, image_height, 0, 500.0f);
res = interpolate_bilinear_border_byte(image_char[0][0], image_width, image_height, 0, 500.0f);
EXPECT_EQ(exp, res);
}

View File

@ -249,17 +249,17 @@ class MemoryBuffer {
single_y = rel_y - last_y;
}
math::interpolate_bilinear_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y);
math::interpolate_bilinear_border_fl(buffer_, out, 1, 1, num_channels_, single_x, single_y);
return;
}
math::interpolate_bilinear_fl(buffer_,
out,
get_width(),
get_height(),
num_channels_,
get_relative_x(x),
get_relative_y(y));
math::interpolate_bilinear_border_fl(buffer_,
out,
get_width(),
get_height(),
num_channels_,
get_relative_x(x),
get_relative_y(y));
}
void read_elem_sampled(float x, float y, PixelSampler sampler, float *out) const

View File

@ -104,7 +104,7 @@ static void sample_image_at_location(ImBuf *ibuf,
imbuf::interpolate_nearest_fl(ibuf, color, x, y);
break;
case PixelSampler::Bilinear:
imbuf::interpolate_bilinear_fl(ibuf, color, x, y);
imbuf::interpolate_bilinear_border_fl(ibuf, color, x, y);
break;
case PixelSampler::Bicubic:
imbuf::interpolate_cubic_bspline_fl(ibuf, color, x, y);
@ -118,7 +118,7 @@ static void sample_image_at_location(ImBuf *ibuf,
byte_color = imbuf::interpolate_nearest_byte(ibuf, x, y);
break;
case PixelSampler::Bilinear:
byte_color = imbuf::interpolate_bilinear_byte(ibuf, x, y);
byte_color = imbuf::interpolate_bilinear_border_byte(ibuf, x, y);
break;
case PixelSampler::Bicubic:
byte_color = imbuf::interpolate_cubic_bspline_byte(ibuf, x, y);

View File

@ -85,7 +85,7 @@ void MovieClipBaseOperation::execute_pixel_sampled(float output[4],
imbuf::interpolate_nearest_fl(ibuf, output, x, y);
break;
case PixelSampler::Bilinear:
imbuf::interpolate_bilinear_fl(ibuf, output, x, y);
imbuf::interpolate_bilinear_border_fl(ibuf, output, x, y);
break;
case PixelSampler::Bicubic:
imbuf::interpolate_cubic_bspline_fl(ibuf, output, x, y);

View File

@ -91,7 +91,7 @@ void MultilayerColorOperation::execute_pixel_sampled(float output[4],
imbuf::interpolate_nearest_fl(buffer_, output, x, y);
break;
case PixelSampler::Bilinear:
imbuf::interpolate_bilinear_fl(buffer_, output, x, y);
imbuf::interpolate_bilinear_border_fl(buffer_, output, x, y);
break;
case PixelSampler::Bicubic:
imbuf::interpolate_cubic_bspline_fl(buffer_, output, x, y);

View File

@ -77,7 +77,8 @@ void RenderLayersProg::do_interpolation(float output[4], float x, float y, Pixel
math::interpolate_nearest_fl(input_buffer_, output, width, height, elementsize_, x, y);
break;
case PixelSampler::Bilinear:
math::interpolate_bilinear_fl(input_buffer_, output, width, height, elementsize_, x, y);
math::interpolate_bilinear_border_fl(
input_buffer_, output, width, height, elementsize_, x, y);
break;
case PixelSampler::Bicubic:
math::interpolate_cubic_bspline_fl(input_buffer_, output, width, height, elementsize_, x, y);

View File

@ -18,6 +18,8 @@
namespace blender::imbuf {
/* Nearest sampling. */
[[nodiscard]] inline uchar4 interpolate_nearest_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_nearest_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -35,6 +37,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
math::interpolate_nearest_fl(in->float_buffer.data, output, in->x, in->y, 4, u, v);
}
/* Nearest sampling with UV wrapping. */
[[nodiscard]] inline uchar4 interpolate_nearest_wrap_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_nearest_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -44,6 +48,8 @@ inline void interpolate_nearest_fl(const ImBuf *in, float output[4], float u, fl
return math::interpolate_nearest_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
}
/* Bilinear sampling. */
[[nodiscard]] inline uchar4 interpolate_bilinear_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_bilinear_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -63,6 +69,29 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f
memcpy(output, &col, sizeof(col));
}
/* Bilinear sampling, samples near edge blend into transparency. */
/* Sample the byte buffer of `in` at (u, v) by forwarding to the `math`
 * border-variant bilinear sampler; returns the sampled pixel. */
[[nodiscard]] inline uchar4 interpolate_bilinear_border_byte(const ImBuf *in, float u, float v)
{
  const int width = in->x;
  const int height = in->y;
  return math::interpolate_bilinear_border_byte(in->byte_buffer.data, width, height, u, v);
}
/* Sample the float buffer of `in` at (u, v) by forwarding to the `math`
 * border-variant bilinear sampler; returns the sampled pixel. */
[[nodiscard]] inline float4 interpolate_bilinear_border_fl(const ImBuf *in, float u, float v)
{
  const int width = in->x;
  const int height = in->y;
  return math::interpolate_bilinear_border_fl(in->float_buffer.data, width, height, u, v);
}
inline void interpolate_bilinear_border_byte(const ImBuf *in, uchar output[4], float u, float v)
{
uchar4 col = math::interpolate_bilinear_border_byte(in->byte_buffer.data, in->x, in->y, u, v);
memcpy(output, &col, sizeof(col));
}
inline void interpolate_bilinear_border_fl(const ImBuf *in, float output[4], float u, float v)
{
float4 col = math::interpolate_bilinear_border_fl(in->float_buffer.data, in->x, in->y, u, v);
memcpy(output, &col, sizeof(col));
}
/* Bilinear sampling with UV wrapping. */
[[nodiscard]] inline uchar4 interpolate_bilinear_wrap_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_bilinear_wrap_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -72,6 +101,8 @@ inline void interpolate_bilinear_fl(const ImBuf *in, float output[4], float u, f
return math::interpolate_bilinear_wrap_fl(in->float_buffer.data, in->x, in->y, u, v);
}
/* Cubic B-Spline sampling. */
[[nodiscard]] inline uchar4 interpolate_cubic_bspline_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_cubic_bspline_byte(in->byte_buffer.data, in->x, in->y, u, v);
@ -91,6 +122,8 @@ inline void interpolate_cubic_bspline_fl(const ImBuf *in, float output[4], float
memcpy(output, &col, sizeof(col));
}
/* Cubic Mitchell sampling. */
[[nodiscard]] inline uchar4 interpolate_cubic_mitchell_byte(const ImBuf *in, float u, float v)
{
return math::interpolate_cubic_mitchell_byte(in->byte_buffer.data, in->x, in->y, u, v);

View File

@ -1761,12 +1761,12 @@ static void *do_scale_thread(void *data_v)
int offset = y * data->newx + x;
if (data->byte_buffer) {
interpolate_bilinear_byte(ibuf, data->byte_buffer + 4 * offset, u, v);
interpolate_bilinear_border_byte(ibuf, data->byte_buffer + 4 * offset, u, v);
}
if (data->float_buffer) {
float *pixel = data->float_buffer + ibuf->channels * offset;
blender::math::interpolate_bilinear_fl(
blender::math::interpolate_bilinear_border_fl(
ibuf->float_buffer.data, pixel, ibuf->x, ibuf->y, ibuf->channels, u, v);
}
}

View File

@ -38,6 +38,9 @@ struct TransformContext {
/* Source UV step delta, when moving along one destination pixel in Y axis. */
float2 add_y;
/* Source corners in destination pixel space, counter-clockwise. */
float2 src_corners[4];
IndexRange dst_region_x_range;
IndexRange dst_region_y_range;
@ -66,14 +69,15 @@ struct TransformContext {
rcti rect;
BLI_rcti_init_minmax(&rect);
float4x4 inverse = math::invert(transform_matrix);
for (const int2 &src_coords : {
int2(src_crop.xmin, src_crop.ymin),
int2(src_crop.xmax, src_crop.ymin),
int2(src_crop.xmin, src_crop.ymax),
int2(src_crop.xmax, src_crop.ymax),
})
{
float3 dst_co = math::transform_point(inverse, float3(src_coords.x, src_coords.y, 0.0f));
const int2 src_coords[4] = {int2(src_crop.xmin, src_crop.ymin),
int2(src_crop.xmax, src_crop.ymin),
int2(src_crop.xmax, src_crop.ymax),
int2(src_crop.xmin, src_crop.ymax)};
for (int i = 0; i < 4; i++) {
int2 src_co = src_coords[i];
float3 dst_co = math::transform_point(inverse, float3(src_co.x, src_co.y, 0.0f));
src_corners[i] = float2(dst_co.x, dst_co.y);
BLI_rcti_do_minmax_v(&rect, int2(dst_co) + margin);
BLI_rcti_do_minmax_v(&rect, int2(dst_co) - margin);
}
@ -251,10 +255,8 @@ static void process_scanlines(const TransformContext &ctx, IndexRange y_range)
*
* Do a box filter: for each destination pixel, accumulate XxY samples from source,
* based on scaling factors (length of X/Y pixel steps). Use at least 2 samples
* along each direction, so that in case of rotation the resulting edges get
* some anti-aliasing, to match previous Subsampled3x3 filter behavior. The
* "at least 2" can be removed once/if transform edge anti-aliasing is implemented
* in general way for all filters. Use at most 100 samples along each direction,
* along each direction, so that in case of rotation the image gets
* some anti-aliasing. Use at most 100 samples along each direction,
* just as some way of clamping possible upper cost. Scaling something down by more
* than 100x should rarely if ever happen, worst case they will get some aliasing.
*/
@ -336,8 +338,9 @@ template<eIMBInterpolationFilterMode Filter>
static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range)
{
int channels = ctx.src->channels;
if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) {
/* Float images. */
/* Float pixels. */
if (channels == 4) {
transform_scanlines<Filter, float, 4>(ctx, y_range);
}
@ -351,14 +354,109 @@ static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y
transform_scanlines<Filter, float, 1>(ctx, y_range);
}
}
else if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
/* Byte images. */
if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
/* Byte pixels. */
if (channels == 4) {
transform_scanlines<Filter, uchar, 4>(ctx, y_range);
}
}
}
/**
 * Approximate coverage factor for one pixel touched by an edge.
 *
 * \param pos: Floating point position along the edge.
 * \param ipos: Integer coordinate that the position is compared against.
 * \param delta: Direction of the edge; only the sign of one component is used.
 * \param is_steep: When true the X distance is measured, otherwise the Y distance.
 * \return Coverage clamped to 0..1 and then squared.
 */
static float calc_coverage(float2 pos, int2 ipos, float2 delta, bool is_steep)
{
  /* Crude estimate: distance between the position and the integer coordinate,
   * taken along X for steep edges and along Y for all others. */
  const float dist = is_steep ? fabsf(ipos.x - pos.x) : fabsf(ipos.y - pos.y);

  /* The direction of the edge decides whether the distance itself or its
   * complement is used as the coverage. */
  const bool invert = is_steep ? (delta.y < 0) : (delta.x > 0);

  float cov = invert ? 1.0f - dist : dist;
  cov = math::clamp(cov, 0.0f, 1.0f);

  /* Only half of the pixels that should be anti-aliased get rasterized (the
   * other half lies outside the quad), so the coverage spans 0.5 .. 1.0.
   * Squaring it gives more range, and it looks a bit nicer that way. */
  return cov * cov;
}
/**
 * Fade the outer edges of the transformed source quad inside the destination image.
 *
 * For each of the four edges between consecutive `ctx.src_corners`, the edge is walked
 * one step at a time along its longer axis, and the destination pixel under each step
 * is multiplied by the approximate coverage returned by calc_coverage().
 * Float pixels get all four values scaled, byte pixels only the value at index 3.
 * When the destination has both a float and a byte buffer, both are processed.
 */
static void edge_aa(const TransformContext &ctx)
{
/* Rasterize along outer source edges into the destination image,
* reducing alpha based on pixel distance to the edge at each pixel.
* This is very approximate and not 100% correct "analytical AA",
* but simple to do and better than nothing. */
for (int line_idx = 0; line_idx < 4; line_idx++) {
float2 ptA = ctx.src_corners[line_idx];
/* `& 3` wraps the index so the last edge connects corner 3 back to corner 0. */
float2 ptB = ctx.src_corners[(line_idx + 1) & 3];
float2 delta = ptB - ptA;
float2 abs_delta = math::abs(delta);
/* Extent along the longer axis; used below as the number of raster steps. */
float length = math::max(abs_delta.x, abs_delta.y);
/* Edges spanning less than one pixel are not rasterized at all. */
if (length < 1) {
continue;
}
/* Steep means the Y extent is the longer one, so stepping happens along Y. */
bool is_steep = length == abs_delta.y;
/* It is very common to have non-rotated strips; check if edge line is
* horizontal or vertical and would not alter the coverage and can
* be skipped. */
constexpr float NO_ROTATION = 1.0e-6f;
constexpr float NO_AA_CONTRIB = 1.0e-2f;
if (is_steep) {
if ((abs_delta.x < NO_ROTATION) && (fabsf(ptA.x - roundf(ptA.x)) < NO_AA_CONTRIB)) {
continue;
}
}
else {
if ((abs_delta.y < NO_ROTATION) && (fabsf(ptA.y - roundf(ptA.y)) < NO_AA_CONTRIB)) {
continue;
}
}
/* DDA line raster: step one pixel along the longer direction. */
/* After this division the longer-axis component of delta has magnitude 1. */
delta /= length;
if (ctx.dst->float_buffer.data != nullptr) {
/* Float pixels. */
float *dst = ctx.dst->float_buffer.data;
for (int i = 0; i < length; i++) {
float2 pos = ptA + i * delta;
/* NOTE(review): if int2(pos) truncates toward zero, positions in (-1, 0)
* map to coordinate 0 and pass the bounds check below - confirm. */
int2 ipos = int2(pos);
/* Steps that land outside the destination image are ignored. */
if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) {
float cov = calc_coverage(pos, ipos, delta, is_steep);
/* A stride of 4 values per pixel is hard-coded here. */
size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4;
/* All four values are scaled.
* NOTE(review): presumably because float pixels are premultiplied - confirm. */
dst[idx + 0] *= cov;
dst[idx + 1] *= cov;
dst[idx + 2] *= cov;
dst[idx + 3] *= cov;
}
}
aras_p marked this conversation as resolved
Review

ImBuf can have both float and byte buffers. The commonly typical approach in the ImBuf's transfomration is to handle both cases:

if (ctx.dst->float_buffer.data) {
}
if (ctx.dst->byte_buffer.data) {
}
`ImBuf` can have both float and byte buffers. The commonly typical approach in the ImBuf's transfomration is to handle both cases: ``` if (ctx.dst->float_buffer.data) { } if (ctx.dst->byte_buffer.data) { } ```

Hmm, a lot of existing code (at least within sequencer/transform) have different logic, i.e. it does "float buffer exists? do that, otehrwise do this". Or, put it different way, pretty much none of the code within sequencer is prepared to work with images that have both byte and float buffers.

Hmm, a lot of existing code (at least within sequencer/transform) have different logic, i.e. it does "float buffer exists? do that, otehrwise do this". Or, put it different way, pretty much _none_ of the code within sequencer is prepared to work with images that have both byte and float buffers.
Review

Sequencer will do that because it has some strong opinion on what the source of truth of pixels is, and will ensure that the result does not have both float and byte buffers in an inconsistent states. Generic transform code in ImBuf does not know what the caller will consider that source of truth, so it applies transform on both buffers.

Some recent additions to the ImBuf seems to diverge from this, but I do not think it is that great of idea. It just leads to a silent inconsistency, which is likely to case problems later on. What is even more sad is that some of those additions will modify float buffer, and will not even tag byte buffer as dirty (it probably still would lead to incorrect result somewhere, but at least you'd have an indication that something is in fact out of date). If we say only one of those buffers can exist, then it needs to become a part of overall design.

Sequencer will do that because it has some strong opinion on what the source of truth of pixels is, and will ensure that the result does not have both float and byte buffers in an inconsistent states. Generic transform code in `ImBuf` does not know what the caller will consider that source of truth, so it applies transform on both buffers. Some recent additions to the `ImBuf` seems to diverge from this, but I do not think it is that great of idea. It just leads to a silent inconsistency, which is likely to case problems later on. What is even more sad is that some of those additions will modify float buffer, and will not even tag byte buffer as dirty (it probably still would lead to incorrect result somewhere, but at least you'd have an indication that something is in fact out of date). If we say only one of those buffers can exist, then it needs to become a part of overall design.

Ah ok! Makes sense, will do.

Ah ok! Makes sense, will do.
}
if (ctx.dst->byte_buffer.data != nullptr) {
/* Byte pixels. */
uchar *dst = ctx.dst->byte_buffer.data;
for (int i = 0; i < length; i++) {
float2 pos = ptA + i * delta;
int2 ipos = int2(pos);
if (ipos.x >= 0 && ipos.x < ctx.dst->x && ipos.y >= 0 && ipos.y < ctx.dst->y) {
float cov = calc_coverage(pos, ipos, delta, is_steep);
size_t idx = (size_t(ipos.y) * ctx.dst->x + ipos.x) * 4;
/* Only the value at index 3 is scaled; the float product is converted
* back to uchar on assignment. */
dst[idx + 3] *= cov;
}
}
}
}
}
} // namespace blender::imbuf::transform
using namespace blender::imbuf::transform;
@ -403,4 +501,8 @@ void IMB_transform(const ImBuf *src,
transform_scanlines_filter<IMB_FILTER_BOX>(ctx, y_range);
}
});
if (crop && (filter != IMB_FILTER_NEAREST)) {
edge_aa(ctx);
}
}

View File

@ -274,10 +274,12 @@ class TextureMarginMap {
if (found_pixel_in_polygon) {
if (ibuf_ptr_fl) {
ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_fl(ibuf, destX, destY);
ibuf_ptr_fl[pixel_index] = imbuf::interpolate_bilinear_border_fl(
ibuf, destX, destY);
}
if (ibuf_ptr_ch) {
ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_byte(ibuf, destX, destY);
ibuf_ptr_ch[pixel_index] = imbuf::interpolate_bilinear_border_byte(
ibuf, destX, destY);
}
/* Add our new pixels to the assigned pixel map. */
mask[pixel_index] = 1;

View File

@ -1584,10 +1584,10 @@ static void transform_image(int x,
break;
case 1:
if (dst_fl) {
dst_fl[offset] = imbuf::interpolate_bilinear_fl(ibuf, xt, yt);
dst_fl[offset] = imbuf::interpolate_bilinear_border_fl(ibuf, xt, yt);
}
else {
dst_ch[offset] = imbuf::interpolate_bilinear_byte(ibuf, xt, yt);
dst_ch[offset] = imbuf::interpolate_bilinear_border_byte(ibuf, xt, yt);
}
break;
case 2:

View File

@ -50,6 +50,9 @@ def main():
from modules import render_report
report = render_report.Report("Sequencer", output_dir, oiiotool)
report.set_pixelated(True)
# default error tolerances are quite large, lower them
report.set_fail_threshold(1.0 / 255.0)
report.set_fail_percent(0.01)
report.set_reference_dir("reference")
test_dir_name = Path(test_dir).name