ImBuf: optimize IMB_transform #115653

Merged
Aras Pranckevicius merged 15 commits from aras_p/blender:imb_transform_opt into main 2023-12-14 15:10:41 +01:00
1 changed file with 9 additions and 3 deletions
Showing only changes of commit fbd9716206 - Show all commits

View File

@@ -377,12 +377,18 @@ void BLI_bilinear_interpolation_char(
* later making sure that the result is set to zero for that sample. */
__m128 uvuv = _mm_set_ps(v, u, v, u);
/* No easy way to do floor() without SSE4, so do it the hard way: truncate,
* for negative inputs this will round towards zero. Then compare with input
* UV, and subtract 1 for the inputs that were negative. */
# if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
/* If we're on SSE4 or ARM NEON, just use the simple floor() way. */
__m128 uvuv_floor = _mm_floor_ps(uvuv);
# else
/* The hard way: truncate, for negative inputs this will round towards zero.
* Then compare with input UV, and subtract 1 for the inputs that were
* negative. */
__m128 uv_trunc = _mm_cvtepi32_ps(_mm_cvttps_epi32(uvuv));
__m128 uv_neg = _mm_cmplt_ps(uvuv, uv_trunc);
__m128 uvuv_floor = _mm_sub_ps(uv_trunc, _mm_and_ps(uv_neg, _mm_set1_ps(1.0f)));
# endif
/* x1, y1, x2, y2 */
__m128i xy12 = _mm_add_epi32(_mm_cvttps_epi32(uvuv_floor), _mm_set_epi32(1, 1, 0, 0));