2024-02-02 16:29:01 +01:00 · 2024-02-02 09:58:16 +01:00 · 2024-02-02 10:00:18 +01:00 · 2024-02-02 10:14:59 +01:00 · 2024-02-02 10:16:19 +01:00
3 changed files with 18 additions and 14 deletions
--- a/source/blender/blenlib/BLI_simd.h
+++ b/source/blender/blenlib/BLI_simd.h
@@ -30,3 +30,9 @@
 #else
 #  define BLI_HAVE_SSE2 0
 #endif
+
+#if defined(__SSE4_1__) || (defined(__ARM_NEON) && defined(WITH_SSE2NEON))
+#  define BLI_HAVE_SSE4 1
+#else
+#  define BLI_HAVE_SSE4 0
+#endif
--- a/source/blender/blenlib/intern/math_interp.cc
+++ b/source/blender/blenlib/intern/math_interp.cc
@@ -54,13 +54,9 @@ template<enum eCubicFilter filter> static float4 cubic_filter_coefficients(float
 #    include <smmintrin.h> /* _mm_floor_ps */
 #  endif

-/* Functions below are hard to express before SSE4. If compiling to that
- * or NEON via sse2neon, just use the simple forms. On SSE2, do it the
- * hard way. */
-
 BLI_INLINE __m128 floor_simd(__m128 v)
 {
-#  if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+#  if BLI_HAVE_SSE4
  __m128 v_floor = _mm_floor_ps(v);
 #  else
  /* Truncate, for negative inputs this will round towards zero. Then compare
@@ -74,7 +70,7 @@ BLI_INLINE __m128 floor_simd(__m128 v)

 BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b)
 {
-#  if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+#  if BLI_HAVE_SSE4
  return _mm_min_epi32(a, b);
 #  else
  __m128i cmp = _mm_cmplt_epi32(a, b);
@@ -86,7 +82,7 @@ BLI_INLINE __m128i min_i_simd(__m128i a, __m128i b)

 BLI_INLINE __m128i max_i_simd(__m128i a, __m128i b)
 {
-#  if defined(__SSE4_1__) || defined(__ARM_NEON) && defined(WITH_SSE2NEON)
+#  if BLI_HAVE_SSE4
  return _mm_max_epi32(a, b);
 #  else
  __m128i cmp = _mm_cmplt_epi32(b, a);
--- a/source/blender/imbuf/intern/transform.cc
+++ b/source/blender/imbuf/intern/transform.cc
@@ -338,8 +338,9 @@ template<eIMBInterpolationFilterMode Filter>
 static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y_range)
 {
  int channels = ctx.src->channels;
+
  if (ctx.dst->float_buffer.data && ctx.src->float_buffer.data) {
-    /* Float images. */
+    /* Float pixels. */
    if (channels == 4) {
      transform_scanlines<Filter, float, 4>(ctx, y_range);
    }
@@ -353,8 +354,9 @@ static void transform_scanlines_filter(const TransformContext &ctx, IndexRange y
      transform_scanlines<Filter, float, 1>(ctx, y_range);
    }
  }
-  else if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
-    /* Byte images. */
+
+  if (ctx.dst->byte_buffer.data && ctx.src->byte_buffer.data) {
+    /* Byte pixels. */
    if (channels == 4) {
      transform_scanlines<Filter, uchar, 4>(ctx, y_range);
    }
@@ -423,8 +425,8 @@ static void edge_aa(const TransformContext &ctx)

    /* DDA line raster: step one pixel along the longer direction. */
    delta /= length;
-    if (ctx.dst->float_buffer.data) {
-      /* Float image. */
+    if (ctx.dst->float_buffer.data != nullptr) {
+      /* Float pixels. */
      float *dst = ctx.dst->float_buffer.data;
      for (int i = 0; i < length; i++) {
        float2 pos = ptA + i * delta;
@@ -439,8 +441,8 @@ static void edge_aa(const TransformContext &ctx)
        }
      }
    }
-    else {
-      /* Byte image. */
+    if (ctx.dst->byte_buffer.data != nullptr) {
+      /* Byte pixels. */
      uchar *dst = ctx.dst->byte_buffer.data;
      for (int i = 0; i < length; i++) {
        float2 pos = ptA + i * delta;