From 5756d8b8d4deb1319e1d6360bca868f1946d7a05 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Thu, 27 Apr 2023 11:05:22 +0200 Subject: [PATCH 1/2] Cycles: Support newer version of sse2neon Since version v1.5.0 of sse2neon, the functionality for denormals flushing is implemented in the library. This commit makes it so the _MM_SET_FLUSH_ZERO_MODE and _MM_SET_DENORMALS_ZERO_MODE are used from sse2neon if available. This solves macro re-definition when a newer sse2neon is used. The change is implemented in a way that both current and new sse2neon are supported. --- intern/cycles/util/simd.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h index 03783abd20f..7da0a0f8390 100644 --- a/intern/cycles/util/simd.h +++ b/intern/cycles/util/simd.h @@ -33,11 +33,19 @@ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); #elif defined(__aarch64__) || defined(_M_ARM64) -# define _MM_FLUSH_ZERO_ON 24 -# define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r"(__fpcr)) -# define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : : "ri"(__fpcr)) -# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON); -# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON) +/* The get/set denormals to zero was implemented in sse2neon v1.5.0. + * Keep the compatibility code until the minimum library version is increased. 
*/ +# if defined(_MM_SET_FLUSH_ZERO_MODE) +# define SIMD_SET_FLUSH_TO_ZERO \ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +# else +# define _MM_FLUSH_ZERO_ON 24 +# define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r"(__fpcr)) +# define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : : "ri"(__fpcr)) +# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON); +# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON) +# endif #else # define SIMD_SET_FLUSH_TO_ZERO #endif @@ -111,7 +119,7 @@ static struct StepTy { } step ccl_attr_maybe_unused; #endif -#if defined(__aarch64__) || defined(_M_ARM64) +#if (defined(__aarch64__) || defined(_M_ARM64)) && !defined(_MM_SET_FLUSH_ZERO_MODE) __forceinline int set_fz(uint32_t flag) { uint64_t old_fpcr, new_fpcr; -- 2.30.2 From abade6ea69a83a852bdcd949e97b411b3dac35f2 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Thu, 27 Apr 2023 11:12:25 +0200 Subject: [PATCH 2/2] Deps: Bump version of sse2neon This brings the following improvements: - Implementation of _MM_SET_FLUSH_ZERO_MODE and _MM_SET_DENORMALS_ZERO_MODE - Implementation of _mm_round_ss There does not seem to be any performance impact with just this upgrade. 
--- build_files/build_environment/cmake/versions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_files/build_environment/cmake/versions.cmake b/build_files/build_environment/cmake/versions.cmake index d32ac42930c..34565b15cf5 100644 --- a/build_files/build_environment/cmake/versions.cmake +++ b/build_files/build_environment/cmake/versions.cmake @@ -579,9 +579,9 @@ set(ZSTD_HASH_TYPE SHA256) set(ZSTD_FILE zstd-${ZSTD_VERSION}.tar.gz) set(ZSTD_CPE "cpe:2.3:a:facebook:zstandard:${ZSTD_VERSION}:*:*:*:*:*:*:*") -set(SSE2NEON_VERSION fe5ff00bb8d19b327714a3c290f3e2ce81ba3525) +set(SSE2NEON_VERSION v1.6.0) set(SSE2NEON_URI https://github.com/DLTcollab/sse2neon/archive/${SSE2NEON_VERSION}.tar.gz) -set(SSE2NEON_HASH 0780253525d299c31775ef95853698d03db9c7739942af8570000f4a25a5d605) +set(SSE2NEON_HASH 06f4693219deccb91b457135d836fc514a1c0a57e9fa66b143982901d2d19677) set(SSE2NEON_HASH_TYPE SHA256) set(SSE2NEON_FILE sse2neon-${SSE2NEON_VERSION}.tar.gz) -- 2.30.2