From b131a4db2439ea1ca4ba86cbc86aa962914915c5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 27 Nov 2018 23:45:00 +0100 Subject: bug #1631: fix compilation with ARM NEON and clang, and cleanup the weird pshiftright_and_cast and pcast_and_shiftleft functions. --- Eigen/src/Core/arch/AVX/PacketMath.h | 35 ++++-------- Eigen/src/Core/arch/AltiVec/PacketMath.h | 66 ++++++++++++++++++---- .../Core/arch/Default/GenericPacketMathFunctions.h | 20 +++++++ Eigen/src/Core/arch/NEON/PacketMath.h | 28 +++++---- Eigen/src/Core/arch/NEON/TypeCasting.h | 8 +++ Eigen/src/Core/arch/SSE/PacketMath.h | 13 +---- 6 files changed, 111 insertions(+), 59 deletions(-) (limited to 'Eigen/src/Core/arch') diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 969f68d79..4c1abe43f 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -256,7 +256,17 @@ template<> EIGEN_STRONG_INLINE Packet8f pselect(const Packet8f& mask, template<> EIGEN_STRONG_INLINE Packet4d pselect(const Packet4d& mask, const Packet4d& a, const Packet4d& b) { return _mm256_blendv_pd(b,a,mask); } -template EIGEN_STRONG_INLINE Packet8i pshiftleft(const Packet8i& a) { +template EIGEN_STRONG_INLINE Packet8i pshiftright(Packet8i a) { +#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_srli_epi32(a, N); +#else + __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(a, 0), N); + __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(a, 1), N); + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); +#endif +} + +template EIGEN_STRONG_INLINE Packet8i pshiftleft(Packet8i a) { #ifdef EIGEN_VECTORIZE_AVX2 return _mm256_slli_epi32(a, N); #else @@ -409,33 +419,10 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) return _mm256_and_pd(a,mask); } -template<> EIGEN_STRONG_INLINE Packet8f pshiftright_and_cast(Packet8f v, int n) -{ -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n)); -#else - __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n); - __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n); - return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); -#endif -} - template<> EIGEN_STRONG_INLINE Packet8f pfrexp(const Packet8f& a, Packet8f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet8f pcast_and_shiftleft(Packet8f v, int n) -{ - Packet8i vi = _mm256_cvttps_epi32(v); -#ifdef EIGEN_VECTORIZE_AVX2 - return _mm256_castsi256_ps(_mm256_slli_epi32(vi, n)); -#else - __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(vi, 0), n); - __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(vi, 1), n); - return _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)); -#endif -} - template<> EIGEN_STRONG_INLINE Packet8f pldexp(const Packet8f& a, const Packet8f& exponent) { return pldexp_float(a,exponent); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index d64550d8a..446065fb7 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -187,8 +187,19 @@ template<> struct packet_traits : default_packet_traits }; -template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits +{ + typedef float type; + typedef Packet4f half; + typedef Packet4i integer_packet; + enum {size=4, alignment=Aligned16}; +}; +template<> struct unpacket_traits +{ + typedef int type; + typedef Packet4i half; + enum {size=4, alignment=Aligned16}; +}; inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v) { @@ -567,21 +578,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return vec_ctf(vec_sr(reinterpret_cast(a), - reinterpret_cast(pset1(n))),0); -} +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) +{ return vec_sr(a,reinterpret_cast(pset1(N))); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) +{ return vec_sl(a,reinterpret_cast(pset1(N))); } template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = vec_cts(v,0); - return reinterpret_cast(vec_sl(vi, reinterpret_cast(pset1(n)))); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); } @@ -807,6 +812,43 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons } +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return vec_cts(a,0); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return vec_ctf(a,0); +} + +template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { + return reinterpret_cast(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet4i& a) { + return reinterpret_cast(a); +} + + + //---------- double ---------- #ifdef __VSX__ typedef __vector double Packet2d; diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 067d1dbe0..465f9bc3e 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -16,6 +16,26 @@ namespace Eigen { namespace internal { +template EIGEN_STRONG_INLINE Packet +pfrexp_float(const Packet& a, Packet& exponent) { + typedef typename unpacket_traits::integer_packet PacketI; + const Packet cst_126f = pset1(126.0f); + const Packet cst_half = pset1(0.5f); + const Packet cst_inv_mant_mask = pset1frombits(~0x7f800000u); + exponent = psub(pcast(pshiftright<23>(preinterpret(a))), cst_126f); + return por(pand(a, cst_inv_mant_mask), cst_half); +} + +template EIGEN_STRONG_INLINE Packet +pldexp_float(Packet a, Packet exponent) +{ + typedef typename unpacket_traits::integer_packet PacketI; + const Packet cst_127 = pset1(127.f); + // return a * 2^exponent + PacketI ei = pcast(padd(exponent, cst_127)); + return pmul(a, preinterpret(pshiftleft<23>(ei))); +} + // Natural logarithm // Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2) // and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index dc432f0d2..0a50153c7 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -140,8 +140,19 @@ EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } #endif -template<> struct unpacket_traits { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; }; -template<> struct unpacket_traits { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +template<> struct unpacket_traits +{ + typedef float type; + typedef Packet4f half; + typedef Packet4i integer_packet; + enum {size=4, alignment=Aligned16}; +}; +template<> struct unpacket_traits +{ + typedef int32_t type; + typedef Packet4i half; + enum {size=4, alignment=Aligned16}; +}; template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int32_t& from) { return vdupq_n_s32(from); } @@ -294,6 +305,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return vshrq_n_s32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return vshlq_n_s32(a,N); } + template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pload(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } @@ -384,20 +398,10 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return vcvtq_f32_s32(vshrq_n_s32(vreinterpretq_s32_f32(a),n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = vcvtq_s32_f32(v); - return vreinterpretq_f32_s32(vshlq_n_s32(vi, n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); } diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h index 95d1fd0e4..20dbe1332 100644 --- a/Eigen/src/Core/arch/NEON/TypeCasting.h +++ b/Eigen/src/Core/arch/NEON/TypeCasting.h @@ -41,6 +41,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i return vcvtq_f32_s32(a); } +template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { + return vreinterpretq_s32_f32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Packet4i& a) { + return vreinterpretq_f32_s32(a); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index fbc69ef1f..800eb4d86 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -370,7 +370,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); } -template EIGEN_STRONG_INLINE Packet4i pshiftleft(const Packet4i& a) { return _mm_slli_epi32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftright(Packet4i a) { return _mm_srli_epi32(a,N); } +template EIGEN_STRONG_INLINE Packet4i pshiftleft(Packet4i a) { return _mm_slli_epi32(a,N); } #ifdef EIGEN_VECTORIZE_SSE4_1 template<> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { return _mm_round_ps(a, 0); } @@ -569,20 +570,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) #endif } -template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) { - return _mm_cvtepi32_ps(_mm_srli_epi32(_mm_castps_si128(a),n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { return pfrexp_float(a,exponent); } -template<> EIGEN_STRONG_INLINE Packet4f pcast_and_shiftleft(Packet4f v, int n) -{ - Packet4i vi = _mm_cvttps_epi32(v); - return _mm_castsi128_ps(_mm_slli_epi32(vi, n)); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_float(a,exponent); } -- cgit v1.2.3