From c4b99f78c7baaab08ef2765618df885a138f9396 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 18 Sep 2020 18:13:20 -0700 Subject: Fix breakage in pcast due to _mm_cvtsi128_si64 not being available on 32 bit x86. If SSE 4.1 is available use the faster _mm_extract_epi64 intrinsic. --- Eigen/src/Core/arch/SSE/TypeCasting.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h index 09a69660f..e75df2bfa 100644 --- a/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -71,10 +71,21 @@ template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f template<> EIGEN_STRONG_INLINE Packet2l pcast(const Packet2d& a) { return _mm_set_epi64x(int64_t(a[1]), int64_t(a[0])); - } +} -template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet2l& a) { - return _mm_set_pd(double(_mm_cvtsi128_si64(_mm_unpackhi_epi64(a, a))), double(_mm_cvtsi128_si64(a))); +template <> +EIGEN_STRONG_INLINE Packet2d pcast(const Packet2l& a) { +#ifdef EIGEN_VECTORIZE_SSE4_1 + int64_t a0 = _mm_extract_epi64(a, 0); + int64_t a1 = _mm_extract_epi64(a, 1); +#elif EIGEN_ARCH_x86_64 + int64_t a0 = _mm_cvtsi128_si64(a); + int64_t a1 = _mm_cvtsi128_si64(_mm_unpackhi_epi64(a, a)); +#else + int64_t a0 = a.m_val[0]; + int64_t a1 = a.m_val[1]; +#endif + return _mm_set_pd(static_cast<double>(a1), static_cast<double>(a0)); } template<> EIGEN_STRONG_INLINE Packet4i preinterpret(const Packet4f& a) { -- cgit v1.2.3