diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2011-02-24 10:31:57 -0500 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2011-02-24 10:31:57 -0500 |
commit | 5dfae4524b95a82dfd57cb2073471d4179f49c6c (patch) | |
tree | 253775223197120f01e1d837729c4e3386dcc374 /Eigen/src/Core/arch/SSE/PacketMath.h | |
parent | 2064c59878919294ed472ebc5b5ed205676caed4 (diff) |
fix bug #195: the fast unaligned load for integers using _mm_load_sd/_mm_loadh_pd gave wrong results when a loaded 64-bit value, interpreted as a double, was a NaN
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 14 |
1 files changed, 5 insertions, 9 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index bbe784523..8401efe0b 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -237,7 +237,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E #endif } template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } - template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); } #else // Fast unaligned loads. Note that here we cannot directly use intrinsics: this would // require pointer casting to incompatible pointer types and leads to invalid code @@ -261,16 +260,13 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) res = _mm_loadh_pd(res,from+1); return res; } -template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) -{ - EIGEN_DEBUG_UNALIGNED_LOAD - __m128d res; - res = _mm_load_sd((const double*)(from)) ; - res = _mm_loadh_pd(res, (const double*)(from+2)) ; - return _mm_castpd_si128(res); -} #endif +// bug 195: we used to have an optimized ploadu using _mm_load_sd/_mm_loadh_pd but that gave wrong results when some 64bit value, +// interpreted as double, was a NaN +template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); } + + template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) { return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1); |