aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2011-02-24 10:31:57 -0500
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2011-02-24 10:31:57 -0500
commit5dfae4524b95a82dfd57cb2073471d4179f49c6c (patch)
tree253775223197120f01e1d837729c4e3386dcc374 /Eigen/src/Core/arch/SSE/PacketMath.h
parent2064c59878919294ed472ebc5b5ed205676caed4 (diff)
fix bug #195: the fast unaligned load for integers using _mm_load_sd failed when the loaded 64-bit value, interpreted as a double, was a NaN
Diffstat (limited to 'Eigen/src/Core/arch/SSE/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h14
1 files changed, 5 insertions, 9 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index bbe784523..8401efe0b 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -237,7 +237,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
- template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
#else
// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would
// require pointer casting to incompatible pointer types and leads to invalid code
@@ -261,16 +260,13 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
res = _mm_loadh_pd(res,from+1);
return res;
}
-template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
-{
- EIGEN_DEBUG_UNALIGNED_LOAD
- __m128d res;
- res = _mm_load_sd((const double*)(from)) ;
- res = _mm_loadh_pd(res, (const double*)(from+2)) ;
- return _mm_castpd_si128(res);
-}
#endif
+// Unaligned load of a Packet4i from 'from'. Bug 195: there used to be an optimized ploadu<Packet4i> built on _mm_load_sd/_mm_loadh_pd, but it gave wrong
+// results when some 64-bit value, interpreted as a double, was a NaN; it is now defined once, after the #if/#else above, using _mm_loadu_si128.
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
+
+
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
{
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd((const double*)from)), 0, 0, 1, 1);