aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-04-14 10:44:17 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-04-14 10:44:17 -0700
commitfeaf7c7e6d01a4804cee5949a01ece1f8a46866f (patch)
treea9a0d13174d4e1cf27333e2aef520d42a084bc26 /Eigen
parent3b2321e3ab157b314286227e08d738d74bfbf5d1 (diff)
Optimized SSE unaligned loads and stores when compiling a 64bit target with a recent version of gcc (ie gcc 4.8).
Diffstat (limited to 'Eigen')
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h9
1 files changed, 8 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index ea05a3415..bc17726b4 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -271,14 +271,17 @@ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { E
// TODO: do the same for MSVC (ICC is compatible)
// NOTE: with the code below, MSVC's compiler crashes!
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || (defined(__x86_64) && EIGEN_GNUC_AT_LEAST(4, 8)))
// bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 1
#elif defined(__clang__)
// bug 201: Segfaults in __mm_loadh_pd with clang 2.8
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
#else
#define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0
+ #define EIGEN_AVOID_CUSTOM_UNALIGNED_STORES 0
#endif
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
@@ -338,8 +341,12 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
EIGEN_DEBUG_UNALIGNED_STORE
+#if EIGEN_AVOID_CUSTOM_UNALIGNED_STORES
+ _mm_storeu_pd(to, from);
+#else
_mm_storel_pd((to), from);
_mm_storeh_pd((to+1), from);
+#endif
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }