aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Thomas Capricelli <orzel@freehackers.org>2010-02-24 21:43:30 +0100
committerGravatar Thomas Capricelli <orzel@freehackers.org>2010-02-24 21:43:30 +0100
commit0f3d69b65ee17d4ca9393fe1318ff239a411bfad (patch)
tree5b8a3bceab4382069523504f7f16a60aa64f6ed2
parent7c98c04412322e56b3b6f7e235bc7ebb61ab6b43 (diff)
Provide "eigen" defines to decide which instruction set is used
(sse3, ssse3 and sse4), independently of the compiler. Only those defines should be used in other places, and the user can rely on those to know which sets are used.
-rw-r--r--Eigen/Core33
-rw-r--r--Eigen/src/Core/arch/SSE/PacketMath.h8
2 files changed, 33 insertions, 8 deletions
diff --git a/Eigen/Core b/Eigen/Core
index cbca16640..0306be3a8 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -61,20 +61,45 @@
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+
+ // Defines symbols for compile-time detection of which instructions are
+ // used.
+ // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
+ #define EIGEN_VECTORIZE_SSE2
+
+ // Detect sse3/ssse3/sse4:
+ // gcc and icc define __SSE3__, etc.;
+ // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
+ // want to force the use of those instructions with msvc.
+ #ifdef __SSE3__
+ #define EIGEN_VECTORIZE_SSE3
+ #endif
+ #ifdef __SSSE3__
+ #define EIGEN_VECTORIZE_SSSE3
+ #endif
+ #ifdef __SSE4_1__
+ #define EIGEN_VECTORIZE_SSE4_1
+ #endif
+ #ifdef __SSE4_2__
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+
+ // include files
+
#include <emmintrin.h>
#include <xmmintrin.h>
- #ifdef __SSE3__
+ #ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
- #ifdef __SSSE3__
+ #ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
- #ifdef __SSE4_1__
+ #ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
- #ifdef __SSE4_2__
+ #ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#elif defined __ALTIVEC__
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index a5a56f759..f78bf0dd3 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
-#ifdef __SSE4_1__
+#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b);
#else
// this version is slightly faster than 4 scalar products
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
{
- #ifdef __SSSE3__
+ #ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a);
#else
Packet4i aux = _mm_srai_epi32(a,31);
@@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
}
-#ifdef __SSE3__
+#ifdef EIGEN_VECTORIZE_SSE3
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
{
@@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
// }
#endif
-#ifdef __SSSE3__
+#ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>