Hopefully fix compilation of the SSE PacketMath code with MSVC.

We only noticed now that it did not compile at all with MSVC because, until today, SSE2 detection did not work with MSVC.
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-12-16 03:48:49 +0000
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2008-12-16 03:48:49 +0000
commit: 50105c3ed6a339faee730b22345241907a43fd6d (patch)
tree: 3cda293eb93d304d3c703020a31f81c5f6baf617 /Eigen
parent: 0a220721d11880170495e0770d2bdc28b588e489 (diff)
1 files changed, 46 insertions, 46 deletions
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 8b4348fea..c6740f414 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -37,17 +37,17 @@ template<> struct ei_unpacket_traits<__m128>  { typedef float  type; enum {size=
 template<> struct ei_unpacket_traits<__m128d> { typedef double type; enum {size=2}; };
 template<> struct ei_unpacket_traits<__m128i> { typedef int    type; enum {size=4}; };
 
-template<> inline __m128  ei_padd(const __m128&  a, const __m128&  b) { return _mm_add_ps(a,b); }
-template<> inline __m128d ei_padd(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); }
-template<> inline __m128i ei_padd(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); }
+template<> inline __m128  ei_padd<__m128>(const __m128&  a, const __m128&  b) { return _mm_add_ps(a,b); }
+template<> inline __m128d ei_padd<__m128d>(const __m128d& a, const __m128d& b) { return _mm_add_pd(a,b); }
+template<> inline __m128i ei_padd<__m128i>(const __m128i& a, const __m128i& b) { return _mm_add_epi32(a,b); }
 
-template<> inline __m128  ei_psub(const __m128&  a, const __m128&  b) { return _mm_sub_ps(a,b); }
-template<> inline __m128d ei_psub(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
-template<> inline __m128i ei_psub(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
+template<> inline __m128  ei_psub<__m128>(const __m128&  a, const __m128&  b) { return _mm_sub_ps(a,b); }
+template<> inline __m128d ei_psub<__m128d>(const __m128d& a, const __m128d& b) { return _mm_sub_pd(a,b); }
+template<> inline __m128i ei_psub<__m128i>(const __m128i& a, const __m128i& b) { return _mm_sub_epi32(a,b); }
 
-template<> inline __m128  ei_pmul(const __m128&  a, const __m128&  b) { return _mm_mul_ps(a,b); }
-template<> inline __m128d ei_pmul(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
-template<> inline __m128i ei_pmul(const __m128i& a, const __m128i& b)
+template<> inline __m128  ei_pmul<__m128>(const __m128&  a, const __m128&  b) { return _mm_mul_ps(a,b); }
+template<> inline __m128d ei_pmul<__m128d>(const __m128d& a, const __m128d& b) { return _mm_mul_pd(a,b); }
+template<> inline __m128i ei_pmul<__m128i>(const __m128i& a, const __m128i& b)
 {
   return _mm_or_si128(
     _mm_and_si128(
@@ -59,9 +59,9 @@ template<> inline __m128i ei_pmul(const __m128i& a, const __m128i& b)
         _mm_setr_epi32(0xffffffff,0,0xffffffff,0)), 4));
 }
 
-template<> inline __m128  ei_pdiv(const __m128&  a, const __m128&  b) { return _mm_div_ps(a,b); }
-template<> inline __m128d ei_pdiv(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); }
-template<> inline __m128i ei_pdiv(const __m128i& /*a*/, const __m128i& /*b*/)
+template<> inline __m128  ei_pdiv<__m128>(const __m128&  a, const __m128&  b) { return _mm_div_ps(a,b); }
+template<> inline __m128d ei_pdiv<__m128d>(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); }
+template<> inline __m128i ei_pdiv<__m128i>(const __m128i& /*a*/, const __m128i& /*b*/)
 { ei_assert(false && "packet integer division are not supported by SSE");
   __m128i dummy;
   return dummy;
@@ -70,61 +70,61 @@ template<> inline __m128i ei_pdiv(const __m128i& /*a*/, const __m128i& /*b*/)
 // for some weird raisons, it has to be overloaded for packet integer
 template<> inline __m128i ei_pmadd(const __m128i& a, const __m128i& b, const __m128i& c) { return ei_padd(ei_pmul(a,b), c); }
 
-template<> inline __m128  ei_pmin(const __m128&  a, const __m128&  b) { return _mm_min_ps(a,b); }
-template<> inline __m128d ei_pmin(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
+template<> inline __m128  ei_pmin<__m128>(const __m128&  a, const __m128&  b) { return _mm_min_ps(a,b); }
+template<> inline __m128d ei_pmin<__m128d>(const __m128d& a, const __m128d& b) { return _mm_min_pd(a,b); }
 // FIXME this vectorized min operator is likely to be slower than the standard one
-template<> inline __m128i ei_pmin(const __m128i& a, const __m128i& b)
+template<> inline __m128i ei_pmin<__m128i>(const __m128i& a, const __m128i& b)
 {
   __m128i mask = _mm_cmplt_epi32(a,b);
   return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
 }
 
-template<> inline __m128  ei_pmax(const __m128&  a, const __m128&  b) { return _mm_max_ps(a,b); }
-template<> inline __m128d ei_pmax(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
+template<> inline __m128  ei_pmax<__m128>(const __m128&  a, const __m128&  b) { return _mm_max_ps(a,b); }
+template<> inline __m128d ei_pmax<__m128d>(const __m128d& a, const __m128d& b) { return _mm_max_pd(a,b); }
 // FIXME this vectorized max operator is likely to be slower than the standard one
-template<> inline __m128i ei_pmax(const __m128i& a, const __m128i& b)
+template<> inline __m128i ei_pmax<__m128i>(const __m128i& a, const __m128i& b)
 {
   __m128i mask = _mm_cmpgt_epi32(a,b);
   return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
 }
 
-template<> inline __m128  ei_pload(const float*   from) { return _mm_load_ps(from); }
-template<> inline __m128d ei_pload(const double*  from) { return _mm_load_pd(from); }
-template<> inline __m128i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+template<> inline __m128  ei_pload<float>(const float*   from) { return _mm_load_ps(from); }
+template<> inline __m128d ei_pload<double>(const double*  from) { return _mm_load_pd(from); }
+template<> inline __m128i ei_pload<int>(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
 
-template<> inline __m128  ei_ploadu(const float*   from) { return _mm_loadu_ps(from); }
+template<> inline __m128  ei_ploadu<float>(const float*   from) { return _mm_loadu_ps(from); }
 // template<> inline __m128  ei_ploadu(const float*   from) {
 //   if (size_t(from)&0xF)
 //     return _mm_loadu_ps(from);
 //   else 
 //     return _mm_loadu_ps(from);
 // }
-template<> inline __m128d ei_ploadu(const double*  from) { return _mm_loadu_pd(from); }
-template<> inline __m128i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
+template<> inline __m128d ei_ploadu<double>(const double*  from) { return _mm_loadu_pd(from); }
+template<> inline __m128i ei_ploadu<int>(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
 
-template<> inline __m128  ei_pset1(const float&  from) { return _mm_set1_ps(from); }
-template<> inline __m128d ei_pset1(const double& from) { return _mm_set1_pd(from); }
-template<> inline __m128i ei_pset1(const int&    from) { return _mm_set1_epi32(from); }
+template<> inline __m128  ei_pset1<float>(const float&  from) { return _mm_set1_ps(from); }
+template<> inline __m128d ei_pset1<double>(const double& from) { return _mm_set1_pd(from); }
+template<> inline __m128i ei_pset1<int>(const int&    from) { return _mm_set1_epi32(from); }
 
-template<> inline void ei_pstore(float*  to, const __m128&  from) { _mm_store_ps(to, from); }
-template<> inline void ei_pstore(double* to, const __m128d& from) { _mm_store_pd(to, from); }
-template<> inline void ei_pstore(int*    to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> inline void ei_pstore<float>(float*  to, const __m128&  from) { _mm_store_ps(to, from); }
+template<> inline void ei_pstore<double>(double* to, const __m128d& from) { _mm_store_pd(to, from); }
+template<> inline void ei_pstore<int>(int*    to, const __m128i& from) { _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
 
-template<> inline void ei_pstoreu(float*  to, const __m128&  from) { _mm_storeu_ps(to, from); }
-template<> inline void ei_pstoreu(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
-template<> inline void ei_pstoreu(int*    to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+template<> inline void ei_pstoreu<float>(float*  to, const __m128&  from) { _mm_storeu_ps(to, from); }
+template<> inline void ei_pstoreu<double>(double* to, const __m128d& from) { _mm_storeu_pd(to, from); }
+template<> inline void ei_pstoreu<int>(int*    to, const __m128i& from) { _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
 
-template<> inline float  ei_pfirst(const __m128&  a) { return _mm_cvtss_f32(a); }
-template<> inline double ei_pfirst(const __m128d& a) { return _mm_cvtsd_f64(a); }
-template<> inline int    ei_pfirst(const __m128i& a) { return _mm_cvtsi128_si32(a); }
+template<> inline float  ei_pfirst<__m128>(const __m128&  a) { return _mm_cvtss_f32(a); }
+template<> inline double ei_pfirst<__m128d>(const __m128d& a) { return _mm_cvtsd_f64(a); }
+template<> inline int    ei_pfirst<__m128i>(const __m128i& a) { return _mm_cvtsi128_si32(a); }
 
 #ifdef __SSE3__
 // TODO implement SSE2 versions as well as integer versions
-inline __m128 ei_preduxp(const __m128* vecs)
+template<> inline __m128 ei_preduxp<__m128>(const __m128* vecs)
 {
   return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
 }
-inline __m128d ei_preduxp(const __m128d* vecs)
+template<> inline __m128d ei_preduxp<__m128d>(const __m128d* vecs)
 {
   return _mm_hadd_pd(vecs[0], vecs[1]);
 }
@@ -134,13 +134,13 @@ inline __m128d ei_preduxp(const __m128d* vecs)
 //   return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
 // }
 
-inline float ei_predux(const __m128& a)
+template<> inline float ei_predux<__m128>(const __m128& a)
 {
   __m128 tmp0 = _mm_hadd_ps(a,a);
   return ei_pfirst(_mm_hadd_ps(tmp0, tmp0));
 }
 
-inline double ei_predux(const __m128d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
+template<> inline double ei_predux<__m128d>(const __m128d& a) { return ei_pfirst(_mm_hadd_pd(a, a)); }
 
 // SSSE3 version:
 // inline float ei_predux(const __m128i& a)
@@ -150,17 +150,17 @@ inline double ei_predux(const __m128d& a) { return ei_pfirst(_mm_hadd_pd(a, a));
 // }
 #else
 // SSE2 versions
-inline float ei_predux(const __m128& a)
+template<> inline float ei_predux<__m128>(const __m128& a)
 {
   __m128 tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
   return ei_pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
 }
-inline double ei_predux(const __m128d& a)
+template<> inline double ei_predux<__m128d>(const __m128d& a)
 {
   return ei_pfirst(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
 }
 
-inline __m128 ei_preduxp(const __m128* vecs)
+template<> inline __m128 ei_preduxp<__m128>(const __m128* vecs)
 {
   __m128 tmp0, tmp1, tmp2;
   tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
@@ -174,19 +174,19 @@ inline __m128 ei_preduxp(const __m128* vecs)
   return _mm_add_ps(tmp0, tmp2);
 }
 
-inline __m128d ei_preduxp(const __m128d* vecs)
+template<> inline __m128d ei_preduxp<__m128d>(const __m128d* vecs)
 {
   return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
 }
 #endif  // SSE3
 
-inline int ei_predux(const __m128i& a)
+template<> inline int ei_predux<__m128i>(const __m128i& a)
 {
   __m128i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
   return ei_pfirst(tmp) + ei_pfirst(_mm_shuffle_epi32(tmp, 1));
 }
 
-inline __m128i ei_preduxp(const __m128i* vecs)
+template<> inline __m128i ei_preduxp<__m128i>(const __m128i* vecs)
 {
   __m128i tmp0, tmp1, tmp2;
   tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2008-12-16 03:48:49 +0000
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2008-12-16 03:48:49 +0000
commit	50105c3ed6a339faee730b22345241907a43fd6d (patch)
tree	3cda293eb93d304d3c703020a31f81c5f6baf617 /Eigen
parent	0a220721d11880170495e0770d2bdc28b588e489 (diff)