Remove unused packet op "palign".

Clean up a compiler warning in c++03 mode in AVX512/Complex.h.
author: Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-07 17:14:26 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-07 17:14:26 -0700
commit: 225ab040e078b923ece75b7a49ae0cef980c226f (patch)
tree: e2f71052495f7741a81a785c23fcf5fb82fabc60
parent: 74ec8e6618c02a71dba28029b33dbe2a3f4da590 (diff)
16 files changed, 1 insertions, 915 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 3f2489b46..0ed5d2cc5 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -685,35 +685,6 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
   return ploadt<Packet, LoadMode>(from);
 }
 
-/** \internal default implementation of palign() allowing partial specialization */
-template<int Offset,typename PacketType>
-struct palign_impl
-{
-  // by default data are aligned, so there is nothing to be done :)
-  static inline void run(PacketType&, const PacketType&) {}
-};
-
-/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
-  * of \a first and \a Offset first elements of \a second.
-  * 
-  * This function is currently only used to optimize matrix-vector products on unligned matrices.
-  * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
-  * at the position \a Offset. For instance, for packets of 4 elements, we have:
-  *  Input:
-  *  - first = {f0,f1,f2,f3}
-  *  - second = {s0,s1,s2,s3}
-  * Output: 
-  *   - if Offset==0 then {f0,f1,f2,f3}
-  *   - if Offset==1 then {f1,f2,f3,s0}
-  *   - if Offset==2 then {f2,f3,s0,s1}
-  *   - if Offset==3 then {f3,s0,s1,s3}
-  */
-template<int Offset,typename PacketType>
-inline void palign(PacketType& first, const PacketType& second)
-{
-  palign_impl<Offset,PacketType>::run(first,second);
-}
-
 /***************************************************************************
 * Fast complex products (GCC generates a function call which is very slow)
 ***************************************************************************/
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 03a097e49..893eb2702 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -157,16 +157,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
                          Packet2cf(_mm256_extractf128_ps(a.v, 1))));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet4cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
-  {
-    if (Offset==0) return;
-    palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
-  }
-};
-
 template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
@@ -339,16 +329,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const
                      Packet1cd(_mm256_extractf128_pd(a.v,1))));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
-  {
-    if (Offset==0) return;
-    palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
-  }
-};
-
 template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 3ed713eee..10196fd6d 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -691,93 +691,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
   return _mm256_movemask_ps(x)!=0;
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet8f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
-  {
-    if (Offset==1)
-    {
-      first = _mm256_blend_ps(first, second, 1);
-      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
-      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
-      first = _mm256_blend_ps(tmp1, tmp2, 0x88);
-    }
-    else if (Offset==2)
-    {
-      first = _mm256_blend_ps(first, second, 3);
-      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
-      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
-      first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
-    }
-    else if (Offset==3)
-    {
-      first = _mm256_blend_ps(first, second, 7);
-      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
-      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
-      first = _mm256_blend_ps(tmp1, tmp2, 0xee);
-    }
-    else if (Offset==4)
-    {
-      first = _mm256_blend_ps(first, second, 15);
-      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
-      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
-      first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
-    }
-    else if (Offset==5)
-    {
-      first = _mm256_blend_ps(first, second, 31);
-      first = _mm256_permute2f128_ps(first, first, 1);
-      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
-      first = _mm256_permute2f128_ps(tmp, tmp, 1);
-      first = _mm256_blend_ps(tmp, first, 0x88);
-    }
-    else if (Offset==6)
-    {
-      first = _mm256_blend_ps(first, second, 63);
-      first = _mm256_permute2f128_ps(first, first, 1);
-      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
-      first = _mm256_permute2f128_ps(tmp, tmp, 1);
-      first = _mm256_blend_ps(tmp, first, 0xcc);
-    }
-    else if (Offset==7)
-    {
-      first = _mm256_blend_ps(first, second, 127);
-      first = _mm256_permute2f128_ps(first, first, 1);
-      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
-      first = _mm256_permute2f128_ps(tmp, tmp, 1);
-      first = _mm256_blend_ps(tmp, first, 0xee);
-    }
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4d>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
-  {
-    if (Offset==1)
-    {
-      first = _mm256_blend_pd(first, second, 1);
-      __m256d tmp = _mm256_permute_pd(first, 5);
-      first = _mm256_permute2f128_pd(tmp, tmp, 1);
-      first = _mm256_blend_pd(tmp, first, 0xA);
-    }
-    else if (Offset==2)
-    {
-      first = _mm256_blend_pd(first, second, 3);
-      first = _mm256_permute2f128_pd(first, first, 1);
-    }
-    else if (Offset==3)
-    {
-      first = _mm256_blend_pd(first, second, 7);
-      __m256d tmp = _mm256_permute_pd(first, 5);
-      first = _mm256_permute2f128_pd(tmp, tmp, 1);
-      first = _mm256_blend_pd(tmp, first, 5);
-    }
-  }
-};
-
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet8f,8>& kernel) {
   __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
@@ -1078,16 +991,6 @@ template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::ha
   return _mm_insert_epi16(a,int(b.x),7);
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet8h>
-{
-  static EIGEN_STRONG_INLINE void run(Packet8h& first, const Packet8h& second)
-  {
-    if (Offset!=0)
-      first = _mm_alignr_epi8(second,first, Offset*2);
-  }
-};
-
 EIGEN_STRONG_INLINE void
 ptranspose(PacketBlock<Packet8h,8>& kernel) {
   __m128i a = kernel.packet[0];
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 219de36db..75bdf57f1 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -153,16 +153,6 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a)
   return Packet4cf(res);
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet8cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet8cf& first, const Packet8cf& second)
-  {
-    if (Offset==0) return;
-    palign_impl<Offset*2,Packet16f>::run(first.v, second.v);
-  }
-};
-
 template<> struct conj_helper<Packet8cf, Packet8cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
@@ -239,7 +229,7 @@ template<> struct packet_traits<std::complex<double> >  : default_packet_traits
     HasAbs2   = 0,
     HasMin    = 0,
     HasMax    = 0,
-    HasSetLinear = 0,
+    HasSetLinear = 0
   };
 };
 
@@ -351,16 +341,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const
                          Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet4cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4cd& first, const Packet4cd& second)
-  {
-    if (Offset==0) return;
-    palign_impl<Offset*2,Packet8d>::run(first.v, second.v);
-  }
-};
-
 template<> struct conj_helper<Packet4cd, Packet4cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 3a48ea028..346d1f06e 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -919,52 +919,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)
   return !_mm512_kortestz(tmp,tmp);
 }
 
-template <int Offset>
-struct palign_impl<Offset, Packet16f> {
-  static EIGEN_STRONG_INLINE void run(Packet16f& first,
-                                      const Packet16f& second) {
-    if (Offset != 0) {
-      __m512i first_idx = _mm512_set_epi32(
-          Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
-          Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
-          Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
-
-      __m512i second_idx =
-          _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
-                           Offset - 5, Offset - 6, Offset - 7, Offset - 8,
-                           Offset - 9, Offset - 10, Offset - 11, Offset - 12,
-                           Offset - 13, Offset - 14, Offset - 15, Offset - 16);
-
-      unsigned short mask = 0xFFFF;
-      mask <<= (16 - Offset);
-
-      first = _mm512_permutexvar_ps(first_idx, first);
-      Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
-      first = _mm512_mask_blend_ps(mask, first, tmp);
-    }
-  }
-};
-template <int Offset>
-struct palign_impl<Offset, Packet8d> {
-  static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
-    if (Offset != 0) {
-      __m512i first_idx = _mm512_set_epi32(
-          0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
-          Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
-
-      __m512i second_idx = _mm512_set_epi32(
-          0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
-          Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
-
-      unsigned char mask = 0xFF;
-      mask <<= (8 - Offset);
-
-      first = _mm512_permutexvar_pd(first_idx, first);
-      Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
-      first = _mm512_mask_blend_pd(mask, first, tmp);
-    }
-  }
-};
 
 
 #define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 2a2689bc6..69d2ceca8 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -159,22 +159,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
   return pfirst<Packet2cf>(prod);
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
-  {
-    if (Offset==1)
-    {
-#ifdef _BIG_ENDIAN
-      first.v = vec_sld(first.v, second.v, 8);
-#else
-      first.v = vec_sld(second.v, first.v, 8);
-#endif
-    }
-  }
-};
-
 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -346,16 +330,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
 
 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
 
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
-  {
-    // FIXME is it sure we never have to align a Packet1cd?
-    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
-  }
-};
-
 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index f2dd98c06..83b75b974 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1524,176 +1524,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
   return vec_any_ne(x, pzero(x));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
-  {
-#ifdef _BIG_ENDIAN
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(first, second, 4); break;
-    case 2:
-      first = vec_sld(first, second, 8); break;
-    case 3:
-      first = vec_sld(first, second, 12); break;
-    }
-#else
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(second, first, 12); break;
-    case 2:
-      first = vec_sld(second, first, 8); break;
-    case 3:
-      first = vec_sld(second, first, 4); break;
-    }
-#endif
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
-  {
-#ifdef _BIG_ENDIAN
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(first, second, 4); break;
-    case 2:
-      first = vec_sld(first, second, 8); break;
-    case 3:
-      first = vec_sld(first, second, 12); break;
-    }
-#else
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(second, first, 12); break;
-    case 2:
-      first = vec_sld(second, first, 8); break;
-    case 3:
-      first = vec_sld(second, first, 4); break;
-    }
-#endif
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet8s>
-{
-  static EIGEN_STRONG_INLINE void run(Packet8s& first, const Packet8s& second)
-  {
-#ifdef _BIG_ENDIAN
-    switch (Offset % 8) {
-    case 1:
-      first = vec_sld(first, second, 2); break;
-    case 2:
-      first = vec_sld(first, second, 4); break;
-    case 3:
-      first = vec_sld(first, second, 6); break;
-    case 4:
-      first = vec_sld(first, second, 8); break;
-    case 5:
-      first = vec_sld(first, second, 10); break;
-    case 6:
-      first = vec_sld(first, second, 12); break;
-    case 7:
-      first = vec_sld(first, second, 14); break;
-    }
-#else
-    switch (Offset % 8) {
-    case 1:
-      first = vec_sld(second, first, 14); break;
-    case 2:
-      first = vec_sld(second, first, 12); break;
-    case 3:
-      first = vec_sld(second, first, 10); break;
-    case 4:
-      first = vec_sld(second, first, 8); break;
-    case 5:
-      first = vec_sld(second, first, 6); break;
-    case 6:
-      first = vec_sld(second, first, 4); break;
-    case 7:
-      first = vec_sld(second, first, 2); break;
-    }
-#endif
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet8us>
-{
-  static EIGEN_STRONG_INLINE void run(Packet8us& first, const Packet8us& second)
-  {
-#ifdef _BIG_ENDIAN
-    switch (Offset % 8) {
-    case 1:
-      first = vec_sld(first, second, 2); break;
-    case 2:
-      first = vec_sld(first, second, 4); break;
-    case 3:
-      first = vec_sld(first, second, 6); break;
-    case 4:
-      first = vec_sld(first, second, 8); break;
-    case 5:
-      first = vec_sld(first, second, 10); break;
-    case 6:
-      first = vec_sld(first, second, 12); break;
-    case 7:
-      first = vec_sld(first, second, 14); break;
-    }
-#else
-    switch (Offset % 8) {
-    case 1:
-      first = vec_sld(second, first, 14); break;
-    case 2:
-      first = vec_sld(second, first, 12); break;
-    case 3:
-      first = vec_sld(second, first, 10); break;
-    case 4:
-      first = vec_sld(second, first, 8); break;
-    case 5:
-      first = vec_sld(second, first, 6); break;
-    case 6:
-      first = vec_sld(second, first, 4); break;
-    case 7:
-      first = vec_sld(second, first, 2); break;
-    }
-#endif
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet16c>
-{
-  static EIGEN_STRONG_INLINE void run(Packet16c& first, const Packet16c& second)
-  {
-    const int shift = Offset % 16;
-    if ( shift == 0 ) return;
-#ifdef _BIG_ENDIAN
-    first = vec_sld(first, second, shift);
-#else
-    first = vec_sld(first, second, shift);
-#endif
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet16uc>
-{
-  static EIGEN_STRONG_INLINE void run(Packet16uc& first, const Packet16uc& second)
-  {
-    const int shift = Offset % 16;
-    if ( shift == 0 ) return;
-#ifdef _BIG_ENDIAN
-    first = vec_sld(first, second, shift);
-#else
-    first = vec_sld(first, second, shift);
-#endif
-  }
-};
-
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet4f,4>& kernel) {
   Packet4f t0, t1, t2, t3;
@@ -2362,20 +2192,6 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
   return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
-  {
-    if (Offset == 1)
-#ifdef _BIG_ENDIAN
-      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
-#else
-      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
-#endif
-  }
-};
-
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet2d,2>& kernel) {
   Packet2d t0, t1;
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index 7baa25e33..4877a95a8 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -305,15 +305,6 @@ EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a
                              (a.v[0] * a.v[3]) + (a.v[1] * a.v[2]));
 }
 
-template <int Offset>
-struct palign_impl<Offset, Packet2cf> {
-  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) {
-    if (Offset == 1) {
-      first.v = (Packet4f)__builtin_msa_sldi_b((v16i8)second.v, (v16i8)first.v, Offset * 8);
-    }
-  }
-};
-
 template <>
 struct conj_helper<Packet2cf, Packet2cf, false, true> {
   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
@@ -653,15 +644,6 @@ EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd&
   return pfirst(a);
 }
 
-template <int Offset>
-struct palign_impl<Offset, Packet1cd> {
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) {
-    // FIXME is it sure we never have to align a Packet1cd?
-    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes
-    // boundary...
-  }
-};
-
 template <>
 struct conj_helper<Packet1cd, Packet1cd, false, true> {
   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index ff4e1d5f1..f03cf61ff 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -675,25 +675,6 @@ EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
   return m[0];
 }
 
-#define PALIGN_MSA(Offset, Type, Command)                                                \
-  template <>                                                                            \
-  struct palign_impl<Offset, Type> {                                                     \
-    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second) {               \
-      if (Offset != 0) first = (Type)(Command((v16i8)second, (v16i8)first, Offset * 4)); \
-    }                                                                                    \
-  };
-
-PALIGN_MSA(0, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(2, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(3, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(0, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(2, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(3, Packet4i, __builtin_msa_sldi_b)
-
-#undef PALIGN_MSA
-
 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
   os << "[ " << value.packet[0] << "," << std::endl
      << "  " << value.packet[1] << "," << std::endl
@@ -1168,19 +1149,6 @@ EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
 #endif
 }
 
-#define PALIGN_MSA(Offset, Type, Command)                                                \
-  template <>                                                                            \
-  struct palign_impl<Offset, Type> {                                                     \
-    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second) {               \
-      if (Offset != 0) first = (Type)(Command((v16i8)second, (v16i8)first, Offset * 8)); \
-    }                                                                                    \
-  };
-
-PALIGN_MSA(0, Packet2d, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet2d, __builtin_msa_sldi_b)
-
-#undef PALIGN_MSA
-
 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
   os << "[ " << value.packet[0] << "," << std::endl << "  " << value.packet[1] << " ]";
   return os;
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index b03c66122..8cd2a5ebe 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -340,16 +340,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
   return s;
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
-  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
-  {
-    if (Offset == 1)
-      first.v = vextq_f32(first.v, second.v, 2);
-  }
-};
-
 template<> struct conj_helper<Packet1cf,Packet1cf,false,true>
 {
   EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
@@ -602,16 +592,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
 
 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
 
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
-  {
-    // FIXME is it sure we never have to align a Packet1cd?
-    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
-  }
-};
-
 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 3d24f00ce..5937433f5 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -2708,147 +2708,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
   return vget_lane_u32(vpmax_u32(tmp, tmp), 0);
 }
 
-// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
-// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
-#define PALIGN_NEON(Offset,Type,Command) \
-template<>\
-struct palign_impl<Offset,Type>\
-{\
-    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
-    {\
-        if (Offset!=0)\
-            first = Command(first, second, Offset);\
-    }\
-};\
-
-template<typename T>
-EIGEN_STRONG_INLINE T palign_4c(const T& first, const T &second, const int n)
-{
-  return static_cast<T>((static_cast<uint32_t>(second) << (32 - n * 8)) | (static_cast<uint32_t>(first) >> (n * 8)));
-}
-
-PALIGN_NEON(0, Packet2f, vext_f32)
-PALIGN_NEON(1, Packet2f, vext_f32)
-
-PALIGN_NEON(0, Packet4f, vextq_f32)
-PALIGN_NEON(1, Packet4f, vextq_f32)
-PALIGN_NEON(2, Packet4f, vextq_f32)
-PALIGN_NEON(3, Packet4f, vextq_f32)
-
-PALIGN_NEON(0, Packet4c, palign_4c)
-PALIGN_NEON(1, Packet4c, palign_4c)
-PALIGN_NEON(2, Packet4c, palign_4c)
-PALIGN_NEON(3, Packet4c, palign_4c)
-
-PALIGN_NEON(0, Packet8c, vext_s8)
-PALIGN_NEON(1, Packet8c, vext_s8)
-PALIGN_NEON(2, Packet8c, vext_s8)
-PALIGN_NEON(3, Packet8c, vext_s8)
-PALIGN_NEON(4, Packet8c, vext_s8)
-PALIGN_NEON(5, Packet8c, vext_s8)
-PALIGN_NEON(6, Packet8c, vext_s8)
-PALIGN_NEON(7, Packet8c, vext_s8)
-
-PALIGN_NEON(0, Packet16c, vextq_s8)
-PALIGN_NEON(1, Packet16c, vextq_s8)
-PALIGN_NEON(2, Packet16c, vextq_s8)
-PALIGN_NEON(3, Packet16c, vextq_s8)
-PALIGN_NEON(4, Packet16c, vextq_s8)
-PALIGN_NEON(5, Packet16c, vextq_s8)
-PALIGN_NEON(6, Packet16c, vextq_s8)
-PALIGN_NEON(7, Packet16c, vextq_s8)
-PALIGN_NEON(8, Packet16c, vextq_s8)
-PALIGN_NEON(9, Packet16c, vextq_s8)
-PALIGN_NEON(10, Packet16c, vextq_s8)
-PALIGN_NEON(11, Packet16c, vextq_s8)
-PALIGN_NEON(12, Packet16c, vextq_s8)
-PALIGN_NEON(13, Packet16c, vextq_s8)
-PALIGN_NEON(14, Packet16c, vextq_s8)
-PALIGN_NEON(15, Packet16c, vextq_s8)
-
-PALIGN_NEON(0, Packet4uc, palign_4c)
-PALIGN_NEON(1, Packet4uc, palign_4c)
-PALIGN_NEON(2, Packet4uc, palign_4c)
-PALIGN_NEON(3, Packet4uc, palign_4c)
-
-PALIGN_NEON(0, Packet8uc, vext_u8)
-PALIGN_NEON(1, Packet8uc, vext_u8)
-PALIGN_NEON(2, Packet8uc, vext_u8)
-PALIGN_NEON(3, Packet8uc, vext_u8)
-PALIGN_NEON(4, Packet8uc, vext_u8)
-PALIGN_NEON(5, Packet8uc, vext_u8)
-PALIGN_NEON(6, Packet8uc, vext_u8)
-PALIGN_NEON(7, Packet8uc, vext_u8)
-
-PALIGN_NEON(0, Packet16uc, vextq_u8)
-PALIGN_NEON(1, Packet16uc, vextq_u8)
-PALIGN_NEON(2, Packet16uc, vextq_u8)
-PALIGN_NEON(3, Packet16uc, vextq_u8)
-PALIGN_NEON(4, Packet16uc, vextq_u8)
-PALIGN_NEON(5, Packet16uc, vextq_u8)
-PALIGN_NEON(6, Packet16uc, vextq_u8)
-PALIGN_NEON(7, Packet16uc, vextq_u8)
-PALIGN_NEON(8, Packet16uc, vextq_u8)
-PALIGN_NEON(9, Packet16uc, vextq_u8)
-PALIGN_NEON(10, Packet16uc, vextq_u8)
-PALIGN_NEON(11, Packet16uc, vextq_u8)
-PALIGN_NEON(12, Packet16uc, vextq_u8)
-PALIGN_NEON(13, Packet16uc, vextq_u8)
-PALIGN_NEON(14, Packet16uc, vextq_u8)
-PALIGN_NEON(15, Packet16uc, vextq_u8)
-
-PALIGN_NEON(0, Packet4s, vext_s16)
-PALIGN_NEON(1, Packet4s, vext_s16)
-PALIGN_NEON(2, Packet4s, vext_s16)
-PALIGN_NEON(3, Packet4s, vext_s16)
-
-PALIGN_NEON(0, Packet8s, vextq_s16)
-PALIGN_NEON(1, Packet8s, vextq_s16)
-PALIGN_NEON(2, Packet8s, vextq_s16)
-PALIGN_NEON(3, Packet8s, vextq_s16)
-PALIGN_NEON(4, Packet8s, vextq_s16)
-PALIGN_NEON(5, Packet8s, vextq_s16)
-PALIGN_NEON(6, Packet8s, vextq_s16)
-PALIGN_NEON(7, Packet8s, vextq_s16)
-
-PALIGN_NEON(0, Packet4us, vext_u16)
-PALIGN_NEON(1, Packet4us, vext_u16)
-PALIGN_NEON(2, Packet4us, vext_u16)
-PALIGN_NEON(3, Packet4us, vext_u16)
-
-PALIGN_NEON(0, Packet8us, vextq_u16)
-PALIGN_NEON(1, Packet8us, vextq_u16)
-PALIGN_NEON(2, Packet8us, vextq_u16)
-PALIGN_NEON(3, Packet8us, vextq_u16)
-PALIGN_NEON(4, Packet8us, vextq_u16)
-PALIGN_NEON(5, Packet8us, vextq_u16)
-PALIGN_NEON(6, Packet8us, vextq_u16)
-PALIGN_NEON(7, Packet8us, vextq_u16)
-
-PALIGN_NEON(0, Packet2i, vext_s32)
-PALIGN_NEON(1, Packet2i, vext_s32)
-
-PALIGN_NEON(0, Packet4i, vextq_s32)
-PALIGN_NEON(1, Packet4i, vextq_s32)
-PALIGN_NEON(2, Packet4i, vextq_s32)
-PALIGN_NEON(3, Packet4i, vextq_s32)
-
-PALIGN_NEON(0, Packet2ui, vext_u32)
-PALIGN_NEON(1, Packet2ui, vext_u32)
-
-PALIGN_NEON(0, Packet4ui, vextq_u32)
-PALIGN_NEON(1, Packet4ui, vextq_u32)
-PALIGN_NEON(2, Packet4ui, vextq_u32)
-PALIGN_NEON(3, Packet4ui, vextq_u32)
-
-PALIGN_NEON(0, Packet2l, vextq_s64)
-PALIGN_NEON(1, Packet2l, vextq_s64)
-
-PALIGN_NEON(0, Packet2ul, vextq_u64)
-PALIGN_NEON(1, Packet2ul, vextq_u64)
-
-#undef PALIGN_NEON
-
 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2f, 2>& kernel)
 {
   const float32x2x2_t z = vzip_f32(kernel.packet[0], kernel.packet[1]);
@@ -3563,22 +3422,6 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
 template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
 { return vgetq_lane_f64(vpmaxq_f64(a,a), 0); }
 
-// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
-// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
-#define PALIGN_NEON(Offset,Type,Command) \
-template<>\
-struct palign_impl<Offset,Type>\
-{\
-    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
-    {\
-        if (Offset!=0)\
-            first = Command(first, second, Offset);\
-    }\
-};\
-
-PALIGN_NEON(0, Packet2d, vextq_f64)
-PALIGN_NEON(1, Packet2d, vextq_f64)
-#undef PALIGN_NEON
 
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet2d, 2>& kernel)
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index d6bfeafe4..a16d73e27 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -161,19 +161,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
   return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
-  {
-    if (Offset==1)
-    {
-      first.v = _mm_movehl_ps(first.v, first.v);
-      first.v = _mm_movelh_ps(first.v, second.v);
-    }
-  }
-};
-
 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -346,16 +333,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
   return pfirst(a);
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
-  {
-    // FIXME is it sure we never have to align a Packet1cd?
-    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
-  }
-};
-
 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f4a409430..cf2f0be17 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -867,114 +867,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
   return _mm_movemask_ps(x) != 0x0;
 }
 
-#if EIGEN_COMP_GNUC
-// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f&  a, const Packet4f&  b, const Packet4f&  c)
-// {
-//   Packet4f res = b;
-//   asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
-//   return res;
-// }
-// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i&  a, const Packet4i&  b, const int i)
-// {
-//   Packet4i res = a;
-//   asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
-//   return res;
-// }
-#endif
-
-#ifdef EIGEN_VECTORIZE_SSSE3
-// SSSE3 versions
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
-  {
-    if (Offset!=0)
-      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
-  {
-    if (Offset!=0)
-      first = _mm_alignr_epi8(second,first, Offset*4);
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
-  {
-    if (Offset==1)
-      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
-  }
-};
-#else
-// SSE2 versions
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
-  {
-    if (Offset==1)
-    {
-      first = _mm_move_ss(first,second);
-      first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
-    }
-    else if (Offset==2)
-    {
-      first = _mm_movehl_ps(first,first);
-      first = _mm_movelh_ps(first,second);
-    }
-    else if (Offset==3)
-    {
-      first = _mm_move_ss(first,second);
-      first = _mm_shuffle_ps(first,second,0x93);
-    }
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
-  {
-    if (Offset==1)
-    {
-      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
-      first = _mm_shuffle_epi32(first,0x39);
-    }
-    else if (Offset==2)
-    {
-      first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
-      first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
-    }
-    else if (Offset==3)
-    {
-      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
-      first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
-    }
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
-  {
-    if (Offset==1)
-    {
-      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
-      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
-    }
-  }
-};
-#endif
-
 EIGEN_DEVICE_FUNC inline void
 ptranspose(PacketBlock<Packet4f,4>& kernel) {
   _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index f589fddd8..d3e41b43e 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -160,16 +160,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
 {
   return pfirst(a);
 }
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
-  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
-  {
-    // FIXME is it sure we never have to align a Packet1cd?
-    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
-  }
-};
-
 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
 {
   EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
@@ -331,18 +321,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
   return res;
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
-  {
-    if (Offset == 1) {
-      first.cd[0] = first.cd[1];
-      first.cd[1] = second.cd[0];
-    }
-  }
-};
-
 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -457,18 +435,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
   return pfirst<Packet2cf>(prod);
 }
 
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
-  {
-    if (Offset==1)
-    {
-      first.v = vec_sld(first.v, second.v, 8);
-    }
-  }
-};
-
 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
 {
   EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 3435f7c1e..3fb642a38 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -298,33 +298,6 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
 }
 #endif
 
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
-  {
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(first, second, 4); break;
-    case 2:
-      first = vec_sld(first, second, 8); break;
-    case 3:
-      first = vec_sld(first, second, 12); break;
-    }
-  }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
-  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
-  {
-    if (Offset == 1)
-      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
-  }
-};
-
 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)
 {
   // FIXME: No intrinsic yet
@@ -636,30 +609,6 @@ template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Pack
   return splat;
 }
 
-/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
- */
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
-  {
-    switch (Offset % 4) {
-    case 1:
-      first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
-      first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
-      break;
-    case 2:
-      first.v4f[0] = first.v4f[1];
-      first.v4f[1] = second.v4f[0];
-      break;
-    case 3:
-      first.v4f[0] = vec_sld(first.v4f[1],  second.v4f[0], 8);
-      first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
-      break;
-    }
-  }
-};
-
 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from)
 {
   // FIXME: No intrinsic yet
@@ -942,22 +891,6 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
   return result;
 }
 #else
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
-  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
-  {
-    switch (Offset % 4) {
-    case 1:
-      first = vec_sld(first, second, 4); break;
-    case 2:
-      first = vec_sld(first, second, 8); break;
-    case 3:
-      first = vec_sld(first, second, 12); break;
-    }
-  }
-};
-
 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
 {
   // FIXME: No intrinsic yet
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index cceaff7c5..7341d67e7 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -103,7 +103,6 @@ template<typename Scalar,typename Packet> void packetmath()
   EIGEN_ALIGN_MAX Scalar data1[size];
   EIGEN_ALIGN_MAX Scalar data2[size];
   EIGEN_ALIGN_MAX Scalar data3[size];
-  EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
   EIGEN_ALIGN_MAX Scalar ref[size];
   RealScalar refvalue = RealScalar(0);
   for (int i=0; i<size; ++i)
@@ -163,38 +162,6 @@ template<typename Scalar,typename Packet> void packetmath()
     }
   }
 
-  for (int offset=0; offset<PacketSize; ++offset)
-  {
-    #define MIN(A,B) (A<B?A:B)
-    packets[0] = internal::pload<Packet>(data1);
-    packets[1] = internal::pload<Packet>(data1+PacketSize);
-         if (offset==0) internal::palign<0>(packets[0], packets[1]);
-    else if (offset==1) internal::palign<MIN(1,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==2) internal::palign<MIN(2,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==3) internal::palign<MIN(3,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==4) internal::palign<MIN(4,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==5) internal::palign<MIN(5,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==6) internal::palign<MIN(6,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==7) internal::palign<MIN(7,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==8) internal::palign<MIN(8,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==9) internal::palign<MIN(9,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==10) internal::palign<MIN(10,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==11) internal::palign<MIN(11,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==12) internal::palign<MIN(12,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==13) internal::palign<MIN(13,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==14) internal::palign<MIN(14,PacketSize-1)>(packets[0], packets[1]);
-    else if (offset==15) internal::palign<MIN(15,PacketSize-1)>(packets[0], packets[1]);
-    internal::pstore(data2, packets[0]);
-
-    for (int i=0; i<PacketSize; ++i)
-      ref[i] = data1[i+offset];
-
-    // palign is not used anymore, so let's just put a warning if it fails
-    ++g_test_level;
-    VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::palign");
-    --g_test_level;
-  }
-
   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
   VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
author	Rasmus Munk Larsen <rmlarsen@google.com>	2020-05-07 17:14:26 -0700
committer	Rasmus Munk Larsen <rmlarsen@google.com>	2020-05-07 17:14:26 -0700
commit	225ab040e078b923ece75b7a49ae0cef980c226f (patch)
tree	e2f71052495f7741a81a785c23fcf5fb82fabc60
parent	74ec8e6618c02a71dba28029b33dbe2a3f4da590 (diff)