about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-07 17:14:26 -0700
committer Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-07 17:14:26 -0700
commit 225ab040e078b923ece75b7a49ae0cef980c226f (patch)
tree e2f71052495f7741a81a785c23fcf5fb82fabc60
parent 74ec8e6618c02a71dba28029b33dbe2a3f4da590 (diff)
Remove unused packet op "palign".
Clean up a compiler warning in c++03 mode in AVX512/Complex.h.
-rw-r--r-- Eigen/src/Core/GenericPacketMath.h 29
-rw-r--r-- Eigen/src/Core/arch/AVX/Complex.h 20
-rw-r--r-- Eigen/src/Core/arch/AVX/PacketMath.h 97
-rw-r--r-- Eigen/src/Core/arch/AVX512/Complex.h 22
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 46
-rw-r--r-- Eigen/src/Core/arch/AltiVec/Complex.h 26
-rwxr-xr-x Eigen/src/Core/arch/AltiVec/PacketMath.h 184
-rw-r--r-- Eigen/src/Core/arch/MSA/Complex.h 18
-rw-r--r-- Eigen/src/Core/arch/MSA/PacketMath.h 32
-rw-r--r-- Eigen/src/Core/arch/NEON/Complex.h 20
-rw-r--r-- Eigen/src/Core/arch/NEON/PacketMath.h 157
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h 23
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h 108
-rw-r--r-- Eigen/src/Core/arch/ZVector/Complex.h 34
-rwxr-xr-x Eigen/src/Core/arch/ZVector/PacketMath.h 67
-rw-r--r-- test/packetmath.cpp 33
16 files changed, 1 insertions, 915 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 3f2489b46..0ed5d2cc5 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -685,35 +685,6 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
return ploadt<Packet, LoadMode>(from);
}
-/** \internal default implementation of palign() allowing partial specialization */
-template<int Offset,typename PacketType>
-struct palign_impl
-{
- // by default data are aligned, so there is nothing to be done :)
- static inline void run(PacketType&, const PacketType&) {}
-};
-
-/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
- * of \a first and \a Offset first elements of \a second.
- *
- * This function is currently only used to optimize matrix-vector products on unligned matrices.
- * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
- * at the position \a Offset. For instance, for packets of 4 elements, we have:
- * Input:
- * - first = {f0,f1,f2,f3}
- * - second = {s0,s1,s2,s3}
- * Output:
- * - if Offset==0 then {f0,f1,f2,f3}
- * - if Offset==1 then {f1,f2,f3,s0}
- * - if Offset==2 then {f2,f3,s0,s1}
- * - if Offset==3 then {f3,s0,s1,s3}
- */
-template<int Offset,typename PacketType>
-inline void palign(PacketType& first, const PacketType& second)
-{
- palign_impl<Offset,PacketType>::run(first,second);
-}
-
/***************************************************************************
* Fast complex products (GCC generates a function call which is very slow)
***************************************************************************/
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 03a097e49..893eb2702 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -157,16 +157,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
}
-template<int Offset>
-struct palign_impl<Offset,Packet4cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
- {
- if (Offset==0) return;
- palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
- }
-};
-
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
@@ -339,16 +329,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
- {
- if (Offset==0) return;
- palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
- }
-};
-
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 3ed713eee..10196fd6d 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -691,93 +691,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
return _mm256_movemask_ps(x)!=0;
}
-template<int Offset>
-struct palign_impl<Offset,Packet8f>
-{
- static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
- {
- if (Offset==1)
- {
- first = _mm256_blend_ps(first, second, 1);
- Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
- Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
- first = _mm256_blend_ps(tmp1, tmp2, 0x88);
- }
- else if (Offset==2)
- {
- first = _mm256_blend_ps(first, second, 3);
- Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
- Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
- first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
- }
- else if (Offset==3)
- {
- first = _mm256_blend_ps(first, second, 7);
- Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
- Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
- first = _mm256_blend_ps(tmp1, tmp2, 0xee);
- }
- else if (Offset==4)
- {
- first = _mm256_blend_ps(first, second, 15);
- Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
- Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
- first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
- }
- else if (Offset==5)
- {
- first = _mm256_blend_ps(first, second, 31);
- first = _mm256_permute2f128_ps(first, first, 1);
- Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
- first = _mm256_permute2f128_ps(tmp, tmp, 1);
- first = _mm256_blend_ps(tmp, first, 0x88);
- }
- else if (Offset==6)
- {
- first = _mm256_blend_ps(first, second, 63);
- first = _mm256_permute2f128_ps(first, first, 1);
- Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
- first = _mm256_permute2f128_ps(tmp, tmp, 1);
- first = _mm256_blend_ps(tmp, first, 0xcc);
- }
- else if (Offset==7)
- {
- first = _mm256_blend_ps(first, second, 127);
- first = _mm256_permute2f128_ps(first, first, 1);
- Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
- first = _mm256_permute2f128_ps(tmp, tmp, 1);
- first = _mm256_blend_ps(tmp, first, 0xee);
- }
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4d>
-{
- static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
- {
- if (Offset==1)
- {
- first = _mm256_blend_pd(first, second, 1);
- __m256d tmp = _mm256_permute_pd(first, 5);
- first = _mm256_permute2f128_pd(tmp, tmp, 1);
- first = _mm256_blend_pd(tmp, first, 0xA);
- }
- else if (Offset==2)
- {
- first = _mm256_blend_pd(first, second, 3);
- first = _mm256_permute2f128_pd(first, first, 1);
- }
- else if (Offset==3)
- {
- first = _mm256_blend_pd(first, second, 7);
- __m256d tmp = _mm256_permute_pd(first, 5);
- first = _mm256_permute2f128_pd(tmp, tmp, 1);
- first = _mm256_blend_pd(tmp, first, 5);
- }
- }
-};
-
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
@@ -1078,16 +991,6 @@ template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::ha
return _mm_insert_epi16(a,int(b.x),7);
}
-template<int Offset>
-struct palign_impl<Offset,Packet8h>
-{
- static EIGEN_STRONG_INLINE void run(Packet8h& first, const Packet8h& second)
- {
- if (Offset!=0)
- first = _mm_alignr_epi8(second,first, Offset*2);
- }
-};
-
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0];
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 219de36db..75bdf57f1 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -153,16 +153,6 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a)
return Packet4cf(res);
}
-template<int Offset>
-struct palign_impl<Offset,Packet8cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet8cf& first, const Packet8cf& second)
- {
- if (Offset==0) return;
- palign_impl<Offset*2,Packet16f>::run(first.v, second.v);
- }
-};
-
template<> struct conj_helper<Packet8cf, Packet8cf, false,true>
{
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
@@ -239,7 +229,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
- HasSetLinear = 0,
+ HasSetLinear = 0
};
};
@@ -351,16 +341,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const
Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
}
-template<int Offset>
-struct palign_impl<Offset,Packet4cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet4cd& first, const Packet4cd& second)
- {
- if (Offset==0) return;
- palign_impl<Offset*2,Packet8d>::run(first.v, second.v);
- }
-};
-
template<> struct conj_helper<Packet4cd, Packet4cd, false,true>
{
EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 3a48ea028..346d1f06e 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -919,52 +919,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)
return !_mm512_kortestz(tmp,tmp);
}
-template <int Offset>
-struct palign_impl<Offset, Packet16f> {
- static EIGEN_STRONG_INLINE void run(Packet16f& first,
- const Packet16f& second) {
- if (Offset != 0) {
- __m512i first_idx = _mm512_set_epi32(
- Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
- Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
- Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
-
- __m512i second_idx =
- _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
- Offset - 5, Offset - 6, Offset - 7, Offset - 8,
- Offset - 9, Offset - 10, Offset - 11, Offset - 12,
- Offset - 13, Offset - 14, Offset - 15, Offset - 16);
-
- unsigned short mask = 0xFFFF;
- mask <<= (16 - Offset);
-
- first = _mm512_permutexvar_ps(first_idx, first);
- Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
- first = _mm512_mask_blend_ps(mask, first, tmp);
- }
- }
-};
-template <int Offset>
-struct palign_impl<Offset, Packet8d> {
- static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
- if (Offset != 0) {
- __m512i first_idx = _mm512_set_epi32(
- 0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
- Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
-
- __m512i second_idx = _mm512_set_epi32(
- 0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
- Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
-
- unsigned char mask = 0xFF;
- mask <<= (8 - Offset);
-
- first = _mm512_permutexvar_pd(first_idx, first);
- Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
- first = _mm512_mask_blend_pd(mask, first, tmp);
- }
- }
-};
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 2a2689bc6..69d2ceca8 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -159,22 +159,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
- {
- if (Offset==1)
- {
-#ifdef _BIG_ENDIAN
- first.v = vec_sld(first.v, second.v, 8);
-#else
- first.v = vec_sld(second.v, first.v, 8);
-#endif
- }
- }
-};
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -346,16 +330,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
- {
- // FIXME is it sure we never have to align a Packet1cd?
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
- }
-};
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index f2dd98c06..83b75b974 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -1524,176 +1524,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return vec_any_ne(x, pzero(x));
}
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
- {
-#ifdef _BIG_ENDIAN
- switch (Offset % 4) {
- case 1:
- first = vec_sld(first, second, 4); break;
- case 2:
- first = vec_sld(first, second, 8); break;
- case 3:
- first = vec_sld(first, second, 12); break;
- }
-#else
- switch (Offset % 4) {
- case 1:
- first = vec_sld(second, first, 12); break;
- case 2:
- first = vec_sld(second, first, 8); break;
- case 3:
- first = vec_sld(second, first, 4); break;
- }
-#endif
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
- static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
- {
-#ifdef _BIG_ENDIAN
- switch (Offset % 4) {
- case 1:
- first = vec_sld(first, second, 4); break;
- case 2:
- first = vec_sld(first, second, 8); break;
- case 3:
- first = vec_sld(first, second, 12); break;
- }
-#else
- switch (Offset % 4) {
- case 1:
- first = vec_sld(second, first, 12); break;
- case 2:
- first = vec_sld(second, first, 8); break;
- case 3:
- first = vec_sld(second, first, 4); break;
- }
-#endif
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet8s>
-{
- static EIGEN_STRONG_INLINE void run(Packet8s& first, const Packet8s& second)
- {
-#ifdef _BIG_ENDIAN
- switch (Offset % 8) {
- case 1:
- first = vec_sld(first, second, 2); break;
- case 2:
- first = vec_sld(first, second, 4); break;
- case 3:
- first = vec_sld(first, second, 6); break;
- case 4:
- first = vec_sld(first, second, 8); break;
- case 5:
- first = vec_sld(first, second, 10); break;
- case 6:
- first = vec_sld(first, second, 12); break;
- case 7:
- first = vec_sld(first, second, 14); break;
- }
-#else
- switch (Offset % 8) {
- case 1:
- first = vec_sld(second, first, 14); break;
- case 2:
- first = vec_sld(second, first, 12); break;
- case 3:
- first = vec_sld(second, first, 10); break;
- case 4:
- first = vec_sld(second, first, 8); break;
- case 5:
- first = vec_sld(second, first, 6); break;
- case 6:
- first = vec_sld(second, first, 4); break;
- case 7:
- first = vec_sld(second, first, 2); break;
- }
-#endif
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet8us>
-{
- static EIGEN_STRONG_INLINE void run(Packet8us& first, const Packet8us& second)
- {
-#ifdef _BIG_ENDIAN
- switch (Offset % 8) {
- case 1:
- first = vec_sld(first, second, 2); break;
- case 2:
- first = vec_sld(first, second, 4); break;
- case 3:
- first = vec_sld(first, second, 6); break;
- case 4:
- first = vec_sld(first, second, 8); break;
- case 5:
- first = vec_sld(first, second, 10); break;
- case 6:
- first = vec_sld(first, second, 12); break;
- case 7:
- first = vec_sld(first, second, 14); break;
- }
-#else
- switch (Offset % 8) {
- case 1:
- first = vec_sld(second, first, 14); break;
- case 2:
- first = vec_sld(second, first, 12); break;
- case 3:
- first = vec_sld(second, first, 10); break;
- case 4:
- first = vec_sld(second, first, 8); break;
- case 5:
- first = vec_sld(second, first, 6); break;
- case 6:
- first = vec_sld(second, first, 4); break;
- case 7:
- first = vec_sld(second, first, 2); break;
- }
-#endif
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet16c>
-{
- static EIGEN_STRONG_INLINE void run(Packet16c& first, const Packet16c& second)
- {
- const int shift = Offset % 16;
- if ( shift == 0 ) return;
-#ifdef _BIG_ENDIAN
- first = vec_sld(first, second, shift);
-#else
- first = vec_sld(first, second, shift);
-#endif
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet16uc>
-{
- static EIGEN_STRONG_INLINE void run(Packet16uc& first, const Packet16uc& second)
- {
- const int shift = Offset % 16;
- if ( shift == 0 ) return;
-#ifdef _BIG_ENDIAN
- first = vec_sld(first, second, shift);
-#else
- first = vec_sld(first, second, shift);
-#endif
- }
-};
-
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
Packet4f t0, t1, t2, t3;
@@ -2362,20 +2192,6 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
}
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
- static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
- {
- if (Offset == 1)
-#ifdef _BIG_ENDIAN
- first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
-#else
- first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
-#endif
- }
-};
-
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d,2>& kernel) {
Packet2d t0, t1;
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index 7baa25e33..4877a95a8 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -305,15 +305,6 @@ EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a
(a.v[0] * a.v[3]) + (a.v[1] * a.v[2]));
}
-template <int Offset>
-struct palign_impl<Offset, Packet2cf> {
- EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) {
- if (Offset == 1) {
- first.v = (Packet4f)__builtin_msa_sldi_b((v16i8)second.v, (v16i8)first.v, Offset * 8);
- }
- }
-};
-
template <>
struct conj_helper<Packet2cf, Packet2cf, false, true> {
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
@@ -653,15 +644,6 @@ EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd&
return pfirst(a);
}
-template <int Offset>
-struct palign_impl<Offset, Packet1cd> {
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/) {
- // FIXME is it sure we never have to align a Packet1cd?
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes
- // boundary...
- }
-};
-
template <>
struct conj_helper<Packet1cd, Packet1cd, false, true> {
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index ff4e1d5f1..f03cf61ff 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -675,25 +675,6 @@ EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
return m[0];
}
-#define PALIGN_MSA(Offset, Type, Command) \
- template <> \
- struct palign_impl<Offset, Type> { \
- EIGEN_STRONG_INLINE static void run(Type& first, const Type& second) { \
- if (Offset != 0) first = (Type)(Command((v16i8)second, (v16i8)first, Offset * 4)); \
- } \
- };
-
-PALIGN_MSA(0, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(2, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(3, Packet4f, __builtin_msa_sldi_b)
-PALIGN_MSA(0, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(2, Packet4i, __builtin_msa_sldi_b)
-PALIGN_MSA(3, Packet4i, __builtin_msa_sldi_b)
-
-#undef PALIGN_MSA
-
inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
os << "[ " << value.packet[0] << "," << std::endl
<< " " << value.packet[1] << "," << std::endl
@@ -1168,19 +1149,6 @@ EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
#endif
}
-#define PALIGN_MSA(Offset, Type, Command) \
- template <> \
- struct palign_impl<Offset, Type> { \
- EIGEN_STRONG_INLINE static void run(Type& first, const Type& second) { \
- if (Offset != 0) first = (Type)(Command((v16i8)second, (v16i8)first, Offset * 8)); \
- } \
- };
-
-PALIGN_MSA(0, Packet2d, __builtin_msa_sldi_b)
-PALIGN_MSA(1, Packet2d, __builtin_msa_sldi_b)
-
-#undef PALIGN_MSA
-
inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
return os;
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index b03c66122..8cd2a5ebe 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -340,16 +340,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return s;
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
- EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
- {
- if (Offset == 1)
- first.v = vextq_f32(first.v, second.v, 2);
- }
-};
-
template<> struct conj_helper<Packet1cf,Packet1cf,false,true>
{
EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
@@ -602,16 +592,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
- {
- // FIXME is it sure we never have to align a Packet1cd?
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
- }
-};
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 3d24f00ce..5937433f5 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -2708,147 +2708,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return vget_lane_u32(vpmax_u32(tmp, tmp), 0);
}
-// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
-// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
-#define PALIGN_NEON(Offset,Type,Command) \
-template<>\
-struct palign_impl<Offset,Type>\
-{\
- EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
- {\
- if (Offset!=0)\
- first = Command(first, second, Offset);\
- }\
-};\
-
-template<typename T>
-EIGEN_STRONG_INLINE T palign_4c(const T& first, const T &second, const int n)
-{
- return static_cast<T>((static_cast<uint32_t>(second) << (32 - n * 8)) | (static_cast<uint32_t>(first) >> (n * 8)));
-}
-
-PALIGN_NEON(0, Packet2f, vext_f32)
-PALIGN_NEON(1, Packet2f, vext_f32)
-
-PALIGN_NEON(0, Packet4f, vextq_f32)
-PALIGN_NEON(1, Packet4f, vextq_f32)
-PALIGN_NEON(2, Packet4f, vextq_f32)
-PALIGN_NEON(3, Packet4f, vextq_f32)
-
-PALIGN_NEON(0, Packet4c, palign_4c)
-PALIGN_NEON(1, Packet4c, palign_4c)
-PALIGN_NEON(2, Packet4c, palign_4c)
-PALIGN_NEON(3, Packet4c, palign_4c)
-
-PALIGN_NEON(0, Packet8c, vext_s8)
-PALIGN_NEON(1, Packet8c, vext_s8)
-PALIGN_NEON(2, Packet8c, vext_s8)
-PALIGN_NEON(3, Packet8c, vext_s8)
-PALIGN_NEON(4, Packet8c, vext_s8)
-PALIGN_NEON(5, Packet8c, vext_s8)
-PALIGN_NEON(6, Packet8c, vext_s8)
-PALIGN_NEON(7, Packet8c, vext_s8)
-
-PALIGN_NEON(0, Packet16c, vextq_s8)
-PALIGN_NEON(1, Packet16c, vextq_s8)
-PALIGN_NEON(2, Packet16c, vextq_s8)
-PALIGN_NEON(3, Packet16c, vextq_s8)
-PALIGN_NEON(4, Packet16c, vextq_s8)
-PALIGN_NEON(5, Packet16c, vextq_s8)
-PALIGN_NEON(6, Packet16c, vextq_s8)
-PALIGN_NEON(7, Packet16c, vextq_s8)
-PALIGN_NEON(8, Packet16c, vextq_s8)
-PALIGN_NEON(9, Packet16c, vextq_s8)
-PALIGN_NEON(10, Packet16c, vextq_s8)
-PALIGN_NEON(11, Packet16c, vextq_s8)
-PALIGN_NEON(12, Packet16c, vextq_s8)
-PALIGN_NEON(13, Packet16c, vextq_s8)
-PALIGN_NEON(14, Packet16c, vextq_s8)
-PALIGN_NEON(15, Packet16c, vextq_s8)
-
-PALIGN_NEON(0, Packet4uc, palign_4c)
-PALIGN_NEON(1, Packet4uc, palign_4c)
-PALIGN_NEON(2, Packet4uc, palign_4c)
-PALIGN_NEON(3, Packet4uc, palign_4c)
-
-PALIGN_NEON(0, Packet8uc, vext_u8)
-PALIGN_NEON(1, Packet8uc, vext_u8)
-PALIGN_NEON(2, Packet8uc, vext_u8)
-PALIGN_NEON(3, Packet8uc, vext_u8)
-PALIGN_NEON(4, Packet8uc, vext_u8)
-PALIGN_NEON(5, Packet8uc, vext_u8)
-PALIGN_NEON(6, Packet8uc, vext_u8)
-PALIGN_NEON(7, Packet8uc, vext_u8)
-
-PALIGN_NEON(0, Packet16uc, vextq_u8)
-PALIGN_NEON(1, Packet16uc, vextq_u8)
-PALIGN_NEON(2, Packet16uc, vextq_u8)
-PALIGN_NEON(3, Packet16uc, vextq_u8)
-PALIGN_NEON(4, Packet16uc, vextq_u8)
-PALIGN_NEON(5, Packet16uc, vextq_u8)
-PALIGN_NEON(6, Packet16uc, vextq_u8)
-PALIGN_NEON(7, Packet16uc, vextq_u8)
-PALIGN_NEON(8, Packet16uc, vextq_u8)
-PALIGN_NEON(9, Packet16uc, vextq_u8)
-PALIGN_NEON(10, Packet16uc, vextq_u8)
-PALIGN_NEON(11, Packet16uc, vextq_u8)
-PALIGN_NEON(12, Packet16uc, vextq_u8)
-PALIGN_NEON(13, Packet16uc, vextq_u8)
-PALIGN_NEON(14, Packet16uc, vextq_u8)
-PALIGN_NEON(15, Packet16uc, vextq_u8)
-
-PALIGN_NEON(0, Packet4s, vext_s16)
-PALIGN_NEON(1, Packet4s, vext_s16)
-PALIGN_NEON(2, Packet4s, vext_s16)
-PALIGN_NEON(3, Packet4s, vext_s16)
-
-PALIGN_NEON(0, Packet8s, vextq_s16)
-PALIGN_NEON(1, Packet8s, vextq_s16)
-PALIGN_NEON(2, Packet8s, vextq_s16)
-PALIGN_NEON(3, Packet8s, vextq_s16)
-PALIGN_NEON(4, Packet8s, vextq_s16)
-PALIGN_NEON(5, Packet8s, vextq_s16)
-PALIGN_NEON(6, Packet8s, vextq_s16)
-PALIGN_NEON(7, Packet8s, vextq_s16)
-
-PALIGN_NEON(0, Packet4us, vext_u16)
-PALIGN_NEON(1, Packet4us, vext_u16)
-PALIGN_NEON(2, Packet4us, vext_u16)
-PALIGN_NEON(3, Packet4us, vext_u16)
-
-PALIGN_NEON(0, Packet8us, vextq_u16)
-PALIGN_NEON(1, Packet8us, vextq_u16)
-PALIGN_NEON(2, Packet8us, vextq_u16)
-PALIGN_NEON(3, Packet8us, vextq_u16)
-PALIGN_NEON(4, Packet8us, vextq_u16)
-PALIGN_NEON(5, Packet8us, vextq_u16)
-PALIGN_NEON(6, Packet8us, vextq_u16)
-PALIGN_NEON(7, Packet8us, vextq_u16)
-
-PALIGN_NEON(0, Packet2i, vext_s32)
-PALIGN_NEON(1, Packet2i, vext_s32)
-
-PALIGN_NEON(0, Packet4i, vextq_s32)
-PALIGN_NEON(1, Packet4i, vextq_s32)
-PALIGN_NEON(2, Packet4i, vextq_s32)
-PALIGN_NEON(3, Packet4i, vextq_s32)
-
-PALIGN_NEON(0, Packet2ui, vext_u32)
-PALIGN_NEON(1, Packet2ui, vext_u32)
-
-PALIGN_NEON(0, Packet4ui, vextq_u32)
-PALIGN_NEON(1, Packet4ui, vextq_u32)
-PALIGN_NEON(2, Packet4ui, vextq_u32)
-PALIGN_NEON(3, Packet4ui, vextq_u32)
-
-PALIGN_NEON(0, Packet2l, vextq_s64)
-PALIGN_NEON(1, Packet2l, vextq_s64)
-
-PALIGN_NEON(0, Packet2ul, vextq_u64)
-PALIGN_NEON(1, Packet2ul, vextq_u64)
-
-#undef PALIGN_NEON
-
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2f, 2>& kernel)
{
const float32x2x2_t z = vzip_f32(kernel.packet[0], kernel.packet[1]);
@@ -3563,22 +3422,6 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{ return vgetq_lane_f64(vpmaxq_f64(a,a), 0); }
-// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
-// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
-#define PALIGN_NEON(Offset,Type,Command) \
-template<>\
-struct palign_impl<Offset,Type>\
-{\
- EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
- {\
- if (Offset!=0)\
- first = Command(first, second, Offset);\
- }\
-};\
-
-PALIGN_NEON(0, Packet2d, vextq_f64)
-PALIGN_NEON(1, Packet2d, vextq_f64)
-#undef PALIGN_NEON
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d, 2>& kernel)
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index d6bfeafe4..a16d73e27 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -161,19 +161,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
- {
- if (Offset==1)
- {
- first.v = _mm_movehl_ps(first.v, first.v);
- first.v = _mm_movelh_ps(first.v, second.v);
- }
- }
-};
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -346,16 +333,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
return pfirst(a);
}
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
- {
- // FIXME is it sure we never have to align a Packet1cd?
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
- }
-};
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f4a409430..cf2f0be17 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -867,114 +867,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return _mm_movemask_ps(x) != 0x0;
}
-#if EIGEN_COMP_GNUC
-// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
-// {
-// Packet4f res = b;
-// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
-// return res;
-// }
-// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i)
-// {
-// Packet4i res = a;
-// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
-// return res;
-// }
-#endif
-
-#ifdef EIGEN_VECTORIZE_SSSE3
-// SSSE3 versions
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
- {
- if (Offset!=0)
- first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
- static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
- {
- if (Offset!=0)
- first = _mm_alignr_epi8(second,first, Offset*4);
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
- static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
- {
- if (Offset==1)
- first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
- }
-};
-#else
-// SSE2 versions
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
- {
- if (Offset==1)
- {
- first = _mm_move_ss(first,second);
- first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
- }
- else if (Offset==2)
- {
- first = _mm_movehl_ps(first,first);
- first = _mm_movelh_ps(first,second);
- }
- else if (Offset==3)
- {
- first = _mm_move_ss(first,second);
- first = _mm_shuffle_ps(first,second,0x93);
- }
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
- static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
- {
- if (Offset==1)
- {
- first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
- first = _mm_shuffle_epi32(first,0x39);
- }
- else if (Offset==2)
- {
- first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
- first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
- }
- else if (Offset==3)
- {
- first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
- first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
- }
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
- static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
- {
- if (Offset==1)
- {
- first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
- first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
- }
- }
-};
-#endif
-
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index f589fddd8..d3e41b43e 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -160,16 +160,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
{
return pfirst(a);
}
-template<int Offset>
-struct palign_impl<Offset,Packet1cd>
-{
- static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
- {
- // FIXME is it sure we never have to align a Packet1cd?
- // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
- }
-};
-
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
@@ -331,18 +321,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return res;
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
- {
- if (Offset == 1) {
- first.cd[0] = first.cd[1];
- first.cd[1] = second.cd[0];
- }
- }
-};
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@@ -457,18 +435,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
-template<int Offset>
-struct palign_impl<Offset,Packet2cf>
-{
- static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
- {
- if (Offset==1)
- {
- first.v = vec_sld(first.v, second.v, 8);
- }
- }
-};
-
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 3435f7c1e..3fb642a38 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -298,33 +298,6 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
}
#endif
-
-template<int Offset>
-struct palign_impl<Offset,Packet4i>
-{
- static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
- {
- switch (Offset % 4) {
- case 1:
- first = vec_sld(first, second, 4); break;
- case 2:
- first = vec_sld(first, second, 8); break;
- case 3:
- first = vec_sld(first, second, 12); break;
- }
- }
-};
-
-template<int Offset>
-struct palign_impl<Offset,Packet2d>
-{
- static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
- {
- if (Offset == 1)
- first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
- }
-};
-
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
// FIXME: No intrinsic yet
@@ -636,30 +609,6 @@ template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Pack
return splat;
}
-/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
- */
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
- {
- switch (Offset % 4) {
- case 1:
- first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
- first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
- break;
- case 2:
- first.v4f[0] = first.v4f[1];
- first.v4f[1] = second.v4f[0];
- break;
- case 3:
- first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
- first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
- break;
- }
- }
-};
-
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet
@@ -942,22 +891,6 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
return result;
}
#else
-template<int Offset>
-struct palign_impl<Offset,Packet4f>
-{
- static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
- {
- switch (Offset % 4) {
- case 1:
- first = vec_sld(first, second, 4); break;
- case 2:
- first = vec_sld(first, second, 8); break;
- case 3:
- first = vec_sld(first, second, 12); break;
- }
- }
-};
-
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index cceaff7c5..7341d67e7 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -103,7 +103,6 @@ template<typename Scalar,typename Packet> void packetmath()
EIGEN_ALIGN_MAX Scalar data1[size];
EIGEN_ALIGN_MAX Scalar data2[size];
EIGEN_ALIGN_MAX Scalar data3[size];
- EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
EIGEN_ALIGN_MAX Scalar ref[size];
RealScalar refvalue = RealScalar(0);
for (int i=0; i<size; ++i)
@@ -163,38 +162,6 @@ template<typename Scalar,typename Packet> void packetmath()
}
}
- for (int offset=0; offset<PacketSize; ++offset)
- {
- #define MIN(A,B) (A<B?A:B)
- packets[0] = internal::pload<Packet>(data1);
- packets[1] = internal::pload<Packet>(data1+PacketSize);
- if (offset==0) internal::palign<0>(packets[0], packets[1]);
- else if (offset==1) internal::palign<MIN(1,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==2) internal::palign<MIN(2,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==3) internal::palign<MIN(3,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==4) internal::palign<MIN(4,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==5) internal::palign<MIN(5,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==6) internal::palign<MIN(6,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==7) internal::palign<MIN(7,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==8) internal::palign<MIN(8,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==9) internal::palign<MIN(9,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==10) internal::palign<MIN(10,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==11) internal::palign<MIN(11,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==12) internal::palign<MIN(12,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==13) internal::palign<MIN(13,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==14) internal::palign<MIN(14,PacketSize-1)>(packets[0], packets[1]);
- else if (offset==15) internal::palign<MIN(15,PacketSize-1)>(packets[0], packets[1]);
- internal::pstore(data2, packets[0]);
-
- for (int i=0; i<PacketSize; ++i)
- ref[i] = data1[i+offset];
-
- // palign is not used anymore, so let's just put a warning if it fails
- ++g_test_level;
- VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::palign");
- --g_test_level;
- }
-
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);