aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-04-23 18:17:14 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-04-23 18:17:14 +0000
commite80ec24357aaaa1e205cda3be466206e7830fcda (patch)
tree6b3d585938c9927fce763caccbf5987a1f67217b
parent0aebe19aca58d78434cf724ef954a8383b2bf7c9 (diff)
Remove unused packet op "preduxp".
-rw-r--r--Eigen/src/Core/GenericPacketMath.h5
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h23
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h57
-rw-r--r--Eigen/src/Core/arch/AVX512/Complex.h2
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h212
-rw-r--r--Eigen/src/Core/arch/AltiVec/Complex.h17
-rwxr-xr-xEigen/src/Core/arch/AltiVec/PacketMath.h142
-rw-r--r--Eigen/src/Core/arch/MSA/Complex.h21
-rw-r--r--Eigen/src/Core/arch/MSA/PacketMath.h49
-rw-r--r--Eigen/src/Core/arch/NEON/Complex.h10
-rw-r--r--Eigen/src/Core/arch/NEON/PacketMath.h289
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h10
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h53
-rw-r--r--Eigen/src/Core/arch/ZVector/Complex.h25
-rwxr-xr-xEigen/src/Core/arch/ZVector/PacketMath.h82
-rw-r--r--test/packetmath.cpp13
16 files changed, 1 insertions, 1009 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index ec61ac697..5612ef449 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -59,7 +59,6 @@ struct default_packet_traits
HasSetLinear = 1,
HasBlend = 0,
HasInsert = 0,
- HasReduxp = 1,
HasDiv = 0,
HasSqrt = 0,
@@ -485,10 +484,6 @@ template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
{ return a; }
-/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-preduxp(const Packet* vecs) { return vecs[0]; }
-
/** \internal \returns the sum of the elements of \a a*/
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
{ return a; }
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 4a80bae05..03a097e49 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -151,21 +151,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packe
Packet2cf(_mm256_extractf128_ps(a.v,1))));
}
-template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
-{
- Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
- Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
- t0 = _mm256_hadd_ps(t0,t1);
- Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
- Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
- t2 = _mm256_hadd_ps(t2,t3);
-
- t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
- t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
-
- return Packet4cf(_mm256_add_ps(t1,t3));
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
{
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
@@ -348,14 +333,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Pack
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
-template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
-{
- Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
- Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
-
- return Packet2cd(_mm256_add_pd(t0,t1));
-}
-
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
{
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 56a344970..3ed713eee 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -627,49 +627,6 @@ template<> EIGEN_STRONG_INLINE Packet4d pldexp<Packet4d>(const Packet4d& a, cons
return pmul(a,_mm256_castsi256_pd(e));
}
-// preduxp should be ok
-// FIXME: why is this ok? why isn't the simply implementation working as expected?
-template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
-{
- __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
- __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
- __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
- __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
-
- __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- __m256 sum1 = _mm256_add_ps(perm1, hsum5);
- __m256 sum2 = _mm256_add_ps(perm2, hsum6);
- __m256 sum3 = _mm256_add_ps(perm3, hsum7);
- __m256 sum4 = _mm256_add_ps(perm4, hsum8);
-
- __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
- return final;
-}
-template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
-{
- Packet4d tmp0, tmp1;
-
- tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- return _mm256_blend_pd(tmp0, tmp1, 0xC);
-}
-
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
@@ -1105,20 +1062,6 @@ template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h&
return Eigen::half(reduced);
}
-template<> EIGEN_STRONG_INLINE Packet8h preduxp<Packet8h>(const Packet8h* p) {
- Packet8f pf[8];
- pf[0] = half2float(p[0]);
- pf[1] = half2float(p[1]);
- pf[2] = half2float(p[2]);
- pf[3] = half2float(p[3]);
- pf[4] = half2float(p[4]);
- pf[5] = half2float(p[5]);
- pf[6] = half2float(p[6]);
- pf[7] = half2float(p[7]);
- Packet8f reduced = preduxp<Packet8f>(pf);
- return float2half(reduced);
-}
-
template<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a)
{
__m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 4d0318611..219de36db 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -42,7 +42,6 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
- HasReduxp = 0,
HasInsert = 1
};
};
@@ -241,7 +240,6 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
- HasReduxp = 0
};
};
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 0ee73d5d6..3a48ea028 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -793,196 +793,6 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
#endif
-template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f*
-vecs)
-{
- EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
-
- __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
- __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
- __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
- __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
-
- __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- __m256 sum1 = _mm256_add_ps(perm1, hsum5);
- __m256 sum2 = _mm256_add_ps(perm2, hsum6);
- __m256 sum3 = _mm256_add_ps(perm3, hsum7);
- __m256 sum4 = _mm256_add_ps(perm4, hsum8);
-
- __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
-
- hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
- hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
- hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
- hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0));
-
- hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
- hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
- hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
- hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
-
- hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
- hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
- hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
- hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
-
- __m512 final_output;
-
- EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
- return final_output;
-}
-
-template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
-{
- Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
- Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
-
- Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
- Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
-
- Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
- Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
-
- Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
- Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
-
- Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
- Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
-
- Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
- Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
-
- Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
- Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
-
- Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
- Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
-
- Packet4d tmp0, tmp1;
-
- tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
-
- tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
-
- tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
-
- tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
-
- __m512d final_output = _mm512_castpd256_pd512(final_0);
-
- return _mm512_insertf64x4(final_output, final_1, 1);
-}
template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
@@ -1660,28 +1470,6 @@ template<> EIGEN_STRONG_INLINE half predux_mul<Packet16h>(const Packet16h& from)
return half(predux_mul(from_float));
}
-template<> EIGEN_STRONG_INLINE Packet16h preduxp<Packet16h>(const Packet16h* p) {
- Packet16f pf[16];
- pf[0] = half2float(p[0]);
- pf[1] = half2float(p[1]);
- pf[2] = half2float(p[2]);
- pf[3] = half2float(p[3]);
- pf[4] = half2float(p[4]);
- pf[5] = half2float(p[5]);
- pf[6] = half2float(p[6]);
- pf[7] = half2float(p[7]);
- pf[8] = half2float(p[8]);
- pf[9] = half2float(p[9]);
- pf[10] = half2float(p[10]);
- pf[11] = half2float(p[11]);
- pf[12] = half2float(p[12]);
- pf[13] = half2float(p[13]);
- pf[14] = half2float(p[14]);
- pf[15] = half2float(p[15]);
- Packet16f reduced = preduxp<Packet16f>(pf);
- return float2half(reduced);
-}
-
template<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a)
{
__m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 62df67ac9..2a2689bc6 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -149,22 +149,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
return pfirst<Packet2cf>(Packet2cf(b));
}
-template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
-{
- Packet4f b1, b2;
-#ifdef _BIG_ENDIAN
- b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
- b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
-#else
- b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
- b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
-#endif
- b2 = vec_sld(b2, b2, 8);
- b2 = padd<Packet4f>(b1, b2);
-
- return Packet2cf(b2);
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
Packet4f b;
@@ -359,7 +343,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Pac
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
-template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 451e1396c..3000c32d8 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -929,106 +929,6 @@ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
return pfirst(sum);
}
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- Packet4f v[4], sum[4];
-
- // It's easier and faster to transpose then add as columns
- // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
- // Do the transpose, first set of moves
- v[0] = vec_mergeh(vecs[0], vecs[2]);
- v[1] = vec_mergel(vecs[0], vecs[2]);
- v[2] = vec_mergeh(vecs[1], vecs[3]);
- v[3] = vec_mergel(vecs[1], vecs[3]);
- // Get the resulting vectors
- sum[0] = vec_mergeh(v[0], v[2]);
- sum[1] = vec_mergel(v[0], v[2]);
- sum[2] = vec_mergeh(v[1], v[3]);
- sum[3] = vec_mergel(v[1], v[3]);
-
- // Now do the summation:
- // Lines 0+1
- sum[0] = sum[0] + sum[1];
- // Lines 2+3
- sum[1] = sum[2] + sum[3];
- // Add the results
- sum[0] = sum[0] + sum[1];
-
- return sum[0];
-}
-
-template<> EIGEN_STRONG_INLINE Packet8s preduxp<Packet8s>(const Packet8s* vecs)
-{
- Packet8s step1[8], step2[8], step3[8];
-
- step1[0] = vec_mergeh(vecs[0], vecs[4]);
- step1[1] = vec_mergel(vecs[0], vecs[4]);
- step1[2] = vec_mergeh(vecs[1], vecs[5]);
- step1[3] = vec_mergel(vecs[1], vecs[5]);
- step1[4] = vec_mergeh(vecs[2], vecs[6]);
- step1[5] = vec_mergel(vecs[2], vecs[6]);
- step1[6] = vec_mergeh(vecs[3], vecs[7]);
- step1[7] = vec_mergel(vecs[3], vecs[7]);
-
- step2[0] = vec_mergeh(step1[0], step1[4]);
- step2[1] = vec_mergel(step1[0], step1[4]);
- step2[2] = vec_mergeh(step1[1], step1[5]);
- step2[3] = vec_mergel(step1[1], step1[5]);
- step2[4] = vec_mergeh(step1[2], step1[6]);
- step2[5] = vec_mergel(step1[2], step1[6]);
- step2[6] = vec_mergeh(step1[3], step1[7]);
- step2[7] = vec_mergel(step1[3], step1[7]);
-
- step3[0] = vec_mergeh(step2[0], step2[4]);
- step3[1] = vec_mergel(step2[0], step2[4]);
- step3[2] = vec_mergeh(step2[1], step2[5]);
- step3[3] = vec_mergel(step2[1], step2[5]);
- step3[4] = vec_mergeh(step2[2], step2[6]);
- step3[5] = vec_mergel(step2[2], step2[6]);
- step3[6] = vec_mergeh(step2[3], step2[7]);
- step3[7] = vec_mergel(step2[3], step2[7]);
-
- step3[0] += step3[1] + step3[2] + step3[3] + step3[4] + step3[5] + step3[6] + step3[7];
-
- return step3[0];
-}
-
-template<> EIGEN_STRONG_INLINE Packet8us preduxp<Packet8us>(const Packet8us* vecs)
-{
- Packet8us step1[8], step2[8], step3[8];
-
- step1[0] = vec_mergeh(vecs[0], vecs[4]);
- step1[1] = vec_mergel(vecs[0], vecs[4]);
- step1[2] = vec_mergeh(vecs[1], vecs[5]);
- step1[3] = vec_mergel(vecs[1], vecs[5]);
- step1[4] = vec_mergeh(vecs[2], vecs[6]);
- step1[5] = vec_mergel(vecs[2], vecs[6]);
- step1[6] = vec_mergeh(vecs[3], vecs[7]);
- step1[7] = vec_mergel(vecs[3], vecs[7]);
-
- step2[0] = vec_mergeh(step1[0], step1[4]);
- step2[1] = vec_mergel(step1[0], step1[4]);
- step2[2] = vec_mergeh(step1[1], step1[5]);
- step2[3] = vec_mergel(step1[1], step1[5]);
- step2[4] = vec_mergeh(step1[2], step1[6]);
- step2[5] = vec_mergel(step1[2], step1[6]);
- step2[6] = vec_mergeh(step1[3], step1[7]);
- step2[7] = vec_mergel(step1[3], step1[7]);
-
- step3[0] = vec_mergeh(step2[0], step2[4]);
- step3[1] = vec_mergel(step2[0], step2[4]);
- step3[2] = vec_mergeh(step2[1], step2[5]);
- step3[3] = vec_mergel(step2[1], step2[5]);
- step3[4] = vec_mergeh(step2[2], step2[6]);
- step3[5] = vec_mergel(step2[2], step2[6]);
- step3[6] = vec_mergeh(step2[3], step2[7]);
- step3[7] = vec_mergel(step2[3], step2[7]);
-
- step3[0] += step3[1] + step3[2] + step3[3] + step3[4] + step3[5] + step3[6] + step3[7];
-
- return step3[0];
-}
-
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i sum;
@@ -1074,34 +974,6 @@ template<> EIGEN_STRONG_INLINE unsigned short int predux<Packet8us>(const Packet
return static_cast<unsigned short int>(predux(first_half) + predux(second_half));
}
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- Packet4i v[4], sum[4];
-
- // It's easier and faster to transpose then add as columns
- // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
- // Do the transpose, first set of moves
- v[0] = vec_mergeh(vecs[0], vecs[2]);
- v[1] = vec_mergel(vecs[0], vecs[2]);
- v[2] = vec_mergeh(vecs[1], vecs[3]);
- v[3] = vec_mergel(vecs[1], vecs[3]);
- // Get the resulting vectors
- sum[0] = vec_mergeh(v[0], v[2]);
- sum[1] = vec_mergel(v[0], v[2]);
- sum[2] = vec_mergeh(v[1], v[3]);
- sum[3] = vec_mergel(v[1], v[3]);
-
- // Now do the summation:
- // Lines 0+1
- sum[0] = sum[0] + sum[1];
- // Lines 2+3
- sum[1] = sum[2] + sum[3];
- // Add the results
- sum[0] = sum[0] + sum[1];
-
- return sum[0];
-}
-
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
@@ -1835,20 +1707,6 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
return pfirst<Packet2d>(sum);
}
-template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
-{
- Packet2d v[2], sum;
- v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
- v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
-
-#ifdef _BIG_ENDIAN
- sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
-#else
- sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
-#endif
-
- return sum;
-}
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index c09e1e4fb..7baa25e33 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -298,20 +298,6 @@ EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
}
template <>
-EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs) {
- EIGEN_MSA_DEBUG;
-
- Packet4f sum1, sum2, sum;
-
- // Add the first two 64-bit float32x2_t of vecs[0]
- sum1 = (Packet4f)__builtin_msa_ilvr_d((v2i64)vecs[1].v, (v2i64)vecs[0].v);
- sum2 = (Packet4f)__builtin_msa_ilvl_d((v2i64)vecs[1].v, (v2i64)vecs[0].v);
- sum = padd(sum1, sum2);
-
- return Packet2cf(sum);
-}
-
-template <>
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
EIGEN_MSA_DEBUG;
@@ -661,13 +647,6 @@ EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
}
template <>
-EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) {
- EIGEN_MSA_DEBUG;
-
- return vecs[0];
-}
-
-template <>
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
EIGEN_MSA_DEBUG;
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index 94ee0e302..ff4e1d5f1 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -575,45 +575,6 @@ EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
return s[0];
}
-template <>
-EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs) {
- EIGEN_MSA_DEBUG;
-
- v4i32 tmp1, tmp2, tmp3, tmp4;
- Packet4f sum;
-
- tmp1 = __builtin_msa_ilvr_w((v4i32)vecs[1], (v4i32)vecs[0]);
- tmp2 = __builtin_msa_ilvr_w((v4i32)vecs[3], (v4i32)vecs[2]);
- tmp3 = __builtin_msa_ilvl_w((v4i32)vecs[1], (v4i32)vecs[0]);
- tmp4 = __builtin_msa_ilvl_w((v4i32)vecs[3], (v4i32)vecs[2]);
-
- sum = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
- sum = padd(sum, (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1));
- sum = padd(sum, (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3));
- sum = padd(sum, (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3));
-
- return sum;
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs) {
- EIGEN_MSA_DEBUG;
-
- v4i32 tmp1, tmp2, tmp3, tmp4;
- Packet4i sum;
-
- tmp1 = __builtin_msa_ilvr_w((v4i32)vecs[1], (v4i32)vecs[0]);
- tmp2 = __builtin_msa_ilvr_w((v4i32)vecs[3], (v4i32)vecs[2]);
- tmp3 = __builtin_msa_ilvl_w((v4i32)vecs[1], (v4i32)vecs[0]);
- tmp4 = __builtin_msa_ilvl_w((v4i32)vecs[3], (v4i32)vecs[2]);
-
- sum = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
- sum = padd(sum, (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1));
- sum = padd(sum, (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3));
- sum = padd(sum, (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3));
-
- return sum;
-}
template <>
EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
@@ -1148,16 +1109,6 @@ EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
return s[0];
}
-template <>
-EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) {
- EIGEN_MSA_DEBUG;
-
- Packet2d v0 = (Packet2d)__builtin_msa_ilvev_d((v2i64)vecs[1], (v2i64)vecs[0]);
- Packet2d v1 = (Packet2d)__builtin_msa_ilvod_d((v2i64)vecs[1], (v2i64)vecs[0]);
-
- return padd(v0, v1);
-}
-
// Other reduction functions:
// mul
template <>
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index aca3c9e81..b03c66122 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -307,14 +307,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
return s;
}
-template<> EIGEN_STRONG_INLINE Packet1cf preduxp<Packet1cf>(const Packet1cf* vecs) { return vecs[0]; }
-template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
-{
- const Packet4f sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
- const Packet4f sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
- return Packet2cf(vaddq_f32(sum1, sum2));
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
{
std::complex<float> s;
@@ -608,8 +600,6 @@ template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
-template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
-
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
template<int Offset>
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 6ff86ba35..3d24f00ce 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -137,7 +137,6 @@ struct packet_traits<float> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1,
HasDiv = 1,
HasFloor = 1,
@@ -180,7 +179,6 @@ struct packet_traits<int8_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1
};
};
@@ -212,7 +210,6 @@ struct packet_traits<uint8_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1,
HasSqrt = 1
};
@@ -246,7 +243,6 @@ struct packet_traits<int16_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1
};
};
@@ -278,7 +274,6 @@ struct packet_traits<uint16_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1,
HasSqrt = 1
};
@@ -312,7 +307,6 @@ struct packet_traits<int32_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1
};
};
@@ -344,7 +338,6 @@ struct packet_traits<uint32_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1,
HasSqrt = 1
};
@@ -379,7 +372,6 @@ struct packet_traits<int64_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1
};
};
@@ -412,7 +404,6 @@ struct packet_traits<uint64_t> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1
};
};
@@ -2422,281 +2413,6 @@ template<> EIGEN_STRONG_INLINE int64_t predux<Packet2l>(const Packet2l& a)
template<> EIGEN_STRONG_INLINE uint64_t predux<Packet2ul>(const Packet2ul& a)
{ return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1); }
-template<> EIGEN_STRONG_INLINE Packet2f preduxp<Packet2f>(const Packet2f* vecs)
-{
- const float32x2x2_t vtrn = vzip_f32(vecs[0], vecs[1]);
- return vadd_f32(vtrn.val[0], vtrn.val[1]);
-}
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- const float32x4x2_t vtrn1 = vzipq_f32(vecs[0], vecs[2]);
- const float32x4x2_t vtrn2 = vzipq_f32(vecs[1], vecs[3]);
- const float32x4x2_t res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
- const float32x4x2_t res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
- return vaddq_f32(vaddq_f32(res1.val[0], res1.val[1]), vaddq_f32(res2.val[0], res2.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet4c preduxp<Packet4c>(const Packet4c* vecs)
-{
- const int8x8x2_t zip8 = vzip_s8(
- vreinterpret_s8_s32(vset_lane_s32(vecs[2], vdup_n_s32(vecs[0]), 1)),
- vreinterpret_s8_s32(vset_lane_s32(vecs[3], vdup_n_s32(vecs[1]), 1)));
- const uint16x4x2_t zip16 = vzip_u16(
- vreinterpret_u16_s8(zip8.val[0]),
- vreinterpret_u16_s8(zip8.val[1]));
- const int8x8_t sum = vadd_s8(
- vreinterpret_s8_u16(zip16.val[0]),
- vreinterpret_s8_u16(zip16.val[1]));
- return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(sum,
- vreinterpret_s8_s32(vrev64_s32(vreinterpret_s32_s8(sum))))), 0);
-}
-template<> EIGEN_STRONG_INLINE Packet8c preduxp<Packet8c>(const Packet8c* vecs)
-{
- int8x8_t sum[4];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- const int8x8x2_t z = vzip_s8(vecs[i*2], vecs[i*2+1]);
- sum[i] = vadd_s8(z.val[0], z.val[1]);
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- const uint16x4x2_t z = vzip_u16(vreinterpret_u16_s8(sum[i*2]), vreinterpret_u16_s8(sum[i*2+1]));
- sum[i] = vadd_s8(vreinterpret_s8_u16(z.val[0]), vreinterpret_s8_u16(z.val[1]));
- }
-
- const uint32x2x2_t z = vzip_u32(vreinterpret_u32_s8(sum[0]), vreinterpret_u32_s8(sum[1]));
- return vadd_s8(vreinterpret_s8_u32(z.val[0]), vreinterpret_s8_u32(z.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet16c preduxp<Packet16c>(const Packet16c* vecs)
-{
- int8x16_t sum[8];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 8; i++)
- {
- const int8x16x2_t z = vzipq_s8(vecs[i*2], vecs[i*2+1]);
- sum[i] = vaddq_s8(z.val[0], z.val[1]);
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- const uint16x8x2_t z = vzipq_u16(vreinterpretq_u16_s8(sum[i*2]), vreinterpretq_u16_s8(sum[i*2+1]));
- sum[i] = vaddq_s8(vreinterpretq_s8_u16(z.val[0]), vreinterpretq_s8_u16(z.val[1]));
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- const uint32x4x2_t z = vzipq_u32(vreinterpretq_u32_s8(sum[i*2]), vreinterpretq_u32_s8(sum[i*2+1]));
- sum[i] = vaddq_s8(vreinterpretq_s8_u32(z.val[0]), vreinterpretq_s8_u32(z.val[1]));
- }
-
- return vcombine_s8(
- vadd_s8(vget_low_s8(sum[0]), vget_high_s8(sum[0])),
- vadd_s8(vget_low_s8(sum[1]), vget_high_s8(sum[1])));
-}
-template<> EIGEN_STRONG_INLINE Packet4uc preduxp<Packet4uc>(const Packet4uc* vecs)
-{
- const uint8x8x2_t zip8 = vzip_u8(
- vreinterpret_u8_u32(vset_lane_u32(vecs[2], vdup_n_u32(vecs[0]), 1)),
- vreinterpret_u8_u32(vset_lane_u32(vecs[3], vdup_n_u32(vecs[1]), 1)));
- const uint16x4x2_t zip16 = vzip_u16(
- vreinterpret_u16_u8(zip8.val[0]),
- vreinterpret_u16_u8(zip8.val[1]));
- const uint8x8_t sum = vadd_u8(
- vreinterpret_u8_u16(zip16.val[0]),
- vreinterpret_u8_u16(zip16.val[1]));
- return vget_lane_u32(vreinterpret_u32_u8(vadd_u8(sum,
- vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(sum))))), 0);
-}
-template<> EIGEN_STRONG_INLINE Packet8uc preduxp<Packet8uc>(const Packet8uc* vecs)
-{
- uint8x8_t sum[4];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- const uint8x8x2_t z = vzip_u8(vecs[i*2], vecs[i*2+1]);
- sum[i] = vadd_u8(z.val[0], z.val[1]);
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- const uint16x4x2_t z = vzip_u16(vreinterpret_u16_u8(sum[i*2]), vreinterpret_u16_u8(sum[i*2+1]));
- sum[i] = vadd_u8(vreinterpret_u8_u16(z.val[0]), vreinterpret_u8_u16(z.val[1]));
- }
-
- const uint32x2x2_t z = vzip_u32(vreinterpret_u32_u8(sum[0]), vreinterpret_u32_u8(sum[1]));
- return vadd_u8(vreinterpret_u8_u32(z.val[0]), vreinterpret_u8_u32(z.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet16uc preduxp<Packet16uc>(const Packet16uc* vecs)
-{
- uint8x16_t sum[8];
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 8; i++)
- {
- const uint8x16x2_t z = vzipq_u8(vecs[i*2], vecs[i*2+1]);
- sum[i] = vaddq_u8(z.val[0], z.val[1]);
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 4; i++)
- {
- const uint16x8x2_t z = vzipq_u16(vreinterpretq_u16_u8(sum[i*2]), vreinterpretq_u16_u8(sum[i*2+1]));
- sum[i] = vaddq_u8(vreinterpretq_u8_u16(z.val[0]), vreinterpretq_u8_u16(z.val[1]));
- }
-
- EIGEN_UNROLL_LOOP
- for (int i = 0; i != 2; i++)
- {
- const uint32x4x2_t z = vzipq_u32(vreinterpretq_u32_u8(sum[i*2]), vreinterpretq_u32_u8(sum[i*2+1]));
- sum[i] = vaddq_u8(vreinterpretq_u8_u32(z.val[0]), vreinterpretq_u8_u32(z.val[1]));
- }
-
- return vcombine_u8(
- vadd_u8(vget_low_u8(sum[0]), vget_high_u8(sum[0])),
- vadd_u8(vget_low_u8(sum[1]), vget_high_u8(sum[1])));
-}
-template<> EIGEN_STRONG_INLINE Packet4s preduxp<Packet4s>(const Packet4s* vecs)
-{
- int16x4x2_t zip16;
- int32x2x2_t zip32;
- int16x4_t sum1, sum2;
-
- zip16 = vzip_s16(vecs[0], vecs[1]);
- sum1 = vadd_s16(zip16.val[0], zip16.val[1]);
- zip16 = vzip_s16(vecs[2], vecs[3]);
- sum2 = vadd_s16(zip16.val[0], zip16.val[1]);
-
- zip32 = vzip_s32(vreinterpret_s32_s16(sum1), vreinterpret_s32_s16(sum2));
- return vadd_s16(vreinterpret_s16_s32(zip32.val[0]), vreinterpret_s16_s32(zip32.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet8s preduxp<Packet8s>(const Packet8s* vecs)
-{
- int16x8x2_t zip16;
- int32x4x2_t zip32;
- int16x8_t sum1, sum2, sum3, sum4;
-
- zip16 = vzipq_s16(vecs[0], vecs[1]);
- sum1 = vaddq_s16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_s16(vecs[2], vecs[3]);
- sum2 = vaddq_s16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_s16(vecs[4], vecs[5]);
- sum3 = vaddq_s16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_s16(vecs[6], vecs[7]);
- sum4 = vaddq_s16(zip16.val[0], zip16.val[1]);
-
- zip32 = vzipq_s32(vreinterpretq_s32_s16(sum1), vreinterpretq_s32_s16(sum2));
- sum1 = vaddq_s16(vreinterpretq_s16_s32(zip32.val[0]), vreinterpretq_s16_s32(zip32.val[1]));
- zip32 = vzipq_s32(vreinterpretq_s32_s16(sum3), vreinterpretq_s32_s16(sum4));
- sum2 = vaddq_s16(vreinterpretq_s16_s32(zip32.val[0]), vreinterpretq_s16_s32(zip32.val[1]));
-
- return vcombine_s16(
- vadd_s16(vget_low_s16(sum1), vget_high_s16(sum1)),
- vadd_s16(vget_low_s16(sum2), vget_high_s16(sum2)));
-}
-template<> EIGEN_STRONG_INLINE Packet4us preduxp<Packet4us>(const Packet4us* vecs)
-{
- uint16x4x2_t zip16;
- uint32x2x2_t zip32;
- uint16x4_t sum1, sum2;
-
- zip16 = vzip_u16(vecs[0], vecs[1]);
- sum1 = vadd_u16(zip16.val[0], zip16.val[1]);
- zip16 = vzip_u16(vecs[2], vecs[3]);
- sum2 = vadd_u16(zip16.val[0], zip16.val[1]);
-
- zip32 = vzip_u32(vreinterpret_u32_u16(sum1), vreinterpret_u32_u16(sum2));
- return vadd_u16(vreinterpret_u16_u32(zip32.val[0]), vreinterpret_u16_u32(zip32.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet8us preduxp<Packet8us>(const Packet8us* vecs)
-{
- uint16x8x2_t zip16;
- uint32x4x2_t zip32;
- uint16x8_t sum1, sum2, sum3, sum4;
-
- zip16 = vzipq_u16(vecs[0], vecs[1]);
- sum1 = vaddq_u16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_u16(vecs[2], vecs[3]);
- sum2 = vaddq_u16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_u16(vecs[4], vecs[5]);
- sum3 = vaddq_u16(zip16.val[0], zip16.val[1]);
- zip16 = vzipq_u16(vecs[6], vecs[7]);
- sum4 = vaddq_u16(zip16.val[0], zip16.val[1]);
-
- zip32 = vzipq_u32(vreinterpretq_u32_u16(sum1), vreinterpretq_u32_u16(sum2));
- sum1 = vaddq_u16(vreinterpretq_u16_u32(zip32.val[0]), vreinterpretq_u16_u32(zip32.val[1]));
- zip32 = vzipq_u32(vreinterpretq_u32_u16(sum3), vreinterpretq_u32_u16(sum4));
- sum2 = vaddq_u16(vreinterpretq_u16_u32(zip32.val[0]), vreinterpretq_u16_u32(zip32.val[1]));
-
- return vcombine_u16(
- vadd_u16(vget_low_u16(sum1), vget_high_u16(sum1)),
- vadd_u16(vget_low_u16(sum2), vget_high_u16(sum2)));
-}
-template<> EIGEN_STRONG_INLINE Packet2i preduxp<Packet2i>(const Packet2i* vecs)
-{
- const int32x2x2_t vtrn = vzip_s32(vecs[0], vecs[1]);
- return vadd_s32(vtrn.val[0], vtrn.val[1]);
-}
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- const int32x4x2_t vtrn1 = vzipq_s32(vecs[0], vecs[2]);
- const int32x4x2_t vtrn2 = vzipq_s32(vecs[1], vecs[3]);
- const int32x4x2_t res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
- const int32x4x2_t res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
- return vaddq_s32(vaddq_s32(res1.val[0], res1.val[1]), vaddq_s32(res2.val[0], res2.val[1]));
-}
-template<> EIGEN_STRONG_INLINE Packet2ui preduxp<Packet2ui>(const Packet2ui* vecs)
-{
- const uint32x2x2_t vtrn = vzip_u32(vecs[0], vecs[1]);
- return vadd_u32(vtrn.val[0], vtrn.val[1]);
-}
-template<> EIGEN_STRONG_INLINE Packet4ui preduxp<Packet4ui>(const Packet4ui* vecs)
-{
- uint32x4x2_t vtrn1, vtrn2, res1, res2;
- Packet4ui sum1, sum2, sum;
-
- // NEON zip performs interleaving of the supplied vectors.
- // We perform two interleaves in a row to acquire the transposed vector
- vtrn1 = vzipq_u32(vecs[0], vecs[2]);
- vtrn2 = vzipq_u32(vecs[1], vecs[3]);
- res1 = vzipq_u32(vtrn1.val[0], vtrn2.val[0]);
- res2 = vzipq_u32(vtrn1.val[1], vtrn2.val[1]);
-
- // Do the addition of the resulting vectors
- sum1 = vaddq_u32(res1.val[0], res1.val[1]);
- sum2 = vaddq_u32(res2.val[0], res2.val[1]);
- sum = vaddq_u32(sum1, sum2);
-
- return sum;
-}
-template<> EIGEN_STRONG_INLINE Packet2l preduxp<Packet2l>(const Packet2l* vecs)
-{
- return vsetq_lane_s64(
- vget_lane_s64(vget_low_s64(vecs[0]), 0) +
- vget_lane_s64(vget_high_s64(vecs[0]), 0),
- vdupq_n_s64(
- vget_lane_s64(vget_low_s64(vecs[1]), 0) +
- vget_lane_s64(vget_high_s64(vecs[1]), 0)),
- 0);
-}
-template<> EIGEN_STRONG_INLINE Packet2ul preduxp<Packet2ul>(const Packet2ul* vecs)
-{
- return vsetq_lane_u64(
- vget_lane_u64(vget_low_u64(vecs[0]), 0) +
- vget_lane_u64(vget_high_u64(vecs[0]), 0),
- vdupq_n_u64(
- vget_lane_u64(vget_low_u64(vecs[1]), 0) +
- vget_lane_u64(vget_high_u64(vecs[1]), 0)),
- 0);
-}
-
template<> EIGEN_DEVICE_FUNC inline Packet4c predux_half_dowto4(const Packet8c& a)
{
return vget_lane_s32(vreinterpret_s32_s8(vadd_s8(a,
@@ -3687,7 +3403,6 @@ template<> struct packet_traits<double> : default_packet_traits
HasSetLinear = 0,
HasBlend = 0,
HasInsert = 1,
- HasReduxp = 1,
HasDiv = 1,
HasFloor = 1,
@@ -3830,10 +3545,6 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
{ return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
#endif
-template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
-{
- return vaddq_f64(vzip1q_f64(vecs[0], vecs[1]), vzip2q_f64(vecs[0], vecs[1]));
-}
// Other reduction functions:
// mul
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index a80395b68..d6bfeafe4 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -156,11 +156,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
}
-template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
-{
- return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
@@ -346,11 +341,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
return pfirst(a);
}
-template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
-{
- return vecs[0];
-}
-
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index f96cd91bb..ac0799467 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -179,8 +179,7 @@ template<> struct packet_traits<bool> : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
- HasConj = 0,
- HasReduxp = 0
+ HasConj = 0
};
};
@@ -733,38 +732,6 @@ EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
}
-#ifdef EIGEN_VECTORIZE_SSE3
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
-{
- return _mm_hadd_pd(vecs[0], vecs[1]);
-}
-
-#else
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- Packet4f tmp0, tmp1, tmp2;
- tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
- tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
- tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
- tmp0 = _mm_add_ps(tmp0, tmp1);
- tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
- tmp1 = _mm_add_ps(tmp1, tmp2);
- tmp2 = _mm_movehl_ps(tmp1, tmp0);
- tmp0 = _mm_movelh_ps(tmp0, tmp1);
- return _mm_add_ps(tmp0, tmp2);
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
-{
- return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
-}
-#endif // SSE3
-
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
{
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
@@ -790,10 +757,6 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
}
#ifdef EIGEN_VECTORIZE_SSSE3
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
-}
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
Packet4i tmp0 = _mm_hadd_epi32(a,a);
@@ -805,20 +768,6 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
}
-
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- Packet4i tmp0, tmp1, tmp2;
- tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
- tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
- tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
- tmp0 = _mm_add_epi32(tmp0, tmp1);
- tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
- tmp1 = _mm_add_epi32(tmp1, tmp2);
- tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
- tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
- return _mm_add_epi32(tmp0, tmp2);
-}
#endif
// Other reduction functions:
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index 8939619f5..f589fddd8 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -156,10 +156,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
{
return pfirst(a);
}
-template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
-{
- return vecs[0];
-}
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
{
return pfirst(a);
@@ -327,16 +323,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
return res;
}
-template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
-{
- PacketBlock<Packet2cf,2> transpose;
- transpose.packet[0] = vecs[0];
- transpose.packet[1] = vecs[1];
- ptranspose(transpose);
-
- return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
std::complex<float> res;
@@ -461,17 +447,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packe
return pfirst<Packet2cf>(Packet2cf(b));
}
-template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
-{
- Packet4f b1, b2;
- b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
- b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
- b2 = vec_sld(b2, b2, 8);
- b2 = padd<Packet4f>(b1, b2);
-
- return Packet2cf(b2);
-}
-
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
{
Packet4f b;
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 5036106cd..3435f7c1e 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -530,45 +530,6 @@ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
return pfirst(sum);
}
-template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
-{
- Packet4i v[4], sum[4];
-
- // It's easier and faster to transpose then add as columns
- // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
- // Do the transpose, first set of moves
- v[0] = vec_mergeh(vecs[0], vecs[2]);
- v[1] = vec_mergel(vecs[0], vecs[2]);
- v[2] = vec_mergeh(vecs[1], vecs[3]);
- v[3] = vec_mergel(vecs[1], vecs[3]);
- // Get the resulting vectors
- sum[0] = vec_mergeh(v[0], v[2]);
- sum[1] = vec_mergel(v[0], v[2]);
- sum[2] = vec_mergeh(v[1], v[3]);
- sum[3] = vec_mergel(v[1], v[3]);
-
- // Now do the summation:
- // Lines 0+1
- sum[0] = padd<Packet4i>(sum[0], sum[1]);
- // Lines 2+3
- sum[1] = padd<Packet4i>(sum[2], sum[3]);
- // Add the results
- sum[0] = padd<Packet4i>(sum[0], sum[1]);
-
- return sum[0];
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
-{
- Packet2d v[2], sum;
- v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
- v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
-
- sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
-
- return sum;
-}
-
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
@@ -910,21 +871,6 @@ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
return static_cast<float>(first);
}
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- PacketBlock<Packet4f,4> transpose;
- transpose.packet[0] = vecs[0];
- transpose.packet[1] = vecs[1];
- transpose.packet[2] = vecs[2];
- transpose.packet[3] = vecs[3];
- ptranspose(transpose);
-
- Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
- sum = padd(sum, transpose.packet[2]);
- sum = padd(sum, transpose.packet[3]);
- return sum;
-}
-
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
// Return predux_mul<Packet2d> of the subvectors product
@@ -1106,34 +1052,6 @@ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
return pfirst(sum);
}
-template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
-{
- Packet4f v[4], sum[4];
-
- // It's easier and faster to transpose then add as columns
- // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
- // Do the transpose, first set of moves
- v[0] = vec_mergeh(vecs[0], vecs[2]);
- v[1] = vec_mergel(vecs[0], vecs[2]);
- v[2] = vec_mergeh(vecs[1], vecs[3]);
- v[3] = vec_mergel(vecs[1], vecs[3]);
- // Get the resulting vectors
- sum[0] = vec_mergeh(v[0], v[2]);
- sum[1] = vec_mergel(v[0], v[2]);
- sum[2] = vec_mergeh(v[1], v[3]);
- sum[3] = vec_mergel(v[1], v[3]);
-
- // Now do the summation:
- // Lines 0+1
- sum[0] = padd<Packet4f>(sum[0], sum[1]);
- // Lines 2+3
- sum[1] = padd<Packet4f>(sum[2], sum[3]);
- // Add the results
- sum[0] = padd<Packet4f>(sum[0], sum[1]);
-
- return sum[0];
-}
-
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 761273b86..cceaff7c5 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -288,19 +288,6 @@ template<typename Scalar,typename Packet> void packetmath()
ref[0] *= data1[i];
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
- if (PacketTraits::HasReduxp)
- {
- for (int j=0; j<PacketSize; ++j)
- {
- ref[j] = Scalar(0);
- for (int i=0; i<PacketSize; ++i)
- ref[j] += data1[i+j*PacketSize];
- packets[j] = internal::pload<Packet>(data1+j*PacketSize);
- }
- internal::pstore(data2, internal::preduxp(packets));
- VERIFY(test::areApproxAbs(ref, data2, PacketSize, refvalue) && "internal::preduxp");
- }
-
for (int i=0; i<PacketSize; ++i)
ref[i] = data1[PacketSize-i-1];
internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));