author    Rasmus Munk Larsen <rmlarsen@google.com>  2020-04-23 18:17:14 +0000
committer Rasmus Munk Larsen <rmlarsen@google.com>  2020-04-23 18:17:14 +0000
commit    e80ec24357aaaa1e205cda3be466206e7830fcda (patch)
tree      6b3d585938c9927fce763caccbf5987a1f67217b /Eigen/src/Core/arch/AVX512
parent    0aebe19aca58d78434cf724ef954a8383b2bf7c9 (diff)
Remove unused packet op "preduxp".
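preduxp(vecs) performed a set of horizontal sums: given an array of PacketSize packets, it returned a single packet whose coefficient i is the sum of all coefficients of vecs[i]. A minimal scalar sketch of that contract, assuming packets are modeled as plain float arrays (the helper name is ours, not Eigen's):

    #include <cstddef>

    // Reference semantics of preduxp: out[i] = sum of all lanes of vecs[i].
    template <std::size_t N>
    void preduxp_reference(const float (*vecs)[N], float (&out)[N]) {
      for (std::size_t i = 0; i < N; ++i) {   // one output lane per input packet
        float sum = 0.0f;
        for (std::size_t j = 0; j < N; ++j)   // horizontal sum of packet i
          sum += vecs[i][j];
        out[i] = sum;
      }
    }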
Diffstat (limited to 'Eigen/src/Core/arch/AVX512')
-rw-r--r--  Eigen/src/Core/arch/AVX512/Complex.h      2
-rw-r--r--  Eigen/src/Core/arch/AVX512/PacketMath.h  212
2 files changed, 0 insertions, 214 deletions
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 4d0318611..219de36db 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -42,7 +42,6 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
- HasReduxp = 0,
HasInsert = 1
};
};
@@ -241,7 +240,6 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
- HasReduxp = 0
};
};
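These two hunks drop the HasReduxp capability flag from the complex packet_traits specializations; with the op itself gone, generic code no longer has a flag to query. A hedged, simplified sketch of how such trait flags gate dispatch at compile time (not Eigen's actual machinery):

    // Each packet type advertises its capabilities as enum flags.
    struct traits_with_reduxp    { enum { HasReduxp = 1 }; };
    struct traits_without_reduxp { enum { HasReduxp = 0 }; };

    // Generic code tests the flag before calling the optional op.
    template <typename Traits>
    constexpr bool can_use_preduxp() { return Traits::HasReduxp != 0; }

    static_assert(!can_use_preduxp<traits_without_reduxp>(),
                  "callers fall back to per-packet predux() instead");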
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 0ee73d5d6..3a48ea028 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -793,196 +793,6 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
#endif
-template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f*
-vecs)
-{
- EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
- EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
-
- __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
- __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
- __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
- __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
-
- __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- __m256 sum1 = _mm256_add_ps(perm1, hsum5);
- __m256 sum2 = _mm256_add_ps(perm2, hsum6);
- __m256 sum3 = _mm256_add_ps(perm3, hsum7);
- __m256 sum4 = _mm256_add_ps(perm4, hsum8);
-
- __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
-
- hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
- hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
- hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
- hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0));
-
- hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
- hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
- hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
- hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
-
- hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
- hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
- hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
- hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
-
- hsum5 = _mm256_hadd_ps(hsum1, hsum1);
- hsum6 = _mm256_hadd_ps(hsum2, hsum2);
- hsum7 = _mm256_hadd_ps(hsum3, hsum3);
- hsum8 = _mm256_hadd_ps(hsum4, hsum4);
-
- perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
- perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
- perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
- perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
-
- sum1 = _mm256_add_ps(perm1, hsum5);
- sum2 = _mm256_add_ps(perm2, hsum6);
- sum3 = _mm256_add_ps(perm3, hsum7);
- sum4 = _mm256_add_ps(perm4, hsum8);
-
- blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
- blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
-
- final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
-
- __m512 final_output;
-
- EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
- return final_output;
-}
-
-template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
-{
- Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
- Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
-
- Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
- Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
-
- Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
- Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
-
- Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
- Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
-
- Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
- Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
-
- Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
- Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
-
- Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
- Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
-
- Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
- Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
-
- Packet4d tmp0, tmp1;
-
- tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
-
- tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
-
- tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
-
- tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
- tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
-
- tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
- tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
-
- final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
-
- __m512d final_output = _mm512_castpd256_pd512(final_0);
-
- return _mm512_insertf64x4(final_output, final_1, 1);
-}
template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
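The removed Packet16f and Packet8d bodies above repeat one AVX idiom: _mm256_hadd_* pair-sums lanes within each 128-bit half, _mm256_permute2f128_* swaps the halves so an add completes each horizontal sum, and _mm256_blend_* packs the per-input totals into adjacent lanes. A hedged, self-contained sketch of that idiom for four __m256d inputs (compile with -mavx; the helper name is ours):

    #include <immintrin.h>

    // Lane i of the result holds the full horizontal sum of input i.
    static inline __m256d hsum4x4d(__m256d v0, __m256d v1,
                                   __m256d v2, __m256d v3) {
      // t01 = [v0_0+v0_1, v1_0+v1_1, v0_2+v0_3, v1_2+v1_3]
      __m256d t01 = _mm256_hadd_pd(v0, v1);
      __m256d t23 = _mm256_hadd_pd(v2, v3);
      // Swap 128-bit halves and add: every lane now holds a complete sum.
      __m256d s01 = _mm256_add_pd(t01, _mm256_permute2f128_pd(t01, t01, 1));
      __m256d s23 = _mm256_add_pd(t23, _mm256_permute2f128_pd(t23, t23, 1));
      // 0xC selects lanes 2,3 from s23: [sum(v0), sum(v1), sum(v2), sum(v3)].
      return _mm256_blend_pd(s01, s23, 0xC);
    }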
@@ -1660,28 +1470,6 @@ template<> EIGEN_STRONG_INLINE half predux_mul<Packet16h>(const Packet16h& from)
return half(predux_mul(from_float));
}
-template<> EIGEN_STRONG_INLINE Packet16h preduxp<Packet16h>(const Packet16h* p) {
- Packet16f pf[16];
- pf[0] = half2float(p[0]);
- pf[1] = half2float(p[1]);
- pf[2] = half2float(p[2]);
- pf[3] = half2float(p[3]);
- pf[4] = half2float(p[4]);
- pf[5] = half2float(p[5]);
- pf[6] = half2float(p[6]);
- pf[7] = half2float(p[7]);
- pf[8] = half2float(p[8]);
- pf[9] = half2float(p[9]);
- pf[10] = half2float(p[10]);
- pf[11] = half2float(p[11]);
- pf[12] = half2float(p[12]);
- pf[13] = half2float(p[13]);
- pf[14] = half2float(p[14]);
- pf[15] = half2float(p[15]);
- Packet16f reduced = preduxp<Packet16f>(pf);
- return float2half(reduced);
-}
-
template<> EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a)
{
__m128i m = _mm_setr_epi8(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1);
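The Packet16h overload removed in the last hunk never reduced in half precision: it widened each packet to Packet16f with half2float, reused the float preduxp, and narrowed the result back with float2half. A hedged sketch of that widen/reduce/narrow pattern using raw AVX512F conversions (the helper name and function-pointer parameter are ours, not Eigen's):

    #include <immintrin.h>

    // vecs points to 16 packets of 16 half floats each, stored as __m256i.
    static inline __m256i preduxp16h_sketch(const __m256i* vecs,
                                            __m512 (*preduxp16f)(const __m512*)) {
      __m512 pf[16];
      for (int i = 0; i < 16; ++i)
        pf[i] = _mm512_cvtph_ps(vecs[i]);      // widen half -> float
      __m512 reduced = preduxp16f(pf);         // reuse the float reduction
      return _mm512_cvtps_ph(reduced,          // narrow float -> half
                             _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    }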