aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-05-08 15:41:50 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-05-08 15:41:50 -0700
commitc1d944dd913d05180b7d2d1229072c9c52a11f29 (patch)
tree80a936a6403af21079445e000c897c7357c122bf /Eigen/src/Core/arch/AVX
parent5c4e19fbe7ea19d9d8b51243515f972237a49267 (diff)
Remove packet ops pinsertfirst and pinsertlast that are only used in a single place, and can be replaced by other ops when constructing the first/final packet in linspaced_op_impl::packetOp.
I cannot measure any performance changes for SSE, AVX, or AVX512. name old time/op new time/op delta BM_LinSpace<float>/1 1.63ns ± 0% 1.63ns ± 0% ~ (p=0.762 n=5+5) BM_LinSpace<float>/8 4.92ns ± 3% 4.89ns ± 3% ~ (p=0.421 n=5+5) BM_LinSpace<float>/64 34.6ns ± 0% 34.6ns ± 0% ~ (p=0.841 n=5+5) BM_LinSpace<float>/512 217ns ± 0% 217ns ± 0% ~ (p=0.421 n=5+5) BM_LinSpace<float>/4k 1.68µs ± 0% 1.68µs ± 0% ~ (p=1.000 n=5+5) BM_LinSpace<float>/32k 13.3µs ± 0% 13.3µs ± 0% ~ (p=0.905 n=5+4) BM_LinSpace<float>/256k 107µs ± 0% 107µs ± 0% ~ (p=0.841 n=5+5) BM_LinSpace<float>/1M 427µs ± 0% 427µs ± 0% ~ (p=0.690 n=5+5)
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h20
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h31
2 files changed, 0 insertions, 51 deletions
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 893eb2702..c2d5205f2 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -402,26 +402,6 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
kernel.packet[0].v = tmp;
}
-template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
-{
- return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
-}
-
-template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
-{
- return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
-}
-
-template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
-{
- return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
-}
-
-template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
-{
- return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
-}
-
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 10196fd6d..35a329e3f 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -763,27 +763,6 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
-template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
-{
- return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
-}
-
-template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
-{
- return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
-{
- return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
-}
-
-template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
-{
- return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
-}
-
-
// Packet math for Eigen::half
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet8h half; };
@@ -981,16 +960,6 @@ template<> EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a)
return _mm_shuffle_epi8(a,m);
}
-template<> EIGEN_STRONG_INLINE Packet8h pinsertfirst(const Packet8h& a, Eigen::half b)
-{
- return _mm_insert_epi16(a,int(b.x),0);
-}
-
-template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::half b)
-{
- return _mm_insert_epi16(a,int(b.x),7);
-}
-
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0];