about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/SSE
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-08 15:41:50 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2020-05-08 15:41:50 -0700
commit: c1d944dd913d05180b7d2d1229072c9c52a11f29 (patch)
tree: 80a936a6403af21079445e000c897c7357c122bf /Eigen/src/Core/arch/SSE
parent: 5c4e19fbe7ea19d9d8b51243515f972237a49267 (diff)
Remove packet ops pinsertfirst and pinsertlast that are only used in a single place, and can be replaced by other ops when constructing the first/final packet in linspaced_op_impl::packetOp.
I cannot measure any performance changes for SSE, AVX, or AVX512.

name                      old time/op  new time/op  delta
BM_LinSpace<float>/1      1.63ns ± 0%  1.63ns ± 0%  ~ (p=0.762 n=5+5)
BM_LinSpace<float>/8      4.92ns ± 3%  4.89ns ± 3%  ~ (p=0.421 n=5+5)
BM_LinSpace<float>/64     34.6ns ± 0%  34.6ns ± 0%  ~ (p=0.841 n=5+5)
BM_LinSpace<float>/512     217ns ± 0%   217ns ± 0%  ~ (p=0.421 n=5+5)
BM_LinSpace<float>/4k     1.68µs ± 0%  1.68µs ± 0%  ~ (p=1.000 n=5+5)
BM_LinSpace<float>/32k    13.3µs ± 0%  13.3µs ± 0%  ~ (p=0.905 n=5+4)
BM_LinSpace<float>/256k    107µs ± 0%   107µs ± 0%  ~ (p=0.841 n=5+5)
BM_LinSpace<float>/1M      427µs ± 0%   427µs ± 0%  ~ (p=0.690 n=5+5)
Diffstat (limited to 'Eigen/src/Core/arch/SSE')
-rw-r--r-- Eigen/src/Core/arch/SSE/Complex.h | 20
-rwxr-xr-x Eigen/src/Core/arch/SSE/PacketMath.h | 38
2 files changed, 0 insertions, 58 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index a16d73e27..8bf8bfe85 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -429,26 +429,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
return Packet2cf(_mm_castpd_ps(result));
}
-template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
-{
- return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
-}
-
-template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
-{
- return pset1<Packet1cd>(b);
-}
-
-template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
-{
- return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
-}
-
-template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
-{
- return pset1<Packet1cd>(b);
-}
-
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index cf2f0be17..645aee0cd 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -936,44 +936,6 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif
}
-template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
-{
-#ifdef EIGEN_VECTORIZE_SSE4_1
- return _mm_blend_ps(a,pset1<Packet4f>(b),1);
-#else
- return _mm_move_ss(a, _mm_load_ss(&b));
-#endif
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
-{
-#ifdef EIGEN_VECTORIZE_SSE4_1
- return _mm_blend_pd(a,pset1<Packet2d>(b),1);
-#else
- return _mm_move_sd(a, _mm_load_sd(&b));
-#endif
-}
-
-template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
-{
-#ifdef EIGEN_VECTORIZE_SSE4_1
- return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
-#else
- const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
- return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
-#endif
-}
-
-template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
-{
-#ifdef EIGEN_VECTORIZE_SSE4_1
- return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
-#else
- const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
- return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
-#endif
-}
-
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {