aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX/PacketMath.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 17:20:33 -0800
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 17:20:33 -0800
commitfcfced13ed875644b93bf346f4dbce19ac8851ba (patch)
tree4dab83f34f4ee1695e23f5c9f7a54c1f9d02cdc7 /Eigen/src/Core/arch/AVX/PacketMath.h
parenta05ec7993e04bd04c29c120efd48103af85e5daf (diff)
parentf6ba6071c583ae45cb379603e5a57cf65f01f44a (diff)
Rename pones -> ptrue. Use _CMP_TRUE_UQ where appropriate.
Diffstat (limited to 'Eigen/src/Core/arch/AVX/PacketMath.h')
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h32
1 file changed, 22 insertions, 10 deletions
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index f6a514fbf..c18c18cc3 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -251,22 +251,34 @@ template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { re
template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8i ptrue<Packet8i>(const Packet8i& a) {
#ifdef EIGEN_VECTORIZE_AVX2
-template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& a) {
- return _mm256_cmpeq_epi64(a,a);
-}
+ // vpcmpeqd has lower latency than the more general vcmpps
+ return _mm256_cmpeq_epi32(a,a);
#else
-template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& /*a*/) {
- const unsigned int o = 0xffffffffu;
- return _mm256_set_epi32(o, o, o, o, o, o, o, o);
+ const __m256 b = _mm256_castsi256_ps(a);
+ return _mm256_castps_si256(_mm256_cmp_ps(b,b,_CMP_TRUE_UQ));
+#endif
}
+
+template<> EIGEN_STRONG_INLINE Packet8f ptrue<Packet8f>(const Packet8f& a) {
+#ifdef EIGEN_VECTORIZE_AVX2
+ // vpcmpeqd has lower latency than the more general vcmpps
+ const __m256i b = _mm256_castps_si256(a);
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(b,b));
+#else
+ return _mm256_cmp_ps(a,a,_CMP_TRUE_UQ);
#endif
-template<> EIGEN_STRONG_INLINE Packet8f pones<Packet8f>(const Packet8f& a) {
- return _mm256_castsi256_ps(pones<Packet8i>(_mm256_castps_si256(a)));
}
-template<> EIGEN_STRONG_INLINE Packet4d pones<Packet4d>(const Packet4d& a) {
- return _mm256_castsi256_pd(pones<Packet8i>(_mm256_castpd_si256(a)));
+template<> EIGEN_STRONG_INLINE Packet4d ptrue<Packet4d>(const Packet4d& a) {
+#ifdef EIGEN_VECTORIZE_AVX2
+ // vpcmpeqq has lower latency than the more general vcmppd
+ const __m256i b = _mm256_castpd_si256(a);
+ return _mm256_castsi256_pd(_mm256_cmpeq_epi64(b,b));
+#else
+ return _mm256_cmp_pd(a,a,_CMP_TRUE_UQ);
+#endif
}
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }