aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 17:20:33 -0800
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-01-09 17:20:33 -0800
commitfcfced13ed875644b93bf346f4dbce19ac8851ba (patch)
tree4dab83f34f4ee1695e23f5c9f7a54c1f9d02cdc7
parenta05ec7993e04bd04c29c120efd48103af85e5daf (diff)
parentf6ba6071c583ae45cb379603e5a57cf65f01f44a (diff)
Rename pones -> ptrue. Use _CMP_TRUE_UQ where appropriate.
-rw-r--r--Eigen/src/Core/GenericPacketMath.h10
-rw-r--r--Eigen/src/Core/arch/AVX/Complex.h4
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h32
-rw-r--r--Eigen/src/Core/arch/AVX512/Complex.h4
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h13
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMathHalf.h8
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h4
-rwxr-xr-xEigen/src/Core/arch/SSE/PacketMath.h6
-rw-r--r--test/packetmath.cpp2
9 files changed, 49 insertions, 34 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 8bcceaa7b..95c4e4027 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -216,11 +216,11 @@ pandnot(const Packet& a, const Packet& b) { return a & (~b); }
/** \internal \returns ones */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;}
+ptrue(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;}
/** \internal \returns the bitwise not of \a a */
template <typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pnot(const Packet& a) { return pxor(pones(a), a);}
+pnot(const Packet& a) { return pxor(ptrue(a), a);}
/** \internal \returns \a a shifted by N bits to the right */
template<int N> EIGEN_DEVICE_FUNC inline int
@@ -258,15 +258,15 @@ pselect(const Packet& mask, const Packet& a, const Packet& b) {
/** \internal \returns a <= b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pcmp_le(const Packet& a, const Packet& b) { return a<=b ? pones(a) : pzero(a); }
+pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
/** \internal \returns a < b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pcmp_lt(const Packet& a, const Packet& b) { return a<b ? pones(a) : pzero(a); }
+pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
/** \internal \returns a == b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pcmp_eq(const Packet& a, const Packet& b) { return a==b ? pones(a) : pzero(a); }
+pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 9f1bb969e..dcca35279 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -75,7 +75,7 @@ EIGEN_STRONG_INLINE Packet4cf pcmp_eq(const Packet4cf& a, const Packet4cf& b) {
return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
}
-template<> EIGEN_STRONG_INLINE Packet4cf pones<Packet4cf>(const Packet4cf& a) { return Packet4cf(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf ptrue<Packet4cf>(const Packet4cf& a) { return Packet4cf(ptrue(a.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pnot<Packet4cf>(const Packet4cf& a) { return Packet4cf(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
@@ -290,7 +290,7 @@ EIGEN_STRONG_INLINE Packet2cd pcmp_eq(const Packet2cd& a, const Packet2cd& b) {
return Packet2cd(pand(eq, _mm256_permute_pd(eq, 0x5)));
}
-template<> EIGEN_STRONG_INLINE Packet2cd pones<Packet2cd>(const Packet2cd& a) { return Packet2cd(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd ptrue<Packet2cd>(const Packet2cd& a) { return Packet2cd(ptrue(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pnot<Packet2cd>(const Packet2cd& a) { return Packet2cd(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index f6a514fbf..c18c18cc3 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -251,22 +251,34 @@ template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { re
template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+template<> EIGEN_STRONG_INLINE Packet8i ptrue<Packet8i>(const Packet8i& a) {
#ifdef EIGEN_VECTORIZE_AVX2
-template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& a) {
- return _mm256_cmpeq_epi64(a,a);
-}
+ // vpcmpeqd has lower latency than the more general vcmpps
+ return _mm256_cmpeq_epi32(a,a);
#else
-template<> EIGEN_STRONG_INLINE Packet8i pones<Packet8i>(const Packet8i& /*a*/) {
- const unsigned int o = 0xffffffffu;
- return _mm256_set_epi32(o, o, o, o, o, o, o, o);
+ const __m256 b = _mm256_castsi256_ps(a);
+ return _mm256_castps_si256(_mm256_cmp_ps(b,b,_CMP_TRUE_UQ));
+#endif
}
+
+template<> EIGEN_STRONG_INLINE Packet8f ptrue<Packet8f>(const Packet8f& a) {
+#ifdef EIGEN_VECTORIZE_AVX2
+ // vpcmpeqd has lower latency than the more general vcmpps
+ const __m256i b = _mm256_castps_si256(a);
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(b,b));
+#else
+ return _mm256_cmp_ps(a,a,_CMP_TRUE_UQ);
#endif
-template<> EIGEN_STRONG_INLINE Packet8f pones<Packet8f>(const Packet8f& a) {
- return _mm256_castsi256_ps(pones<Packet8i>(_mm256_castps_si256(a)));
}
-template<> EIGEN_STRONG_INLINE Packet4d pones<Packet4d>(const Packet4d& a) {
- return _mm256_castsi256_pd(pones<Packet8i>(_mm256_castpd_si256(a)));
+template<> EIGEN_STRONG_INLINE Packet4d ptrue<Packet4d>(const Packet4d& a) {
+#ifdef EIGEN_VECTORIZE_AVX2
+ // vpcmpeqq has lower latency than the more general vcmppd
+ const __m256i b = _mm256_castpd_si256(a);
+ return _mm256_castsi256_pd(_mm256_cmpeq_epi64(b,b));
+#else
+ return _mm256_cmp_pd(a,a,_CMP_TRUE_UQ);
+#endif
}
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 154fedc25..b7e68d2ab 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -55,6 +55,8 @@ template<> struct unpacket_traits<Packet8cf> {
typedef Packet4cf half;
};
+template<> EIGEN_STRONG_INLINE Packet8cf ptrue<Packet8cf>(const Packet8cf& a) { return Packet8cf(ptrue(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet8cf pnot<Packet8cf>(const Packet8cf& a) { return Packet8cf(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf padd<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf psub<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_sub_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf pnegate(const Packet8cf& a)
@@ -268,6 +270,8 @@ template<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, con
return Packet4cd(_mm512_fmaddsub_pd(tmp1, b.v, odd));
}
+template<> EIGEN_STRONG_INLINE Packet4cd ptrue<Packet4cd>(const Packet4cd& a) { return Packet4cd(ptrue(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cd pnot<Packet4cd>(const Packet4cd& a) { return Packet4cd(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 9666c4e22..1164f24b1 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -321,19 +321,18 @@ EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) {
}
template <>
-EIGEN_STRONG_INLINE Packet16i pones<Packet16i>(const Packet16i& /*a*/) {
- const unsigned int o = 0xffffffffu;
- return _mm512_set_epi32(o, o, o, o, o, o, o, o, o, o, o, o, o, o, o, o);
+EIGEN_STRONG_INLINE Packet16i ptrue<Packet16i>(const Packet16i& /*a*/) {
+ return _mm512_set1_epi32(0xffffffffu);
}
template <>
-EIGEN_STRONG_INLINE Packet16f pones<Packet16f>(const Packet16f& a) {
- return _mm512_castsi512_ps(pones<Packet16i>(_mm512_castps_si512(a)));
+EIGEN_STRONG_INLINE Packet16f ptrue<Packet16f>(const Packet16f& a) {
+ return _mm512_castsi512_ps(ptrue<Packet16i>(_mm512_castps_si512(a)));
}
template <>
-EIGEN_STRONG_INLINE Packet8d pones<Packet8d>(const Packet8d& a) {
- return _mm512_castsi512_pd(pones<Packet16i>(_mm512_castpd_si512(a)));
+EIGEN_STRONG_INLINE Packet8d ptrue<Packet8d>(const Packet8d& a) {
+ return _mm512_castsi512_pd(ptrue<Packet16i>(_mm512_castpd_si512(a)));
}
template <>
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index c4dfedcf8..00e40d40b 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -143,7 +143,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs<half2>(const half2&
return result;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pones<half2>(const half2& a) {
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ptrue<half2>(const half2& a) {
half2 result;
*(reinterpret_cast<unsigned*>(&(result))) = 0xffffffffu;
}
@@ -648,8 +648,8 @@ template<> EIGEN_STRONG_INLINE Packet16h pnot(const Packet16h& a) {
Packet16h r; r.x = _mm256_xor_si256(a.x, pcmp_eq(a.x, a.x)); return r;
}
-template<> EIGEN_STRONG_INLINE Packet16h pones(const Packet16h& a) {
- Packet16h r; r.x = Packet8i(pones(a.x)); return r;
+template<> EIGEN_STRONG_INLINE Packet16h ptrue(const Packet16h& a) {
+ Packet16h r; r.x = Packet8i(ptrue(a.x)); return r;
}
template<> EIGEN_STRONG_INLINE Packet16h por(const Packet16h& a,const Packet16h& b) {
@@ -1097,7 +1097,7 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
#endif
}
-template<> EIGEN_STRONG_INLINE Packet8h pones(const Packet8h& a) {
+template<> EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h& a) {
Packet8h r; r.x = _mm_cmpeq_epi32(a.x, a.x); return r;
}
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index fa84097ac..c3b1de5ce 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -82,7 +82,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
#endif
}
-template<> EIGEN_STRONG_INLINE Packet2cf pones <Packet2cf>(const Packet2cf& a) { return Packet2cf(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pnot <Packet2cf>(const Packet2cf& a) { return Packet2cf(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
@@ -308,7 +308,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
#endif
}
-template<> EIGEN_STRONG_INLINE Packet1cd pones <Packet1cd>(const Packet1cd& a) { return Packet1cd(pones(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ptrue <Packet1cd>(const Packet1cd& a) { return Packet1cd(ptrue(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pnot <Packet1cd>(const Packet1cd& a) { return Packet1cd(pnot(a.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 6dd2f8a46..ebc540e24 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -378,14 +378,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4i pones<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
+template<> EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
template<> EIGEN_STRONG_INLINE Packet4f
-pones<Packet4f>(const Packet4f& a) {
+ptrue<Packet4f>(const Packet4f& a) {
Packet4i b = _mm_castps_si128(a);
return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b));
}
template<> EIGEN_STRONG_INLINE Packet2d
-pones<Packet2d>(const Packet2d& a) {
+ptrue<Packet2d>(const Packet2d& a) {
Packet4i b = _mm_castpd_si128(a);
return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b));
}
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 460cfbdbe..04f93108f 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -240,7 +240,7 @@ template<typename Scalar,typename Packet> void packetmath()
CHECK_CWISE1(internal::pnot, internal::pnot);
CHECK_CWISE1(internal::pzero, internal::pzero);
- CHECK_CWISE1(internal::pones, internal::pones);
+ CHECK_CWISE1(internal::ptrue, internal::ptrue);
CHECK_CWISE1(internal::negate, internal::pnegate);
CHECK_CWISE1(numext::conj, internal::pconj);