aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX512/PacketMath.h
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com>, 2019-01-09 16:17:08 -0800
committer: Rasmus Munk Larsen <rmlarsen@google.com>, 2019-01-09 16:17:08 -0800
commit: cb955df9a6fd5cb2673a7a15172609ce2dafdde8 (patch)
tree: 0be65a82aa8cfc22c6fe18856217f888cff16082 /Eigen/src/Core/arch/AVX512/PacketMath.h
parent: cb3c059fa4449f7ea1344ea8c677d8b427f6a273 (diff)
Add packet op "pones". Write pnot(a) as pxor(pones(a), a).
Diffstat (limited to 'Eigen/src/Core/arch/AVX512/PacketMath.h')
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h | 26
1 file changed, 18 insertions, 8 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 68adf5e57..d258fd07b 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -295,12 +295,6 @@ template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packe
return cat256(lo, hi);
}
-template<> EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) {
- __m256 lo = pcmp_eq(extract256<0>(a), extract256<0>(b));
- __m256 hi = pcmp_eq(extract256<1>(a), extract256<1>(b));
- return cat256(lo, hi);
-}
-
template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
__m256 lo = pcmp_lt_or_nan(extract256<0>(a), extract256<0>(b));
__m256 hi = pcmp_lt_or_nan(extract256<1>(a), extract256<1>(b));
@@ -317,14 +311,30 @@ template <>
EIGEN_STRONG_INLINE Packet16f pcmp_eq(const Packet16f& a, const Packet16f& b) {
__mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
return _mm512_castsi512_ps(
- _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffff));
+ _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
}
template <>
EIGEN_STRONG_INLINE Packet8d pcmp_eq(const Packet8d& a, const Packet8d& b) {
__mmask8 mask = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);
return _mm512_castsi512_pd(
- _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffff));
+ _mm512_mask_set1_epi64(_mm512_set1_epi64(0), mask, 0xffffffffffffffffu));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16i pones<Packet16i>(const Packet16i& /*a*/) {
+ const unsigned int o = 0xffffffffu;
+ return _mm512_set_epi32(o, o, o, o, o, o, o, o, o, o, o, o, o, o, o, o);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pones<Packet16f>(const Packet16f& a) {
+ return _mm512_castsi512_ps(pones<Packet16i>(_mm512_castps_si512(a)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8d pones<Packet8d>(const Packet8d& a) {
+ return _mm512_castsi512_pd(pones<Packet16i>(_mm512_castpd_si512(a)));
}
template <>