aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Christoph Hertzberg <chtz@informatik.uni-bremen.de>2021-03-23 18:52:19 +0000
committerGravatar Christoph Hertzberg <chtz@informatik.uni-bremen.de>2021-03-23 18:52:19 +0000
commit69a4f709562604d7323355ac54403ad5cbf2e99c (patch)
tree3425c3a4d3d895cd9737bf78b54ad5683bde0080
parent824272cde8ca2541e8b67b0887f5ded92b128d1f (diff)
Revert "Uses _mm512_abs_pd for Packet8d pabs"
-rw-r--r--Eigen/src/Core/arch/AVX512/PacketMath.h13
1 files changed, 7 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index a964b932c..f8741372d 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -139,7 +139,6 @@ template<> struct packet_traits<double> : default_packet_traits
size = 8,
HasHalfPacket = 1,
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
- HasAbs = 1,
HasLog = 1,
HasSqrt = EIGEN_FAST_MATH,
HasRsqrt = EIGEN_FAST_MATH,
@@ -889,7 +888,9 @@ template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
}
template <>
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
- return _mm512_abs_pd(a);
+ // _mm512_abs_ps intrinsic not found, so hack around it
+ return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
+ _mm512_set1_epi64(0x7fffffffffffffff)));
}
template<>
@@ -899,7 +900,7 @@ EIGEN_STRONG_INLINE Packet16f pfrexp<Packet16f>(const Packet16f& a, Packet16f& e
// Extract exponent without existence of Packet8l.
template<>
-EIGEN_STRONG_INLINE
+EIGEN_STRONG_INLINE
Packet8d pfrexp_generic_get_biased_exponent(const Packet8d& a) {
const Packet8d cst_exp_mask = pset1frombits<Packet8d>(static_cast<uint64_t>(0x7ff0000000000000ull));
#ifdef EIGEN_VECTORIZE_AVX512DQ
@@ -922,18 +923,18 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons
// Clamp exponent to [-2099, 2099]
const Packet8d max_exponent = pset1<Packet8d>(2099.0);
const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
-
+
// Split 2^e into four factors and multiply.
const Packet8i bias = pset1<Packet8i>(1023);
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
-
+
// 2^b
Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
Packet8i lo = _mm256_slli_epi64(hi, 52);
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
-
+
// 2^(e - 3b)
b = psub(psub(psub(e, b), b), b); // e - 3b
hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));