aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
authorGravatar Guoqiang QI <425418567@qq.com>2020-10-15 00:54:45 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2020-10-15 00:54:45 +0000
commit4700713faf92a7b72a926ee1ac75f75d59e58887 (patch)
tree1962e245d0b890f5d18fa836e2bd3e313465d975 /Eigen/src/Core/arch/AVX
parentaf6f43d7ff7a7c9cfa2a1355e2b7e60f94e192fe (diff)
Add AVX plog<Packet4d> and AVX512 plog<Packet8d> ops,also unified AVX512 plog<Packet16f> op with generic api
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r--Eigen/src/Core/arch/AVX/MathFunctions.h6
-rw-r--r--Eigen/src/Core/arch/AVX/PacketMath.h27
2 files changed, 33 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 461696170..9b123db00 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -36,6 +36,12 @@ plog<Packet8f>(const Packet8f& _x) {
return plog_float(_x);
}
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+plog<Packet4d>(const Packet4d& _x) {
+ return plog_double(_x);
+}
+
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
return generic_plog1p(_x);
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 49d2df566..5dbb84616 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -98,6 +98,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasCmp = 1,
HasDiv = 1,
+ HasLog = 1,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
@@ -215,6 +216,7 @@ template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { re
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet8f pset1frombits<Packet8f>(unsigned int from) { return _mm256_castsi256_ps(pset1<Packet8i>(from)); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1frombits<Packet4d>(uint64_t from) { return _mm256_castsi256_pd(_mm256_set1_epi64x(from)); }
template<> EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f& /*a*/) { return _mm256_setzero_ps(); }
template<> EIGEN_STRONG_INLINE Packet4d pzero(const Packet4d& /*a*/) { return _mm256_setzero_pd(); }
@@ -686,6 +688,31 @@ template<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Pack
return pfrexp_float(a,exponent);
}
+template<> EIGEN_STRONG_INLINE Packet4d pfrexp<Packet4d>(const Packet4d& a, Packet4d& exponent) {
+ const Packet4d cst_1022d = pset1<Packet4d>(1022.0);
+ const Packet4d cst_half = pset1<Packet4d>(0.5);
+ const Packet4d cst_inv_mant_mask = pset1frombits<Packet4d>(static_cast<uint64_t>(~0x7ff0000000000000ull));
+ __m256i a_expo = _mm256_castpd_si256(a);
+#ifdef EIGEN_VECTORIZE_AVX2
+ a_expo = _mm256_srli_epi64(a_expo, 52);
+#else
+ __m128i lo = _mm_srli_epi64(_mm256_extractf128_si256(a_expo, 0), 52);
+ __m128i hi = _mm_srli_epi64(_mm256_extractf128_si256(a_expo, 1), 52);
+ a_expo = _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
+#endif
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+ // AVX512DQ finally provides an instruction for this
+ exponent = _mm256_cvtepi64_pd(a_expo);
+#else
+ exponent = _mm256_set_pd(static_cast<double>(_mm256_extract_epi64(a_expo, 3)),
+ static_cast<double>(_mm256_extract_epi64(a_expo, 2)),
+ static_cast<double>(_mm256_extract_epi64(a_expo, 1)),
+ static_cast<double>(_mm256_extract_epi64(a_expo, 0)));
+#endif
+ exponent = psub(exponent, cst_1022d);
+ return por(pand(a, cst_inv_mant_mask), cst_half);
+}
+
template<> EIGEN_STRONG_INLINE Packet8f pldexp<Packet8f>(const Packet8f& a, const Packet8f& exponent) {
return pldexp_float(a,exponent);
}