about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/AVX
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-10-05 18:48:55 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-10-05 18:48:55 -0700
commit: 78b569f68540c5609388864bd805dcf21dd6a187 (patch)
tree: 0a5757bb11834d0109f99310f4493dfd63579901 /Eigen/src/Core/arch/AVX
parent: 9c2b6c049be19fd4c571b0df537169d277b26291 (diff)
parent: 4387433acf9cd2eab3713349163cd1e8905b5854 (diff)
Merged latest updates from trunk
Diffstat (limited to 'Eigen/src/Core/arch/AVX')
-rw-r--r-- Eigen/src/Core/arch/AVX/CMakeLists.txt | 6
-rw-r--r-- Eigen/src/Core/arch/AVX/MathFunctions.h | 46
-rw-r--r-- Eigen/src/Core/arch/AVX/PacketMath.h | 7
3 files changed, 7 insertions, 52 deletions
diff --git a/Eigen/src/Core/arch/AVX/CMakeLists.txt b/Eigen/src/Core/arch/AVX/CMakeLists.txt
deleted file mode 100644
index bdb71ab99..000000000
--- a/Eigen/src/Core/arch/AVX/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_Core_arch_AVX_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 98d8e029f..d21ec39dd 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -266,52 +266,10 @@ pexp<Packet8f>(const Packet8f& _x) {
}
// Hyperbolic Tangent function.
-// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
-// fl(tanh(x)) = +/-1.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
-ptanh<Packet8f>(const Packet8f& _x) {
- // Clamp the inputs to the range [-9, 9] since anything outside
- // this range is +/-1.0f in single-precision.
- _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
- _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
- const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
-
- // The monomial coefficients of the numerator polynomial (odd).
- _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
- _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
-
- // The monomial coefficients of the denominator polynomial (even).
- _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
- _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
-
- // Since the polynomials are odd/even, we need x^2.
- const Packet8f x2 = pmul(x, x);
-
- // Evaluate the numerator polynomial p.
- Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
- p = pmadd(x2, p, p8f_alpha_9);
- p = pmadd(x2, p, p8f_alpha_7);
- p = pmadd(x2, p, p8f_alpha_5);
- p = pmadd(x2, p, p8f_alpha_3);
- p = pmadd(x2, p, p8f_alpha_1);
- p = pmul(x, p);
-
- // Evaluate the denominator polynomial p.
- Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
- q = pmadd(x2, q, p8f_beta_2);
- q = pmadd(x2, q, p8f_beta_0);
-
- // Divide the numerator by the denominator.
- return pdiv(p, q);
+ptanh<Packet8f>(const Packet8f& x) {
+ return internal::generic_fast_tanh_float(x);
}
template <>
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index ba2a6c1e1..beb3e577d 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -97,6 +97,9 @@ template<> struct packet_traits<double> : default_packet_traits
};
#endif
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits
@@ -156,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc stupidly generates a vfmadd132ps instruction,
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
@@ -169,7 +172,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// see above
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));