diff options
Diffstat (limited to 'Eigen/src/Core/arch')
-rw-r--r-- | Eigen/src/Core/arch/AVX/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/MathFunctions.h | 46 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/PacketMath.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AltiVec/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CMakeLists.txt | 9 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/arch/Default/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/CMakeLists.txt | 6 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 46 | ||||
-rwxr-xr-x | Eigen/src/Core/arch/SSE/PacketMath.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/arch/ZVector/CMakeLists.txt | 6 |
13 files changed, 33 insertions, 142 deletions
diff --git a/Eigen/src/Core/arch/AVX/CMakeLists.txt b/Eigen/src/Core/arch/AVX/CMakeLists.txt deleted file mode 100644 index bdb71ab99..000000000 --- a/Eigen/src/Core/arch/AVX/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_AVX_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 98d8e029f..d21ec39dd 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -266,52 +266,10 @@ pexp<Packet8f>(const Packet8f& _x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f -ptanh<Packet8f>(const Packet8f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); - const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet8f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); - p = pmadd(x2, p, p8f_alpha_9); - p = pmadd(x2, p, p8f_alpha_7); - p = pmadd(x2, p, p8f_alpha_5); - p = pmadd(x2, p, p8f_alpha_3); - p = pmadd(x2, p, p8f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4); - q = pmadd(x2, q, p8f_beta_2); - q = pmadd(x2, q, p8f_beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); +ptanh<Packet8f>(const Packet8f& x) { + return internal::generic_fast_tanh_float(x); } template <> diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 4fec14f44..dae0ca5d0 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits }; }; +template<> struct scalar_div_cost<float,true> { enum { value = 14 }; }; +template<> struct scalar_div_cost<double,true> { enum { value = 16 }; }; + /* Proper support for integers is only provided by AVX2. In the meantime, we'll use SSE instructions and packets to deal with integers. template<> struct packet_traits<int> : default_packet_traits @@ -153,7 +156,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co #ifdef __FMA__ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // and gcc stupidly generates a vfmadd132ps instruction, // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate @@ -166,7 +169,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& #endif } template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { -#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG +#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) // see above Packet4d res = c; __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); diff --git a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt b/Eigen/src/Core/arch/AltiVec/CMakeLists.txt deleted file mode 100644 index 9f8d2e9c4..000000000 --- a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_AltiVec_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt deleted file mode 100644 index 42b0b486e..000000000 --- a/Eigen/src/Core/arch/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -ADD_SUBDIRECTORY(AltiVec) -ADD_SUBDIRECTORY(AVX) -ADD_SUBDIRECTORY(CUDA) -ADD_SUBDIRECTORY(Default) -ADD_SUBDIRECTORY(NEON) -ADD_SUBDIRECTORY(SSE) - - - diff --git a/Eigen/src/Core/arch/CUDA/CMakeLists.txt b/Eigen/src/Core/arch/CUDA/CMakeLists.txt deleted file mode 100644 index 7ba28da7c..000000000 --- a/Eigen/src/Core/arch/CUDA/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_CUDA_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 4d91420d0..52892db38 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -389,10 +389,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { +#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return Eigen::half(::hlog(a)); +#else return half(::logf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) { - return half(::log1pf(float(a))); + return half(numext::log1p(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); @@ -503,7 +507,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return Eigen::half(::hlog(a)); +#else return Eigen::half(::logf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); diff --git a/Eigen/src/Core/arch/Default/CMakeLists.txt b/Eigen/src/Core/arch/Default/CMakeLists.txt deleted file mode 100644 index 339c091d1..000000000 --- a/Eigen/src/Core/arch/Default/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_Default_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/NEON/CMakeLists.txt b/Eigen/src/Core/arch/NEON/CMakeLists.txt deleted file mode 100644 index fd4d4af50..000000000 --- a/Eigen/src/Core/arch/NEON/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_NEON_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/SSE/CMakeLists.txt b/Eigen/src/Core/arch/SSE/CMakeLists.txt deleted file mode 100644 index 46ea7cc62..000000000 --- a/Eigen/src/Core/arch/SSE/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_SSE_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel -) diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 28f103eeb..ac2fd8103 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -517,52 +517,10 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) { } // Hyperbolic Tangent function. -// Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-9, 9], outside of which the -// fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f -ptanh<Packet4f>(const Packet4f& _x) { - // Clamp the inputs to the range [-9, 9] since anything outside - // this range is +/-1.0f in single-precision. - _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); - _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); - const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); - - // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); - _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const Packet4f x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11); - p = pmadd(x2, p, p4f_alpha_9); - p = pmadd(x2, p, p4f_alpha_7); - p = pmadd(x2, p, p4f_alpha_5); - p = pmadd(x2, p, p4f_alpha_3); - p = pmadd(x2, p, p4f_alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial p. - Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4); - q = pmadd(x2, q, p4f_beta_2); - q = pmadd(x2, q, p4f_beta_0); - - // Divide the numerator by the denominator. - return pdiv(p, q); +ptanh<Packet4f>(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); } } // end namespace internal diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 70839d68d..baad692e3 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; }; template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; }; +#ifndef EIGEN_VECTORIZE_AVX +template<> struct scalar_div_cost<float,true> { enum { value = 7 }; }; +template<> struct scalar_div_cost<double,true> { enum { value = 8 }; }; +#endif + #if EIGEN_COMP_MSVC==1500 // Workaround MSVC 9 internal compiler error. // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode @@ -813,6 +818,16 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons #endif } +// Scalar path for pmadd with FMA to ensure consistency with vectorized path. +#ifdef __FMA__ +template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) { + return ::fmaf(a,b,c); +} +template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) { + return ::fma(a,b,c); +} +#endif + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/ZVector/CMakeLists.txt b/Eigen/src/Core/arch/ZVector/CMakeLists.txt deleted file mode 100644 index 5eb0957eb..000000000 --- a/Eigen/src/Core/arch/ZVector/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_ZVector_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel -) |