diff options
Diffstat (limited to 'Eigen/src/Core/functors/UnaryFunctors.h')
-rw-r--r-- | Eigen/src/Core/functors/UnaryFunctors.h | 106 |
1 files changed, 85 insertions, 21 deletions
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 7ba0abedc..5baba1494 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -234,9 +234,33 @@ template<typename Scalar> struct scalar_exp_op { template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } }; -template<typename Scalar> -struct functor_traits<scalar_exp_op<Scalar> > -{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; }; +template <typename Scalar> +struct functor_traits<scalar_exp_op<Scalar> > { + enum { + PacketAccess = packet_traits<Scalar>::HasExp, + // The following numbers are based on the AVX implementation. +#ifdef EIGEN_VECTORIZE_FMA + // Haswell can issue 2 add/mul/madd per cycle. + Cost = + (sizeof(Scalar) == 4 + // float: 8 pmadd, 4 pmul, 2 padd/psub, 6 other + ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost) + // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other + : (14 * NumTraits<Scalar>::AddCost + + 6 * NumTraits<Scalar>::MulCost + + NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) +#else + Cost = + (sizeof(Scalar) == 4 + // float: 7 pmadd, 6 pmul, 4 padd/psub, 10 other + ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost) + // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div, 13 other + : (23 * NumTraits<Scalar>::AddCost + + 12 * NumTraits<Scalar>::MulCost + + NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)) +#endif + }; +}; /** \internal * @@ -250,9 +274,24 @@ template<typename Scalar> struct scalar_log_op { template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); } }; -template<typename Scalar> -struct functor_traits<scalar_log_op<Scalar> > -{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; }; +template <typename Scalar> +struct functor_traits<scalar_log_op<Scalar> > { + enum { + PacketAccess = packet_traits<Scalar>::HasLog, + Cost = + (PacketAccess + // The following numbers are based on the AVX implementation. +#ifdef EIGEN_VECTORIZE_FMA + // 8 pmadd, 6 pmul, 8 padd/psub, 16 other, can issue 2 add/mul/madd per cycle. + ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost) +#else + // 8 pmadd, 6 pmul, 8 padd/psub, 20 other + ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost) +#endif + // Measured cost of std::log. + : sizeof(Scalar)==4 ? 40 : 85) + }; +}; /** \internal * @@ -280,10 +319,19 @@ template<typename Scalar> struct scalar_sqrt_op { template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } }; -template<typename Scalar> -struct functor_traits<scalar_sqrt_op<Scalar> > -{ enum { - Cost = 5 * NumTraits<Scalar>::MulCost, +template <typename Scalar> +struct functor_traits<scalar_sqrt_op<Scalar> > { + enum { +#if EIGEN_FAST_MATH + // The following numbers are based on the AVX implementation. + Cost = (sizeof(Scalar) == 8 ? 28 + // 4 pmul, 1 pmadd, 3 other + : (3 * NumTraits<Scalar>::AddCost + + 5 * NumTraits<Scalar>::MulCost)), +#else + // The following numbers are based on min VSQRT throughput on Haswell. + Cost = (sizeof(Scalar) == 8 ? 28 : 14), +#endif PacketAccess = packet_traits<Scalar>::HasSqrt }; }; @@ -313,7 +361,7 @@ struct functor_traits<scalar_rsqrt_op<Scalar> > */ template<typename Scalar> struct scalar_cos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } }; @@ -332,7 +380,7 @@ struct functor_traits<scalar_cos_op<Scalar> > */ template<typename Scalar> struct scalar_sin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); } }; @@ -352,7 +400,7 @@ struct functor_traits<scalar_sin_op<Scalar> > */ template<typename Scalar> struct scalar_tan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } }; @@ -371,7 +419,7 @@ struct functor_traits<scalar_tan_op<Scalar> > */ template<typename Scalar> struct scalar_acos_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } }; @@ -390,7 +438,7 @@ struct functor_traits<scalar_acos_op<Scalar> > */ template<typename Scalar> struct scalar_asin_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } }; @@ -546,7 +594,7 @@ struct functor_traits<scalar_erfc_op<Scalar> > */ template<typename Scalar> struct scalar_atan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::atan; return atan(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); } }; @@ -566,7 +614,7 @@ struct functor_traits<scalar_atan_op<Scalar> > */ template<typename Scalar> struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tanh(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); } }; @@ -574,8 +622,24 @@ template<typename Scalar> struct functor_traits<scalar_tanh_op<Scalar> > { enum { - Cost = 5 * NumTraits<Scalar>::MulCost, - PacketAccess = packet_traits<Scalar>::HasTanh + PacketAccess = packet_traits<Scalar>::HasTanh, + Cost = + (PacketAccess + // The following numbers are based on the AVX implementation, +#ifdef EIGEN_VECTORIZE_FMA + // Haswell can issue 2 add/mul/madd per cycle. + // 9 pmadd, 2 pmul, 1 div, 2 other + ? (2 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost + + NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) +#else + ? (11 * NumTraits<Scalar>::AddCost + + 11 * NumTraits<Scalar>::MulCost + + NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost) +#endif + // This number assumes a naive implementation of tanh + : (6 * NumTraits<Scalar>::AddCost + 3 * NumTraits<Scalar>::MulCost + + 2 * NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost + + functor_traits<scalar_exp_op<Scalar> >::Cost)) }; }; @@ -585,7 +649,7 @@ struct functor_traits<scalar_tanh_op<Scalar> > */ template<typename Scalar> struct scalar_sinh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sinh; return sinh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); } }; @@ -604,7 +668,7 @@ struct functor_traits<scalar_sinh_op<Scalar> > */ template<typename Scalar> struct scalar_cosh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::cosh; return cosh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); } template <typename Packet> EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); } }; |