diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-09-19 12:48:30 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-09-19 12:48:30 -0700 |
commit | 6de5ed08d88239080b9381f60f75e5abea731d75 (patch) | |
tree | af9fcc1e95e23d7faece933d8c7a16a9778147a7 /unsupported/Eigen/src | |
parent | e02d42963750531490a69fc87926b60f32180456 (diff) |
Add generic PacketMath implementation of the Error Function (erf).
Diffstat (limited to 'unsupported/Eigen/src')
-rw-r--r-- | unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h | 40 | ||||
-rw-r--r-- | unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h | 5 |
2 files changed, 27 insertions, 18 deletions
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index a4287c31f..7447f4bfa 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -238,25 +238,39 @@ struct functor_traits<scalar_polygamma_op<Scalar> >
 };
 
 /** \internal
-  * \brief Template functor to compute the Gauss error function of a
-  * scalar
-  * \sa class CwiseUnaryOp, Cwise::erf()
+  * \brief Template functor to compute the error function of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::erf()
   */
 template<typename Scalar> struct scalar_erf_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
-    using numext::erf; return erf(a);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+  operator()(const Scalar& a) const {
+    return numext::erf(a);
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+    return perf(x);
   }
-  typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perf(a); }
 };
-template<typename Scalar>
-struct functor_traits<scalar_erf_op<Scalar> >
-{
+template <typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> > {
   enum {
-    // Guesstimate
-    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasErf
+    PacketAccess = packet_traits<Scalar>::HasErf,
+    Cost =
+        (PacketAccess
+#ifdef EIGEN_VECTORIZE_FMA
+    // Haswell can issue 2 add/mul/madd per cycle.
+    // 10 pmadd, 2 pmul, 1 div, 2 other
+    ? (2 * NumTraits<Scalar>::AddCost +
+       7 * NumTraits<Scalar>::MulCost +
+       scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
+#else
+    ? (12 * NumTraits<Scalar>::AddCost +
+       12 * NumTraits<Scalar>::MulCost +
+       scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
+#endif
+    // Assume for simplicity that this is as expensive as an exp().
+    : (functor_traits<scalar_exp_op<Scalar> >::Cost))
   };
 };
 
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 577015690..77fdb031a 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -30,10 +30,6 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
 
-/** \internal \returns the erf(\a a) (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet perf(const Packet& a) { using numext::erf; return erf(a); }
-
 /** \internal \returns the erfc(\a a) (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
@@ -77,4 +73,3 @@ Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext
 } // end namespace Eigen
 
 #endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
-