diff options
author | 2016-02-19 23:01:27 +0100 | |
---|---|---|
committer | 2016-02-19 23:01:27 +0100 | |
commit | d90a2dac5e987130ad01a7ad8cdf97051688fcfe (patch) | |
tree | 7255475e45453c70138cc0230693f622ab6dda31 /Eigen/src/Core | |
parent | 485823b5f516e96e6b387cb4c9f5fbe5d2ce26d6 (diff) | |
parent | 46fc23f91c1c5ea21bab67976773c613bd7e4ab0 (diff) |
merge
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/SpecialFunctions.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/MathFunctions.h | 42 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMath.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 237 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/TypeCasting.h | 123 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/MathFunctions.h | 33 |
6 files changed, 429 insertions, 44 deletions
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 6c6b21f98..6b4598e3e 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -182,10 +182,10 @@ struct digamma_impl_maybe_poly<float> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(const float s) { const float A[] = { - -4.16666666666666666667E-3, - 3.96825396825396825397E-3, - -8.33333333333333333333E-3, - 8.33333333333333333333E-2 + -4.16666666666666666667E-3f, + 3.96825396825396825397E-3f, + -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f }; float z; diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index a24bf6e26..98d8e029f 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -267,31 +267,34 @@ pexp<Packet8f>(const Packet8f& _x) { // Hyperbolic Tangent function. // Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-8, 8], outside of which the +// is accurate up to a couple of ulp in the range [-9, 9], outside of which the // fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f ptanh<Packet8f>(const Packet8f& _x) { - // Map the range [-8, 8] to [-1, 1], we will clamp bad coefficients later. - const Packet8f x = _mm256_mul_ps(_x, _mm256_set1_ps(0.125f)); + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f); + _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f); + const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x)); // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet8f(alpha_1, -2.47030171958948e-03f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_3, -2.06804010015822e-02f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_5, -3.13693994587418e-02f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_7, -7.19851201683627e-03f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_9, 8.31561269687160e-04f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_11, -1.37626659546502e-04f); - _EIGEN_DECLARE_CONST_Packet8f(alpha_13, 1.39116714700458e-05f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f); + _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f); // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet8f(beta_0, -3.08787724141615e-04f); - _EIGEN_DECLARE_CONST_Packet8f(beta_2, -9.17251911622436e-03f); - _EIGEN_DECLARE_CONST_Packet8f(beta_4, -3.09625062090444e-02f); - _EIGEN_DECLARE_CONST_Packet8f(beta_6, -2.05669680763032e-02f); + _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f); + _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f); + _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f); + _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f); // Since the polynomials are odd/even, we need x^2. - const Packet8f x2 = _mm256_mul_ps(x, x); + const Packet8f x2 = pmul(x, x); // Evaluate the numerator polynomial p. Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11); @@ -308,14 +311,7 @@ ptanh<Packet8f>(const Packet8f& _x) { q = pmadd(x2, q, p8f_beta_0); // Divide the numerator by the denominator. - const Packet8f res = pdiv(p, q); - - // Mask-out values outside of [-8, 8]. - _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f); - _EIGEN_DECLARE_CONST_Packet8f(minus_one, -1.0f); - return _mm256_blendv_ps( - _mm256_blendv_ps(res, p8f_one, _mm256_cmp_ps(x, p8f_one, _CMP_GT_OQ)), - p8f_minus_one, _mm256_cmp_ps(x, p8f_minus_one, _CMP_LT_OQ)); + return pdiv(p, q); } template <> diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index d3d9f910e..a32b41e18 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -21,7 +21,6 @@ namespace internal { template<> struct is_arithmetic<float4> { enum { value = true }; }; template<> struct is_arithmetic<double2> { enum { value = true }; }; - template<> struct packet_traits<float> : default_packet_traits { typedef float4 type; @@ -273,6 +272,35 @@ template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) return a.x * a.y; } +template<size_t offset> +struct protate_impl<offset, float4> +{ + static float4 run(const float4& a) { + if (offset == 0) { + return make_float4(a.x, a.y, a.z, a.w); + } + if (offset == 1) { + return make_float4(a.w, a.x, a.y, a.z); + } + if (offset == 2) { + return make_float4(a.z, a.w, a.x, a.y); + } + return make_float4(a.y, a.z, a.w, a.x); + } +}; + +template<size_t offset> +struct protate_impl<offset, double2> +{ + static double2 run(const double2& a) { + if (offset == 0) { + return make_double2(a.x, a.y); + } + return make_double2(a.y, a.x); + } +}; + + template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) { return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h new file mode 100644 index 000000000..d0106f4f1 --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -0,0 +1,237 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H +#define EIGEN_PACKET_MATH_HALF_CUDA_H + +namespace Eigen { + +namespace internal { + +#if defined(EIGEN_HAS_CUDA_FP16) + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + +__device__ half operator + (const half& a, const half& b) { + return __hadd(a, b); +} +__device__ half operator * (const half& a, const half& b) { + return __hmul(a, b); +} +__device__ half operator - (const half& a, const half& b) { + return __hsub(a, b); +} +__device__ half operator / (const half& a, const half& b) { + float num = __half2float(a); + float denom = __half2float(b); + return __float2half(num / denom); +} +__device__ half operator - (const half& a) { + return __hneg(a); +} +__device__ half operator += (half& a, const half& b) { + a = __hadd(a, b); + return a; +} +__device__ half operator *= (half& a, const half& b) { + a = __hmul(a, b); + return a; +} +__device__ half operator -= (half& a, const half& b) { + a = __hsub(a, b); + return a; +} +__device__ half operator /= (half& a, const half& b) { + a = a / b; + return a; +} + + +template<> struct is_arithmetic<half2> { enum { value = true }; }; + +template<> struct packet_traits<half> : default_packet_traits +{ + typedef half2 type; + typedef half2 half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + HasHalfPacket = 0, + + HasDiv = 1, + HasLog = 1, + HasExp = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasLGamma = 1, + HasDiGamma = 1, + HasErf = 1, + HasErfc = 1, + + HasBlend = 0, + }; +}; + + +template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; }; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) { + return __half2half2(from); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) { + return __halves2half2(a, __hadd(a, __float2half(1))); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) { + return __hadd2(a, b); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) { + return __hsub2(a, b); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) { + return __hneg2(a); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; } + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) { + return __hmul2(a, b); +} + + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) { + return __hfma2(a, b, c); + } + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + float r1 = a1 / b1; + float r2 = a2 / b2; + return __floats2half2_rn(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + half r1 = a1 < b1 ? __low2half(a) : __low2half(b); + half r2 = a2 < b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float b1 = __low2float(b); + float b2 = __high2float(b); + half r1 = a1 > b1 ? __low2half(a) : __low2half(b); + half r2 = a2 > b2 ? __high2half(a) : __high2half(b); + return __halves2half2(r1, r2); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) { + return *reinterpret_cast<const half2*>(from); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) { + return __halves2half2(from[0], from[1]); +} + +template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) { + return __halves2half2(from[0], from[0]); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) { + *reinterpret_cast<half2*>(to) = from; +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) { + to[0] = __low2half(from); + to[1] = __high2half(from); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) { + return __ldg((const half2*)from); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) { + return __halves2half2(__ldg(from+0), __ldg(from+1)); +} + +template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) { + return __halves2half2(from[0*stride], from[1*stride]); +} + +template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) { + to[stride*0] = __low2half(from); + to[stride*1] = __high2half(from); +} + +template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) { + return __low2half(a); +} + +template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) { + return __hadd(__low2half(a), __high2half(a)); +} + +template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) { + half first = __low2half(a); + half second = __high2half(a); + return __hgt(first, second) ? first : second; +} + +template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) { + half first = __low2half(a); + half second = __high2half(a); + return __hlt(first, second) ? first : second; +} + +template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) { + return __hmul(__low2half(a), __high2half(a)); +} + +template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) { + assert(false && "tbd"); + return half2(); +} + + +EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock<half2,2>& kernel) { + assert(false && "tbd"); + // half tmp = kernel.packet[0].y; + // kernel.packet[0].y = kernel.packet[1].x; + // kernel.packet[1].x = tmp; +} + +#endif +#endif +#endif + +} // end namespace internal + +} // end namespace Eigen + + +#endif // EIGEN_PACKET_MATH_HALF_CUDA_H diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h new file mode 100644 index 000000000..2742a4e7b --- /dev/null +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -0,0 +1,123 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_CUDA_H +#define EIGEN_TYPE_CASTING_CUDA_H + +namespace Eigen { + +namespace internal { + +#if defined(EIGEN_HAS_CUDA_FP16) + +template<> +struct scalar_cast_op<float, half> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return __float2half(a); + #else + assert(false && "tbd"); + return half(); + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<float, half> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + +template<> +struct scalar_cast_op<int, half> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef half result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return __float2half(static_cast<float>(a)); + #else + assert(false && "tbd"); + return half(); + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<int, half> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + +template<> +struct scalar_cast_op<half, float> { + EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) + typedef float result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const { + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return __half2float(a); + #else + assert(false && "tbd"); + return 0.0f; + #endif + } +}; + +template<> +struct functor_traits<scalar_cast_op<half, float> > +{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; + + + + +template <> +struct type_casting_traits<half, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + float2 r1 = __half22float2(a); + float2 r2 = __half22float2(b); + return make_float4(r1.x, r1.y, r2.x, r2.y); +#else + assert(false && "tbd"); + return float4(); +#endif +} + +template <> +struct type_casting_traits<float, half> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { + // Simply discard the second half of the input +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 + return __float22half2_rn(make_float2(a.x, a.y)); +#else + assert(false && "tbd"); + return half2(); +#endif +} + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_CUDA_H diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index a7a0d906f..28f103eeb 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -518,30 +518,31 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) { // Hyperbolic Tangent function. // Doesn't do anything fancy, just a 13/6-degree rational interpolant which -// is accurate up to a couple of ulp in the range [-8, 8], outside of which the +// is accurate up to a couple of ulp in the range [-9, 9], outside of which the // fl(tanh(x)) = +/-1. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f ptanh<Packet4f>(const Packet4f& _x) { - // Map the range [-8, 8] to [-1, 1], we will clamp bad coefficients later. - const Packet4f x = - pmax(pset1<Packet4f>(-1.0f), - pmin(pset1<Packet4f>(1.0f), pmul(_x, pset1<Packet4f>(0.125f)))); + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f); + _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f); + const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x)); // The monomial coefficients of the numerator polynomial (odd). - _EIGEN_DECLARE_CONST_Packet4f(alpha_1, -2.47030171958948e-03f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_3, -2.06804010015822e-02f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_5, -3.13693994587418e-02f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_7, -7.19851201683627e-03f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_9, 8.31561269687160e-04f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_11, -1.37626659546502e-04f); - _EIGEN_DECLARE_CONST_Packet4f(alpha_13, 1.39116714700458e-05f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f); + _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f); // The monomial coefficients of the denominator polynomial (even). - _EIGEN_DECLARE_CONST_Packet4f(beta_0, -3.08787724141615e-04f); - _EIGEN_DECLARE_CONST_Packet4f(beta_2, -9.17251911622436e-03f); - _EIGEN_DECLARE_CONST_Packet4f(beta_4, -3.09625062090444e-02f); - _EIGEN_DECLARE_CONST_Packet4f(beta_6, -2.05669680763032e-02f); + _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f); + _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f); + _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f); + _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f); // Since the polynomials are odd/even, we need x^2. const Packet4f x2 = pmul(x, x); |