diff options
author | Srinivas Vasudevan <srvasude@gmail.com> | 2016-12-02 14:13:01 -0800 |
---|---|---|
committer | Srinivas Vasudevan <srvasude@gmail.com> | 2016-12-02 14:13:01 -0800 |
commit | 218764ee1f0a21e1faf20ed314ffafeae79eb170 (patch) | |
tree | fc8901c4b69b57f889b3a88e94ec162e4c19bd98 /Eigen/src/Core/arch/CUDA | |
parent | 27873008d431a307bed9c200a12622a361af4d14 (diff) |
Added support for expm1 in Eigen.
Diffstat (limited to 'Eigen/src/Core/arch/CUDA')
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 3 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/MathFunctions.h | 13 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 11 |
3 files changed, 26 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 5a400307b..db9878796 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -392,6 +392,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { return half(::expf(float(a))); #endif } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { + return half(numext::expm1(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return half(::hlog(a)); diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 0348b41db..3548f2fa2 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -57,6 +57,19 @@ double2 pexp<double2>(const double2& a) } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pexpm1<float4>(const float4& a) +{ + return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pexpm1<double2>(const double2& a) +{ + using ::expm1; + return make_double2(expm1(a.x), expm1(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) { return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225f8..35cb0efd5 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,6 +34,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, + HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -267,7 +268,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const ha #endif } -template<> __device__ EIGEN_STRONG_INLINE half2 
plog1p<half2>(const half2& a) { +template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); float r1 = log1pf(a1); @@ -275,6 +276,14 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } +template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = expm1f(a1); + float r2 = expm1f(a2); + return __floats2half2_rn(r1, r2); +} + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE |