From 218764ee1f0a21e1faf20ed314ffafeae79eb170 Mon Sep 17 00:00:00 2001
From: Srinivas Vasudevan
Date: Fri, 2 Dec 2016 14:13:01 -0800
Subject: Added support for expm1 in Eigen.

Adds expm1 (exp(x) - 1) to the CUDA backend:
 - Half.h: scalar expm1 for half, evaluated in float through numext::expm1.
 - MathFunctions.h: pexpm1 specializations for float4 and double2.
 - PacketMathHalf.h: HasExpm1 trait flag and a pexpm1 specialization for half2.
---
Review notes (text in this region is not part of the commit message):
 * The MathFunctions.h specializations were named pexp although they call
   expm1f/expm1; that collides with the pexp specialization visible in the
   hunk context. They are now named pexpm1.
 * The PacketMathHalf.h hunk that renamed plog1p to pexpm1 while keeping its
   log1pf body has been dropped, so plog1p is left untouched and pexpm1 is
   defined exactly once.
 * Line breaks, indentation and explicit template arguments were reconstructed
   from a whitespace-collapsed rendering of this patch. Verify the context
   lines against the tree, or apply with --ignore-whitespace.

 Eigen/src/Core/arch/CUDA/Half.h           |  4 ++++
 Eigen/src/Core/arch/CUDA/MathFunctions.h  | 15 +++++++++++++++
 Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 10 ++++++++++
 3 files changed, 29 insertions(+)

(limited to 'Eigen/src/Core/arch/CUDA')

diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 5a400307b..db9878796 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -392,6 +392,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
   return half(::expf(float(a)));
 #endif
 }
+// expm1 for half: widen to float, evaluate numext::expm1, convert back to half.
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
+  return half(numext::expm1(float(a)));
+}
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
 #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return half(::hlog(a));
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 0348b41db..3548f2fa2 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -56,6 +56,21 @@ double2 pexp<double2>(const double2& a)
   return make_double2(exp(a.x), exp(a.y));
 }
 
+// Component-wise exp(x) - 1 for float4, using the single-precision expm1f.
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexpm1<float4>(const float4& a)
+{
+  return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));
+}
+
+// Component-wise exp(x) - 1 for double2, using the global-namespace expm1.
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexpm1<double2>(const double2& a)
+{
+  using ::expm1;
+  return make_double2(expm1(a.x), expm1(a.y));
+}
+
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 float4 psqrt<float4>(const float4& a)
 {
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index ae54225f8..35cb0efd5 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -34,6 +34,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
     HasSqrt = 1,
     HasRsqrt = 1,
     HasExp = 1,
+    HasExpm1 = 1,
     HasLog = 1,
     HasLog1p = 1
   };
@@ -275,6 +276,15 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
   return __floats2half2_rn(r1, r2);
 }
 
+// expm1 on both halves of a half2: widen each to float, apply expm1f, round back.
+template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = expm1f(a1);
+  float r2 = expm1f(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
 
 template<> __device__ EIGEN_STRONG_INLINE
-- 
cgit v1.2.3