Added support for expm1 in Eigen.

author: Srinivas Vasudevan <srvasude@gmail.com> 2016-12-02 14:13:01 -0800
committer: Srinivas Vasudevan <srvasude@gmail.com> 2016-12-02 14:13:01 -0800
commit: 218764ee1f0a21e1faf20ed314ffafeae79eb170 (patch)
tree: fc8901c4b69b57f889b3a88e94ec162e4c19bd98 /Eigen/src/Core/arch/CUDA
parent: 27873008d431a307bed9c200a12622a361af4d14 (diff)
3 files changed, 26 insertions, 1 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 5a400307b..db9878796 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -392,6 +392,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
    return half(::expf(float(a)));
 #endif
 }
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
+  return half(numext::expm1(float(a)));  // widen to float, apply numext::expm1, convert the result back to half
+}
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
 #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return half(::hlog(a));
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 0348b41db..3548f2fa2 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -57,6 +57,19 @@ double2 pexp<double2>(const double2& a)
 }
 
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexpm1<float4>(const float4& a)  // packet expm1: expm1f applied to each of the four components
+{
+  return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexpm1<double2>(const double2& a)  // packet expm1: expm1 applied to both components
+{
+  using ::expm1;  // make the global-namespace expm1 the one the unqualified calls below resolve to
+  return make_double2(expm1(a.x), expm1(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 float4 psqrt<float4>(const float4& a)
 {
   return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index ae54225f8..35cb0efd5 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -34,6 +34,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
     HasSqrt   = 1,
     HasRsqrt  = 1,
     HasExp    = 1,
+    HasExpm1  = 1,
     HasLog    = 1,
     HasLog1p  = 1
   };
@@ -267,7 +268,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const ha
 #endif
 }
 
-template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
+template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) {
   float a1 = __low2float(a);
   float a2 = __high2float(a);
   float r1 = log1pf(a1);
@@ -275,6 +276,14 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
   return __floats2half2_rn(r1, r2);
 }
 
+template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) {
+  float a1 = __low2float(a);   // low element of the pair, converted to float
+  float a2 = __high2float(a);  // high element of the pair, converted to float
+  float r1 = expm1f(a1);
+  float r2 = expm1f(a2);
+  return __floats2half2_rn(r1, r2);  // pack both float results back into a single half2
+}
+
 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
 
 template<>  __device__ EIGEN_STRONG_INLINE
author	Srinivas Vasudevan <srvasude@gmail.com>	2016-12-02 14:13:01 -0800
committer	Srinivas Vasudevan <srvasude@gmail.com>	2016-12-02 14:13:01 -0800
commit	218764ee1f0a21e1faf20ed314ffafeae79eb170 (patch)
tree	fc8901c4b69b57f889b3a88e94ec162e4c19bd98 /Eigen/src/Core/arch/CUDA
parent	27873008d431a307bed9c200a12622a361af4d14 (diff)