From 5707004d6b947c202085c3ead889e277264ea36a Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 7 Mar 2016 14:08:56 -0800 Subject: Fix Eigen's building of sharded tests that use CUDA & more igamma/igammac bugfixes. 0. Prior to this PR, not a single sharded CUDA test was actually being *run*. Fixed that. GPU tests are still failing for igamma/igammac. 1. Add calls for igamma/igammac to TensorBase 2. Fix up CUDA-specific calls of igamma/igammac 3. Add unit tests for digamma, igamma, igammac in CUDA. --- Eigen/src/Core/GenericPacketMath.h | 4 ++-- Eigen/src/Core/arch/CUDA/MathFunctions.h | 34 ++++++++++++++++++++++++-------- Eigen/src/Core/arch/CUDA/PacketMath.h | 1 - 3 files changed, 28 insertions(+), 11 deletions(-) (limited to 'Eigen/src/Core') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index ead0253df..802def51d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -460,11 +460,11 @@ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } /** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } /** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } /*************************************************************************** diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 6e84d3af8..6822700f8 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -116,24 +116,42 @@ double2 perfc(const double2& a) return make_double2(erfc(a.x), erfc(a.y)); } + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma(const float4& a, const float4& x) { - using numext::pigamma; + using numext::igamma; return make_float4( - pigamma(a.x, x.x), - pigamma(a.y, x.y), - pigamma(a.z, x.z), - pigamma(a.w, x.w)); + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigammac(const double2& a, const double& x) +double2 pigamma(const double2& a, const double2& x) { - using numext::pigammac; - return make_double2(pigammac(a.x, x.x), pigammac(a.y, x.y)); + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} #endif diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index d2563030b..25d964600 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -284,7 +284,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs(const double2& a) { return make_double2(fabs(a.x), fabs(a.y)); } - EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { double tmp = kernel.packet[0].y; -- cgit v1.2.3