diff options
author | 2016-03-07 14:08:56 -0800 | |
---|---|---|
committer | 2016-03-07 14:08:56 -0800 | |
commit | 5707004d6b947c202085c3ead889e277264ea36a (patch) | |
tree | e6500972547d089adb9735a119c751e8cfe5aade /Eigen/src/Core | |
parent | 0b9e0abc96d5c0367ee6c443f71754637b0db7e4 (diff) |
Fix Eigen's build of sharded tests that use CUDA, plus more igamma/igammac bug fixes.
0. Prior to this PR, not a single sharded CUDA test was actually being *run*.
This is now fixed.
GPU tests are still failing for igamma/igammac.
1. Add calls for igamma/igammac to TensorBase.
2. Fix up the CUDA-specific calls of igamma/igammac.
3. Add unit tests for digamma, igamma, igammac in CUDA.
Diffstat (limited to 'Eigen/src/Core')
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/MathFunctions.h | 34 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMath.h | 1 |
3 files changed, 28 insertions, 11 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index ead0253df..802def51d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -460,11 +460,11 @@ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } /** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } /** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } /*************************************************************************** diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 6e84d3af8..6822700f8 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -116,24 +116,42 @@ double2 perfc<double2>(const double2& a) return make_double2(erfc(a.x), erfc(a.y)); } + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma<float4>(const float4& a, const float4& x) { - using numext::pigamma; + using numext::igamma; return make_float4( - pigamma(a.x, x.x), - pigamma(a.y, x.y), - pigamma(a.z, x.z), - pigamma(a.w, x.w)); + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigammac<double2>(const double2& a, const double& x) +double2 pigamma<double2>(const double2& a, const double2& x) { - using numext::pigammac; - return 
make_double2(pigammac(a.x, x.x), pigammac(a.y, x.y)); + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac<float4>(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac<double2>(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} #endif diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index d2563030b..25d964600 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -284,7 +284,6 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) { return make_double2(fabs(a.x), fabs(a.y)); } - EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4,4>& kernel) { double tmp = kernel.packet[0].y; |