diff options
author | Eugene Brevdo <ebrevdo@gmail.com> | 2016-03-08 17:28:21 -0800 |
---|---|---|
committer | Eugene Brevdo <ebrevdo@gmail.com> | 2016-03-08 17:28:21 -0800 |
commit | 73220d2bb044e5f01b3eee4838c5b32ca679f027 (patch) | |
tree | 37bcb2485d9fece4991c9a2e07dd6524a7e2bab1 | |
parent | 5f17de33933cdfa33af22621863864bcd3f73e42 (diff) |
Resolve bad merge.
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 10 | ||||
-rw-r--r-- | Eigen/src/Core/GlobalFunctions.h | 30 | ||||
-rw-r--r-- | Eigen/src/Core/NumTraits.h | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/MathFunctions.h | 36 | ||||
-rw-r--r-- | Eigen/src/Core/functors/BinaryFunctors.h | 49 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 5 | ||||
-rw-r--r-- | cmake/EigenTesting.cmake | 7 | ||||
-rw-r--r-- | test/array.cpp | 51 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_cuda.cu | 201 |
10 files changed, 393 insertions, 3 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 02882bdea..802def51d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -78,6 +78,8 @@ struct default_packet_traits HasDiGamma = 0, HasErf = 0, HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, HasRound = 0, HasFloor = 0, @@ -457,6 +459,14 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } +/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } + +/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 396da8e71..7df0fdda9 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -129,6 +129,36 @@ namespace Eigen ); } + /** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise incomplete gamma function. + * + */ + template<typename Derived,typename ExponentDerived> + inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived> + igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) + { + return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + + /** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise complementary incomplete gamma function. + * + */ + template<typename Derived,typename ExponentDerived> + inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived> + igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) + { + return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + namespace internal { EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 6a596bb7d..7ddb4a867 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -95,6 +95,11 @@ template<typename T> struct GenericNumTraits static inline T infinity() { return numext::numeric_limits<T>::infinity(); } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits<T>::quiet_NaN(); + } }; template<typename T> struct NumTraits : GenericNumTraits<T> diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index a2c06a817..6822700f8 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -117,6 +117,42 @@ double2 perfc<double2>(const double2& a) } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigamma<float4>(const float4& a, const float4& x) +{ + using numext::igamma; + return make_float4( + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigamma<double2>(const double2& a, const double2& x) +{ + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac<float4>(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac<double2>(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} + #endif } // end namespace internal diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 4962d625c..5cdfff845 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -337,6 +337,55 @@ template<> struct functor_traits<scalar_boolean_or_op> { }; }; +/** \internal + * \brief Template functor to compute the incomplete gamma function igamma(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma + */ +template<typename Scalar> struct scalar_igamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igamma; return igamma(a, x); + } + template<typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigammac(a, x); + } +}; +template<typename Scalar> +struct functor_traits<scalar_igamma_op<Scalar> > { + enum { + // Guesstimate + Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasIGamma + }; +}; + + +/** \internal + * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igammac + */ +template<typename Scalar> struct scalar_igammac_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igammac; return igammac(a, x); + } + template<typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const + { + return internal::pigammac(a, x); + } +}; +template<typename Scalar> +struct functor_traits<scalar_igammac_op<Scalar> > { + enum { + // Guesstimate + Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasIGammac + }; +}; //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index f09632375..a102e5457 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -206,6 +206,8 @@ template<typename Scalar> struct scalar_add_op; template<typename Scalar> struct scalar_constant_op; template<typename Scalar> struct scalar_identity_op; template<typename Scalar,bool iscpx> struct scalar_sign_op; +template<typename Scalar> struct scalar_igamma_op; +template<typename Scalar> struct scalar_igammac_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6b35179f2..24e8a6d8a 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -148,6 +148,7 @@ template<typename T> struct numeric_limits static T (max)() { assert(false && "Highest not supported for this type"); } static T (min)() { assert(false && "Lowest not supported for this type"); } static T infinity() { assert(false && "Infinity not supported for this type"); } + static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } }; template<> struct numeric_limits<float> { @@ -159,6 +160,8 @@ template<> struct numeric_limits<float> static float (min)() { return FLT_MIN; } EIGEN_DEVICE_FUNC static float infinity() { return CUDART_INF_F; } + EIGEN_DEVICE_FUNC + static float quiet_NaN() { return CUDART_NAN_F; } }; template<> struct numeric_limits<double> { @@ -170,6 +173,8 @@ template<> struct numeric_limits<double> static double (min)() { return DBL_MIN; } EIGEN_DEVICE_FUNC static double infinity() { return CUDART_INF; } + EIGEN_DEVICE_FUNC + static double quiet_NaN() { return CUDART_NAN; } }; template<> struct numeric_limits<int> { diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 5022397a7..5ca800cfe 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -19,12 +19,15 @@ macro(ei_add_test_internal testname testname_with_suffix) endif() if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) - cuda_add_executable(${targetname} ${filename}) + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() else() add_executable(${targetname} ${filename}) endif() - if (targetname MATCHES "^eigen2_") add_dependencies(eigen2_buildtests ${targetname}) else() diff --git a/test/array.cpp b/test/array.cpp index 96aef31c7..c61bfc8ed 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -295,7 +295,6 @@ template<typename ArrayType> void array_real(const ArrayType& m) VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); VERIFY_IS_APPROX(pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); - VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); @@ -305,6 +304,14 @@ template<typename ArrayType> void array_real(const ArrayType& m) VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); + // Smoke test to check any compilation issues + ArrayType m1_abs_p1 = m1.abs() + 1; + ArrayType m2_abs_p1 = m2.abs() + 1; + VERIFY_IS_APPROX(Eigen::igamma(m1_abs_p1, m2_abs_p1), Eigen::igamma(m1_abs_p1, m2_abs_p1)); + VERIFY_IS_APPROX(Eigen::igammac(m1_abs_p1, m2_abs_p1), Eigen::igammac(m1_abs_p1, m2_abs_p1)); + VERIFY_IS_APPROX(Eigen::igamma(m2_abs_p1, m1_abs_p1), Eigen::igamma(m2_abs_p1, m1_abs_p1)); + VERIFY_IS_APPROX(Eigen::igammac(m2_abs_p1, m1_abs_p1), Eigen::igammac(m2_abs_p1, m1_abs_p1)); + // scalar by array division const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon()); s1 += Scalar(tiny); @@ -323,6 +330,48 @@ template<typename ArrayType> void array_real(const ArrayType& m) std::numeric_limits<RealScalar>::infinity()); VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)), std::numeric_limits<RealScalar>::infinity()); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + // location i*6+j corresponds to a_s[i], x_s[j]. + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]); + } + + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]); + } + } + } } #endif // EIGEN_HAS_C99_MATH diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 58da21d3b..1964d9e07 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -575,6 +575,191 @@ void test_cuda_lgamma(const Scalar stddev) } template <typename Scalar> +void test_cuda_digamma() +{ + Tensor<Scalar, 1> in(7); + Tensor<Scalar, 1> out(7); + Tensor<Scalar, 1> expected_out(7); + out.setZero(); + + in(0) = Scalar(1); + in(1) = Scalar(1.5); + in(2) = Scalar(4); + in(3) = Scalar(-10.5); + in(4) = Scalar(10000.5); + in(5) = Scalar(0); + in(6) = Scalar(-1); + + expected_out(0) = Scalar(-0.5772156649015329); + expected_out(1) = Scalar(0.03648997397857645); + expected_out(2) = Scalar(1.2561176684318); + expected_out(3) = Scalar(2.398239129535781); + expected_out(4) = Scalar(9.210340372392849); + expected_out(5) = std::numeric_limits<Scalar>::infinity(); + expected_out(6) = std::numeric_limits<Scalar>::infinity(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in.digamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + for (int i = 5; i < 7; ++i) { + VERIFY_IS_EQUAL(out(i), expected_out(i)); + } +} + +template <typename Scalar> +void test_cuda_igamma() +{ + Tensor<Scalar, 2> a(6, 6); + Tensor<Scalar, 2> x(6, 6); + Tensor<Scalar, 2> out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + + + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]); + } + } + } +} + +template <typename Scalar> +void test_cuda_igammac() +{ + Tensor<Scalar, 2> a(6, 6); + Tensor<Scalar, 2> x(6, 6); + Tensor<Scalar, 2> out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]); + } + } + } +} + +template <typename Scalar> void test_cuda_erf(const Scalar stddev) { Tensor<Scalar, 2> in(72,97); @@ -667,30 +852,46 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>()); CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>()); CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>()); + CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f)); + + CALL_SUBTEST_4(test_cuda_digamma<float>()); + CALL_SUBTEST_4(test_cuda_erf<float>(1.0f)); CALL_SUBTEST_4(test_cuda_erf<float>(100.0f)); CALL_SUBTEST_4(test_cuda_erf<float>(0.01f)); CALL_SUBTEST_4(test_cuda_erf<float>(0.001f)); + CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f)); // CALL_SUBTEST(test_cuda_erfc<float>(100.0f)); CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f)); CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f)); + CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0)); CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0)); CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01)); CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001)); + + CALL_SUBTEST_4(test_cuda_digamma<double>()); + CALL_SUBTEST_4(test_cuda_erf<double>(1.0)); CALL_SUBTEST_4(test_cuda_erf<double>(100.0)); CALL_SUBTEST_4(test_cuda_erf<double>(0.01)); CALL_SUBTEST_4(test_cuda_erf<double>(0.001)); + CALL_SUBTEST_4(test_cuda_erfc<double>(1.0)); // CALL_SUBTEST(test_cuda_erfc<double>(100.0)); CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc<double>(0.01)); CALL_SUBTEST_4(test_cuda_erfc<double>(0.001)); + + CALL_SUBTEST_5(test_cuda_igamma<float>()); + CALL_SUBTEST_5(test_cuda_igammac<float>()); + + CALL_SUBTEST_5(test_cuda_igamma<double>()); + CALL_SUBTEST_5(test_cuda_igammac<double>()); } |