From d1862967a89501f0382834e0d128a53ad5764377 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 10 Dec 2015 22:23:21 +0100 Subject: Make sure ADOLC is recent enough by searching for adtl.h --- cmake/FindAdolc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cmake') diff --git a/cmake/FindAdolc.cmake b/cmake/FindAdolc.cmake index 1a7ff3628..937e54990 100644 --- a/cmake/FindAdolc.cmake +++ b/cmake/FindAdolc.cmake @@ -5,7 +5,7 @@ endif (ADOLC_INCLUDES AND ADOLC_LIBRARIES) find_path(ADOLC_INCLUDES NAMES - adolc/adouble.h + adolc/adtl.h PATHS $ENV{ADOLCDIR} ${INCLUDE_INSTALL_DIR} -- cgit v1.2.3 From 49d96aee6448d67edbb0382fefca746304c5baaa Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Wed, 16 Dec 2015 11:37:16 +0100 Subject: bug #1120: Make sure that SuperLU version is checked --- cmake/FindSuperLU.cmake | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'cmake') diff --git a/cmake/FindSuperLU.cmake b/cmake/FindSuperLU.cmake index 259ed7320..e4142fe4d 100644 --- a/cmake/FindSuperLU.cmake +++ b/cmake/FindSuperLU.cmake @@ -60,11 +60,21 @@ endif() cmake_pop_check_state() +if(SuperLU_FIND_VERSION) + if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION}) + set(SUPERLU_VERSION_OK FALSE) + else() + set(SUPERLU_VERSION_OK TRUE) + endif() +else() + set(SUPERLU_VERSION_OK TRUE) +endif() + endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(SUPERLU - REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES + REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK VERSION_VAR SUPERLU_VERSION_VAR) mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES) -- cgit v1.2.3 From 73220d2bb044e5f01b3eee4838c5b32ca679f027 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 8 Mar 2016 17:28:21 -0800 Subject: Resolve bad merge. --- Eigen/src/Core/GenericPacketMath.h | 10 ++ Eigen/src/Core/GlobalFunctions.h | 30 +++++ Eigen/src/Core/NumTraits.h | 5 + Eigen/src/Core/arch/CUDA/MathFunctions.h | 36 ++++++ Eigen/src/Core/functors/BinaryFunctors.h | 49 ++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 2 + Eigen/src/Core/util/Meta.h | 5 + cmake/EigenTesting.cmake | 7 +- test/array.cpp | 51 +++++++- unsupported/test/cxx11_tensor_cuda.cu | 201 ++++++++++++++++++++++++++++++ 10 files changed, 393 insertions(+), 3 deletions(-) (limited to 'cmake') diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 02882bdea..802def51d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -78,6 +78,8 @@ struct default_packet_traits HasDiGamma = 0, HasErf = 0, HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, HasRound = 0, HasFloor = 0, @@ -457,6 +459,14 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } +/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } + +/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 396da8e71..7df0fdda9 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -129,6 +129,36 @@ namespace Eigen ); } + /** \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise incomplete gamma function. + * + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + igamma(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) + { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + + /** \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. + * + * This function computes the coefficient-wise complementary incomplete gamma function. + * + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + igammac(const Eigen::ArrayBase& a, const Eigen::ArrayBase& x) + { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + a.derived(), + x.derived() + ); + } + namespace internal { EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 6a596bb7d..7ddb4a867 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -95,6 +95,11 @@ template struct GenericNumTraits static inline T infinity() { return numext::numeric_limits::infinity(); } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits::quiet_NaN(); + } }; template struct NumTraits : GenericNumTraits diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index a2c06a817..6822700f8 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -117,6 +117,42 @@ double2 perfc(const double2& a) } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigamma(const float4& a, const float4& x) +{ + using numext::igamma; + return make_float4( + igamma(a.x, x.x), + igamma(a.y, x.y), + igamma(a.z, x.z), + igamma(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigamma(const double2& a, const double2& x) +{ + using numext::igamma; + return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pigammac(const float4& a, const float4& x) +{ + using numext::igammac; + return make_float4( + igammac(a.x, x.x), + igammac(a.y, x.y), + igammac(a.z, x.z), + igammac(a.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pigammac(const double2& a, const double2& x) +{ + using numext::igammac; + return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); +} + #endif } // end namespace internal diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 4962d625c..5cdfff845 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -337,6 +337,55 @@ template<> struct functor_traits { }; }; +/** \internal + * \brief Template functor to compute the incomplete gamma function igamma(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igamma + */ +template struct scalar_igamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igamma; return igamma(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { + return internal::pigammac(a, x); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGamma + }; +}; + + +/** \internal + * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) + * + * \sa class CwiseBinaryOp, Cwise::igammac + */ +template struct scalar_igammac_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { + using numext::igammac; return igammac(a, x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const + { + return internal::pigammac(a, x); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 20 * NumTraits::MulCost + 10 * NumTraits::AddCost, + PacketAccess = packet_traits::HasIGammac + }; +}; //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index f09632375..a102e5457 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -206,6 +206,8 @@ template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; template struct scalar_sign_op; +template struct scalar_igamma_op; +template struct scalar_igammac_op; template struct scalar_product_op; template struct scalar_multiple2_op; diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 6b35179f2..24e8a6d8a 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -148,6 +148,7 @@ template struct numeric_limits static T (max)() { assert(false && "Highest not supported for this type"); } static T (min)() { assert(false && "Lowest not supported for this type"); } static T infinity() { assert(false && "Infinity not supported for this type"); } + static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } }; template<> struct numeric_limits { @@ -159,6 +160,8 @@ template<> struct numeric_limits static float (min)() { return FLT_MIN; } EIGEN_DEVICE_FUNC static float infinity() { return CUDART_INF_F; } + EIGEN_DEVICE_FUNC + static float quiet_NaN() { return CUDART_NAN_F; } }; template<> struct numeric_limits { @@ -170,6 +173,8 @@ template<> struct numeric_limits static double (min)() { return DBL_MIN; } EIGEN_DEVICE_FUNC static double infinity() { return CUDART_INF; } + EIGEN_DEVICE_FUNC + static double quiet_NaN() { return CUDART_NAN; } }; template<> struct numeric_limits { diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 5022397a7..5ca800cfe 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -19,12 +19,15 @@ macro(ei_add_test_internal testname testname_with_suffix) endif() if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) - cuda_add_executable(${targetname} ${filename}) + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() else() add_executable(${targetname} ${filename}) endif() - if (targetname MATCHES "^eigen2_") add_dependencies(eigen2_buildtests ${targetname}) else() diff --git a/test/array.cpp b/test/array.cpp index 96aef31c7..c61bfc8ed 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -295,7 +295,6 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); VERIFY_IS_APPROX(pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); - VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); @@ -305,6 +304,14 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); + // Smoke test to check any compilation issues + ArrayType m1_abs_p1 = m1.abs() + 1; + ArrayType m2_abs_p1 = m2.abs() + 1; + VERIFY_IS_APPROX(Eigen::igamma(m1_abs_p1, m2_abs_p1), Eigen::igamma(m1_abs_p1, m2_abs_p1)); + VERIFY_IS_APPROX(Eigen::igammac(m1_abs_p1, m2_abs_p1), Eigen::igammac(m1_abs_p1, m2_abs_p1)); + VERIFY_IS_APPROX(Eigen::igamma(m2_abs_p1, m1_abs_p1), Eigen::igamma(m2_abs_p1, m1_abs_p1)); + VERIFY_IS_APPROX(Eigen::igammac(m2_abs_p1, m1_abs_p1), Eigen::igammac(m2_abs_p1, m1_abs_p1)); + // scalar by array division const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); @@ -323,6 +330,48 @@ template void array_real(const ArrayType& m) std::numeric_limits::infinity()); VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)), std::numeric_limits::infinity()); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + // location i*6+j corresponds to a_s[i], x_s[j]. + Scalar nan = std::numeric_limits::quiet_NaN(); + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]); + } + + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]); + } + } + } } #endif // EIGEN_HAS_C99_MATH diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 58da21d3b..1964d9e07 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -574,6 +574,191 @@ void test_cuda_lgamma(const Scalar stddev) cudaFree(d_out); } +template +void test_cuda_digamma() +{ + Tensor in(7); + Tensor out(7); + Tensor expected_out(7); + out.setZero(); + + in(0) = Scalar(1); + in(1) = Scalar(1.5); + in(2) = Scalar(4); + in(3) = Scalar(-10.5); + in(4) = Scalar(10000.5); + in(5) = Scalar(0); + in(6) = Scalar(-1); + + expected_out(0) = Scalar(-0.5772156649015329); + expected_out(1) = Scalar(0.03648997397857645); + expected_out(2) = Scalar(1.2561176684318); + expected_out(3) = Scalar(2.398239129535781); + expected_out(4) = Scalar(9.210340372392849); + expected_out(5) = std::numeric_limits::infinity(); + expected_out(6) = std::numeric_limits::infinity(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in(d_in, 7); + Eigen::TensorMap > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in.digamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + for (int i = 5; i < 7; ++i) { + VERIFY_IS_EQUAL(out(i), expected_out(i)); + } +} + +template +void test_cuda_igamma() +{ + Tensor a(6, 6); + Tensor x(6, 6); + Tensor out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits::quiet_NaN(); + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + + + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_a(d_a, 6, 6); + Eigen::TensorMap > gpu_x(d_x, 6, 6); + Eigen::TensorMap > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]); + } + } + } +} + +template +void test_cuda_igammac() +{ + Tensor a(6, 6); + Tensor x(6, 6); + Tensor out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits::quiet_NaN(); + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_a(d_a, 6, 6); + Eigen::TensorMap > gpu_x(d_x, 6, 6); + Eigen::TensorMap > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]); + } + } + } +} + template void test_cuda_erf(const Scalar stddev) { @@ -667,30 +852,46 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_3(test_cuda_convolution_2d()); CALL_SUBTEST_3(test_cuda_convolution_3d()); CALL_SUBTEST_3(test_cuda_convolution_3d()); + CALL_SUBTEST_4(test_cuda_lgamma(1.0f)); CALL_SUBTEST_4(test_cuda_lgamma(100.0f)); CALL_SUBTEST_4(test_cuda_lgamma(0.01f)); CALL_SUBTEST_4(test_cuda_lgamma(0.001f)); + + CALL_SUBTEST_4(test_cuda_digamma()); + CALL_SUBTEST_4(test_cuda_erf(1.0f)); CALL_SUBTEST_4(test_cuda_erf(100.0f)); CALL_SUBTEST_4(test_cuda_erf(0.01f)); CALL_SUBTEST_4(test_cuda_erf(0.001f)); + CALL_SUBTEST_4(test_cuda_erfc(1.0f)); // CALL_SUBTEST(test_cuda_erfc(100.0f)); CALL_SUBTEST_4(test_cuda_erfc(5.0f)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc(0.01f)); CALL_SUBTEST_4(test_cuda_erfc(0.001f)); + CALL_SUBTEST_4(test_cuda_lgamma(1.0)); CALL_SUBTEST_4(test_cuda_lgamma(100.0)); CALL_SUBTEST_4(test_cuda_lgamma(0.01)); CALL_SUBTEST_4(test_cuda_lgamma(0.001)); + + CALL_SUBTEST_4(test_cuda_digamma()); + CALL_SUBTEST_4(test_cuda_erf(1.0)); CALL_SUBTEST_4(test_cuda_erf(100.0)); CALL_SUBTEST_4(test_cuda_erf(0.01)); CALL_SUBTEST_4(test_cuda_erf(0.001)); + CALL_SUBTEST_4(test_cuda_erfc(1.0)); // CALL_SUBTEST(test_cuda_erfc(100.0)); CALL_SUBTEST_4(test_cuda_erfc(5.0)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc(0.01)); CALL_SUBTEST_4(test_cuda_erfc(0.001)); + + CALL_SUBTEST_5(test_cuda_igamma()); + CALL_SUBTEST_5(test_cuda_igammac()); + + CALL_SUBTEST_5(test_cuda_igamma()); + CALL_SUBTEST_5(test_cuda_igammac()); } -- cgit v1.2.3 From bb0e73c1914c675755e8fab2a6db168b65a4de51 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 18 Mar 2016 12:17:37 -0700 Subject: Gate all the CUDA tests under the EIGEN_TEST_NVCC option --- cmake/EigenTesting.cmake | 6 ++++++ unsupported/test/CMakeLists.txt | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'cmake') diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 5ca800cfe..c70ec2c24 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -309,6 +309,12 @@ macro(ei_testing_print_summary) message(STATUS "C++11: OFF") endif() + if(EIGEN_TEST_NVCC) + message(STATUS "CUDA: ON") + else() + message(STATUS "CUDA: OFF") + endif() + endif() # vectorization / alignment options message(STATUS "\n${EIGEN_TESTING_SUMMARY}") diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 79c26fb72..19893cc25 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -174,7 +174,7 @@ endif() # These tests needs nvcc find_package(CUDA 7.0) -if(CUDA_FOUND) +if(CUDA_FOUND AND EIGEN_TEST_NVCC) # Mke sure to compile without the -pedantic and -Wundef flags since they trigger thousands of compilation warnings in the CUDA runtime string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -- cgit v1.2.3 From a9a6710e151ccd5d4fa9a6178db4413ed0c74911 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Mon, 21 Mar 2016 13:46:47 -0400 Subject: add initial s390x(zEC13) ZVECTOR support --- CMakeLists.txt | 6 +++++- cmake/EigenTesting.cmake | 10 +++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'cmake') diff --git a/CMakeLists.txt b/CMakeLists.txt index 95f4c8d7c..51beba118 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -250,7 +250,11 @@ if(NOT MSVC) message(STATUS "Enabling NEON in tests/examples") endif() - + option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF) + if(EIGEN_TEST_ZVECTOR) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector") + message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples") + endif() check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) if(COMPILER_SUPPORT_OPENMP) diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index c70ec2c24..1709e0334 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -302,7 +302,13 @@ macro(ei_testing_print_summary) else() message(STATUS "ARMv8 NEON: Using architecture defaults") endif() - + + if(EIGEN_TEST_ZVECTOR) + message(STATUS "S390X ZVECTOR: ON") + else() + message(STATUS "S390X ZVECTOR: Using architecture defaults") + endif() + if(EIGEN_TEST_CXX11) message(STATUS "C++11: ON") else() @@ -446,6 +452,8 @@ macro(ei_get_cxxflags VAR) set(${VAR} NEON) elseif(EIGEN_TEST_NEON64) set(${VAR} NEON) + elseif(EIGEN_TEST_ZVECTOR) + set(${VAR} ZVECTOR) elseif(EIGEN_TEST_VSX) set(${VAR} VSX) elseif(EIGEN_TEST_ALTIVEC) -- cgit v1.2.3 From 2b457f8e5ec2bd38ce5049bd9220d3f37c7fa1ff Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 4 Apr 2016 11:47:46 +0200 Subject: Fix cross-compiling windows version detection --- cmake/EigenDetermineOSVersion.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cmake') diff --git a/cmake/EigenDetermineOSVersion.cmake b/cmake/EigenDetermineOSVersion.cmake index 3c48d4c37..9246fa67c 100644 --- a/cmake/EigenDetermineOSVersion.cmake +++ b/cmake/EigenDetermineOSVersion.cmake @@ -26,7 +26,7 @@ function(DetermineShortWindowsName WIN_VERSION win_num_version) endfunction() function(DetermineOSVersion OS_VERSION) - if (WIN32) + if (WIN32 AND CMAKE_HOST_SYSTEM_NAME MATCHES Windows) file (TO_NATIVE_PATH "$ENV{COMSPEC}" SHELL) exec_program( ${SHELL} ARGS "/c" "ver" OUTPUT_VARIABLE ver_output) -- cgit v1.2.3 From 7781f865cb6cc3faff3b1dfce557439abe3b56b9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 6 Apr 2016 09:35:23 -0700 Subject: Renamed the EIGEN_TEST_NVCC cmake option into EIGEN_TEST_CUDA per the discussion in bug #1173. --- cmake/EigenTesting.cmake | 2 +- test/CMakeLists.txt | 8 ++++---- unsupported/test/CMakeLists.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'cmake') diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 1709e0334..d5e3972b5 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -315,7 +315,7 @@ macro(ei_testing_print_summary) message(STATUS "C++11: OFF") endif() - if(EIGEN_TEST_NVCC) + if(EIGEN_TEST_CUDA) message(STATUS "CUDA: ON") else() message(STATUS "CUDA: OFF") diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4420e0c51..841c4572b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -325,9 +325,9 @@ if(EIGEN_TEST_EIGEN2) endif() -# NVCC unit tests -option(EIGEN_TEST_NVCC "Enable NVCC support in unit tests" OFF) -if(EIGEN_TEST_NVCC) +# CUDA unit tests +option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) +if(EIGEN_TEST_CUDA) find_package(CUDA 5.0) if(CUDA_FOUND) @@ -345,7 +345,7 @@ if(CUDA_FOUND) endif(CUDA_FOUND) -endif(EIGEN_TEST_NVCC) +endif(EIGEN_TEST_CUDA) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6bd8cfb92..7972e6776 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -175,7 +175,7 @@ endif() # These tests needs nvcc find_package(CUDA 7.0) -if(CUDA_FOUND AND EIGEN_TEST_NVCC) +if(CUDA_FOUND AND EIGEN_TEST_CUDA) # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor # and -fno-check-new flags since they trigger thousands of compilation warnings # in the CUDA runtime -- cgit v1.2.3 From 0d2a532fc3b25199af03106b6d4ade0f92a30dfc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 13:16:08 -0700 Subject: Created the new EIGEN_TEST_CUDA_CLANG option to compile the CUDA tests using clang instead of nvcc --- cmake/EigenTesting.cmake | 27 +++++++++++++++++++++++---- test/CMakeLists.txt | 11 ++++++++++- unsupported/test/CMakeLists.txt | 4 ++++ 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'cmake') diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index d5e3972b5..6f3661921 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -19,10 +19,25 @@ macro(ei_add_test_internal testname testname_with_suffix) endif() if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) - if (${ARGC} GREATER 2) - cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + if(EIGEN_TEST_CUDA_CLANG) + set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) + if(CUDA_64_BIT_DEVICE_CODE) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + else() + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") + endif() + if (${ARGC} GREATER 2) + add_executable(${targetname} ${filename}) + else() + add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + endif() + target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread") else() - cuda_add_executable(${targetname} ${filename}) + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() endif() else() add_executable(${targetname} ${filename}) @@ -316,7 +331,11 @@ macro(ei_testing_print_summary) endif() if(EIGEN_TEST_CUDA) - message(STATUS "CUDA: ON") + if(EIGEN_TEST_CUDA_CLANG) + message(STATUS "CUDA: ON (using clang)") + else() + message(STATUS "CUDA: ON (using nvcc)") + endif() else() message(STATUS "CUDA: OFF") endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 841c4572b..7bed6a45c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -327,8 +327,14 @@ endif() # CUDA unit tests option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) +option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF) + +if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang") + message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.") +endif() + if(EIGEN_TEST_CUDA) - + find_package(CUDA 5.0) if(CUDA_FOUND) @@ -336,6 +342,9 @@ if(CUDA_FOUND) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") + endif() cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index c6a92fe73..b1931d80a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -190,6 +190,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") + endif() + set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") -- cgit v1.2.3