diff options
author | 2016-04-09 12:47:24 -0700 | |
---|---|---|
committer | 2016-04-09 12:47:24 -0700 | |
commit | 4a92b590a01f5e28486f428f2ba56d5aaddd6931 (patch) | |
tree | 52663abf7b17f2e28f829c97dca0e0ce38fd5bf8 /unsupported | |
parent | ee6c69733aeb06942cabf3bccd12715ef0e43ecf (diff) | |
parent | 0b81a18d129d638f1c95e55f4fe4c958471a79d2 (diff) |
Merge.
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 59 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 1 | ||||
-rwxr-xr-x | unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 13 | ||||
-rw-r--r-- | unsupported/Eigen/src/Splines/SplineFitting.h | 4 | ||||
-rw-r--r-- | unsupported/test/CMakeLists.txt | 3 | ||||
-rw-r--r-- | unsupported/test/autodiff.cpp | 2 | ||||
-rw-r--r-- | unsupported/test/autodiff_scalar.cpp | 4 | ||||
-rw-r--r-- | unsupported/test/cxx11_float16.cpp | 155 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_cuda.cu | 186 |
9 files changed, 394 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 6ee9c88b9..69d1802d5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -133,6 +133,34 @@ class TensorBase<Derived, ReadOnlyAccessors> return unaryExpr(internal::scalar_digamma_op<Scalar>()); } + // igamma(a = this, x = other) + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived> + igamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>()); + } + + // igammac(a = this, x = other) + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>()); + } + + // zeta(x = this, q = other) + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const OtherDerived> + zeta(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_zeta_op<Scalar>()); + } + + // polygamma(n = this, x = other) + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const Derived, const OtherDerived> + polygamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_polygamma_op<Scalar>()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> erf() const { @@ -340,20 +368,6 @@ class TensorBase<Derived, ReadOnlyAccessors> return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>()); } - // igamma(a = this, x = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived> - igamma(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>()); - } - - // igammac(a = this, x = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived> - igammac(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>()); - } - // comparisons and tests for Scalars EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > @@ -386,6 +400,23 @@ class TensorBase<Derived, ReadOnlyAccessors> return operator!=(constant(threshold)); } + // Checks + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> + (isnan)() const { + return unaryExpr(internal::scalar_isnan_op<Scalar>()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> + (isinf)() const { + return unaryExpr(internal::scalar_isinf_op<Scalar>()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> + (isfinite)() const { + return unaryExpr(internal::scalar_isfinite_op<Scalar>()); + } + // Coefficient-wise ternary operators. template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index f2dee3ee8..a96776a77 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -87,6 +87,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index e30ad5b6d..481dfa91a 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -589,23 +589,24 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) template<typename DerType> -inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename Eigen::internal::traits<DerType>::Scalar>, const DerType> > -pow(const Eigen::AutoDiffScalar<DerType>& x, typename Eigen::internal::traits<DerType>::Scalar y) +inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar>, const typename internal::remove_all<DerType>::type> > +pow(const Eigen::AutoDiffScalar<DerType>& x, typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar y) { using namespace Eigen; - typedef typename Eigen::internal::traits<DerType>::Scalar Scalar; - return AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const DerType> >( + typedef typename internal::remove_all<DerType>::type DerTypeCleaned; + typedef typename Eigen::internal::traits<DerTypeCleaned>::Scalar Scalar; + return AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const DerTypeCleaned> >( std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } template<typename DerTypeA,typename DerTypeB> -inline const AutoDiffScalar<Matrix<typename internal::traits<DerTypeA>::Scalar,Dynamic,1> > +inline const AutoDiffScalar<Matrix<typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar,Dynamic,1> > atan2(const AutoDiffScalar<DerTypeA>& a, const AutoDiffScalar<DerTypeB>& b) { using std::atan2; - typedef typename internal::traits<DerTypeA>::Scalar Scalar; + typedef typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar Scalar; typedef AutoDiffScalar<Matrix<Scalar,Dynamic,1> > PlainADS; PlainADS ret; ret.value() = atan2(a.value(), b.value()); diff --git a/unsupported/Eigen/src/Splines/SplineFitting.h b/unsupported/Eigen/src/Splines/SplineFitting.h index 8e6a5aaed..c761a9b3d 100644 --- a/unsupported/Eigen/src/Splines/SplineFitting.h +++ b/unsupported/Eigen/src/Splines/SplineFitting.h @@ -130,12 +130,12 @@ namespace Eigen ParameterVectorType temporaryParameters(numParameters + 1); KnotVectorType derivativeKnots(numInternalDerivatives); - for (unsigned int i = 0; i < numAverageKnots - 1; ++i) + for (DenseIndex i = 0; i < numAverageKnots - 1; ++i) { temporaryParameters[0] = averageKnots[i]; ParameterVectorType parameterIndices(numParameters); int temporaryParameterIndex = 1; - for (int j = 0; j < numParameters; ++j) + for (DenseIndex j = 0; j < numParameters; ++j) { Scalar parameter = parameters[j]; if (parameter >= averageKnots[i] && parameter < averageKnots[i + 1]) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6bd8cfb92..c6a92fe73 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -115,6 +115,7 @@ if(EIGEN_TEST_CXX11) # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) + ei_add_test(cxx11_float16) ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) @@ -175,7 +176,7 @@ endif() # These tests needs nvcc find_package(CUDA 7.0) -if(CUDA_FOUND AND EIGEN_TEST_NVCC) +if(CUDA_FOUND AND EIGEN_TEST_CUDA) # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor # and -fno-check-new flags since they trigger thousands of compilation warnings # in the CUDA runtime diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 1aa1b3d2d..374f86df9 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -16,7 +16,7 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y) using namespace std; // return x+std::sin(y); EIGEN_ASM_COMMENT("mybegin"); - return static_cast<Scalar>(x*2 - pow(x,2) + 2*sqrt(y*y) - 4 * sin(x) + 2 * cos(y) - exp(-0.5*x*x)); + return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0)); //return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2; EIGEN_ASM_COMMENT("myend"); } diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp index ba4b5aec4..c631c734a 100644 --- a/unsupported/test/autodiff_scalar.cpp +++ b/unsupported/test/autodiff_scalar.cpp @@ -30,6 +30,10 @@ template<typename Scalar> void check_atan2() VERIFY_IS_APPROX(res.value(), x.value()); VERIFY_IS_APPROX(res.derivatives(), x.derivatives()); + + res = atan2(r*s+0, r*c+0); + VERIFY_IS_APPROX(res.value(), x.value()); + VERIFY_IS_APPROX(res.derivatives(), x.derivatives()); } diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp new file mode 100644 index 000000000..2dc0872d8 --- /dev/null +++ b/unsupported/test/cxx11_float16.cpp @@ -0,0 +1,155 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_float16 + +#include "main.h" +#include <Eigen/src/Core/arch/CUDA/Half.h> + +using Eigen::half; + +void test_conversion() +{ + // Conversion from float. + VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); + VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); + VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); + VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); + VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); + VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. + + // Denormals. + VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); + VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001); + VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); + + // Verify round-to-nearest-even behavior. + float val1 = float(half(__half{0x3c00})); + float val2 = float(half(__half{0x3c01})); + float val3 = float(half(__half{0x3c02})); + VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); + + // Conversion from int. + VERIFY_IS_EQUAL(half(-1).x, 0xbc00); + VERIFY_IS_EQUAL(half(0).x, 0x0000); + VERIFY_IS_EQUAL(half(1).x, 0x3c00); + VERIFY_IS_EQUAL(half(2).x, 0x4000); + VERIFY_IS_EQUAL(half(3).x, 0x4200); + + // Conversion from bool. + VERIFY_IS_EQUAL(half(false).x, 0x0000); + VERIFY_IS_EQUAL(half(true).x, 0x3c00); + + // Conversion to float. + VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f); + VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f); + + // Denormals. + VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f); + + // NaNs and infinities. + VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. + VERIFY(!(numext::isnan)(float(half(0.0f)))); + VERIFY((numext::isinf)(float(half(__half{0xfc00})))); + VERIFY((numext::isnan)(float(half(__half{0xfc01})))); + VERIFY((numext::isinf)(float(half(__half{0x7c00})))); + VERIFY((numext::isnan)(float(half(__half{0x7c01})))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); +#endif + + // Exactly same checks as above, just directly on the half representation. + VERIFY(!(numext::isinf)(half(__half{0x7bff}))); + VERIFY(!(numext::isnan)(half(__half{0x0000}))); + VERIFY((numext::isinf)(half(__half{0xfc00}))); + VERIFY((numext::isnan)(half(__half{0xfc01}))); + VERIFY((numext::isinf)(half(__half{0x7c00}))); + VERIFY((numext::isnan)(half(__half{0x7c01}))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(half(0.0 / 0.0))); + VERIFY((numext::isinf)(half(1.0 / 0.0))); + VERIFY((numext::isinf)(half(-1.0 / 0.0))); +#endif +} + +void test_arithmetic() +{ + VERIFY_IS_EQUAL(float(half(2) + half(2)), 4); + VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0); + VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f); + VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f); + VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f); + VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f); + VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f); +} + +void test_comparison() +{ + VERIFY(half(1.0f) > half(0.5f)); + VERIFY(half(0.5f) < half(1.0f)); + VERIFY(!(half(1.0f) < half(0.5f))); + VERIFY(!(half(0.5f) > half(1.0f))); + + VERIFY(!(half(4.0f) > half(4.0f))); + VERIFY(!(half(4.0f) < half(4.0f))); + + VERIFY(!(half(0.0f) < half(-0.0f))); + VERIFY(!(half(-0.0f) < half(0.0f))); + VERIFY(!(half(0.0f) > half(-0.0f))); + VERIFY(!(half(-0.0f) > half(0.0f))); + + VERIFY(half(0.2f) > half(-1.0f)); + VERIFY(half(-1.0f) < half(0.2f)); + VERIFY(half(-16.0f) < half(-15.0f)); + + VERIFY(half(1.0f) == half(1.0f)); + VERIFY(half(1.0f) != half(2.0f)); + + // Comparisons with NaNs and infinities. + VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); + VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); + + VERIFY(!(half(1.0) == half(0.0 / 0.0))); + VERIFY(!(half(1.0) < half(0.0 / 0.0))); + VERIFY(!(half(1.0) > half(0.0 / 0.0))); + VERIFY(half(1.0) != half(0.0 / 0.0)); + + VERIFY(half(1.0) < half(1.0 / 0.0)); + VERIFY(half(1.0) > half(-1.0 / 0.0)); +} + +void test_functions() +{ + VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + + VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); + VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI)); + + VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); +} + +void test_cxx11_float16() +{ + CALL_SUBTEST(test_conversion()); + CALL_SUBTEST(test_arithmetic()); + CALL_SUBTEST(test_comparison()); + CALL_SUBTEST(test_functions()); +} diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 4d8465756..134359611 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -122,6 +122,43 @@ void test_cuda_elementwise() cudaFree(d_out); } +void test_cuda_props() { + Tensor<float, 1> in1(200); + Tensor<bool, 1> out(200); + in1.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(bool); + + float* d_in1; + bool* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1( + d_in1, 200); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_out( + d_out, 200); + + gpu_out.device(gpu_device) = (gpu_in1.isnan)(); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 200; ++i) { + VERIFY_IS_EQUAL(out(i), (std::isnan)(in1(i))); + } + + cudaFree(d_in1); + cudaFree(d_out); +} + void test_cuda_reduction() { Tensor<float, 4> in1(72,53,97,113); @@ -627,6 +664,132 @@ void test_cuda_digamma() } template <typename Scalar> +void test_cuda_zeta() +{ + Tensor<Scalar, 1> in_x(6); + Tensor<Scalar, 1> in_q(6); + Tensor<Scalar, 1> out(6); + Tensor<Scalar, 1> expected_out(6); + out.setZero(); + + in_x(0) = Scalar(1); + in_x(1) = Scalar(1.5); + in_x(2) = Scalar(4); + in_x(3) = Scalar(-10.5); + in_x(4) = Scalar(10000.5); + in_x(5) = Scalar(3); + + in_q(0) = Scalar(1.2345); + in_q(1) = Scalar(2); + in_q(2) = Scalar(1.5); + in_q(3) = Scalar(3); + in_q(4) = Scalar(1.0001); + in_q(5) = Scalar(-2.5); + + expected_out(0) = std::numeric_limits<Scalar>::infinity(); + expected_out(1) = Scalar(1.61237534869); + expected_out(2) = Scalar(0.234848505667); + expected_out(3) = Scalar(1.03086757337e-5); + expected_out(4) = Scalar(0.367879440865); + expected_out(5) = Scalar(0.054102025820864097); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_q; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_q), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6); + + gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out(0), expected_out(0)); + VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3)); + + for (int i = 1; i < 6; ++i) { + if (i != 3) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } +} + +template <typename Scalar> +void test_cuda_polygamma() +{ + Tensor<Scalar, 1> in_x(7); + Tensor<Scalar, 1> in_n(7); + Tensor<Scalar, 1> out(7); + Tensor<Scalar, 1> expected_out(7); + out.setZero(); + + in_n(0) = Scalar(1); + in_n(1) = Scalar(1); + in_n(2) = Scalar(1); + in_n(3) = Scalar(17); + in_n(4) = Scalar(31); + in_n(5) = Scalar(28); + in_n(6) = Scalar(8); + + in_x(0) = Scalar(2); + in_x(1) = Scalar(3); + in_x(2) = Scalar(25.5); + in_x(3) = Scalar(4.7); + in_x(4) = Scalar(11.8); + in_x(5) = Scalar(17.7); + in_x(6) = Scalar(30.2); + + expected_out(0) = Scalar(0.644934066848); + expected_out(1) = Scalar(0.394934066848); + expected_out(2) = Scalar(0.0399946696496); + expected_out(3) = Scalar(293.334565435); + expected_out(4) = Scalar(0.445487887616); + expected_out(5) = Scalar(-2.47810300902e-07); + expected_out(6) = Scalar(-8.29668781082e-09); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_n; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_n), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 7; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } +} + +template <typename Scalar> void test_cuda_igamma() { Tensor<Scalar, 2> a(6, 6); @@ -841,6 +1004,7 @@ void test_cxx11_tensor_cuda() { CALL_SUBTEST_1(test_cuda_elementwise_small()); CALL_SUBTEST_1(test_cuda_elementwise()); + CALL_SUBTEST_1(test_cuda_props()); CALL_SUBTEST_1(test_cuda_reduction()); CALL_SUBTEST_2(test_cuda_contraction<ColMajor>()); CALL_SUBTEST_2(test_cuda_contraction<RowMajor>()); @@ -862,8 +1026,10 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f)); - CALL_SUBTEST_4(test_cuda_digamma<float>()); - + CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0)); + CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0)); + CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01)); + CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001)); CALL_SUBTEST_4(test_cuda_erf<float>(1.0f)); CALL_SUBTEST_4(test_cuda_erf<float>(100.0f)); @@ -876,13 +1042,6 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f)); CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f)); - CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0)); - CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0)); - CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01)); - CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001)); - - CALL_SUBTEST_4(test_cuda_digamma<double>()); - CALL_SUBTEST_4(test_cuda_erf<double>(1.0)); CALL_SUBTEST_4(test_cuda_erf<double>(100.0)); CALL_SUBTEST_4(test_cuda_erf<double>(0.01)); @@ -894,6 +1053,15 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_4(test_cuda_erfc<double>(0.01)); CALL_SUBTEST_4(test_cuda_erfc<double>(0.001)); + CALL_SUBTEST_5(test_cuda_digamma<float>()); + CALL_SUBTEST_5(test_cuda_digamma<double>()); + + CALL_SUBTEST_5(test_cuda_polygamma<float>()); + CALL_SUBTEST_5(test_cuda_polygamma<double>()); + + CALL_SUBTEST_5(test_cuda_zeta<float>()); + CALL_SUBTEST_5(test_cuda_zeta<double>()); + CALL_SUBTEST_5(test_cuda_igamma<float>()); CALL_SUBTEST_5(test_cuda_igammac<float>()); |