about summary refs log tree commit diff homepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2016-04-09 12:47:24 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2016-04-09 12:47:24 -0700
commit4a92b590a01f5e28486f428f2ba56d5aaddd6931 (patch)
tree52663abf7b17f2e28f829c97dca0e0ce38fd5bf8 /unsupported
parentee6c69733aeb06942cabf3bccd12715ef0e43ecf (diff)
parent0b81a18d129d638f1c95e55f4fe4c958471a79d2 (diff)
Merge.
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h59
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h1
-rwxr-xr-xunsupported/Eigen/src/AutoDiff/AutoDiffScalar.h13
-rw-r--r--unsupported/Eigen/src/Splines/SplineFitting.h4
-rw-r--r--unsupported/test/CMakeLists.txt3
-rw-r--r--unsupported/test/autodiff.cpp2
-rw-r--r--unsupported/test/autodiff_scalar.cpp4
-rw-r--r--unsupported/test/cxx11_float16.cpp155
-rw-r--r--unsupported/test/cxx11_tensor_cuda.cu186
9 files changed, 394 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 6ee9c88b9..69d1802d5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -133,6 +133,34 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_digamma_op<Scalar>());
}
+ // igamma(a = this, x = other)
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
+ igamma(const OtherDerived& other) const {
+ return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>());
+ }
+
+ // igammac(a = this, x = other)
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived>
+ igammac(const OtherDerived& other) const {
+ return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>());
+ }
+
+ // zeta(x = this, q = other)
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const TensorCwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const OtherDerived>
+ zeta(const OtherDerived& other) const {
+ return binaryExpr(other.derived(), internal::scalar_zeta_op<Scalar>());
+ }
+
+ // polygamma(n = this, x = other)
+ template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const TensorCwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const Derived, const OtherDerived>
+ polygamma(const OtherDerived& other) const {
+ return binaryExpr(other.derived(), internal::scalar_polygamma_op<Scalar>());
+ }
+
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
erf() const {
@@ -340,20 +368,6 @@ class TensorBase<Derived, ReadOnlyAccessors>
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>());
}
- // igamma(a = this, x = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
- igamma(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>());
- }
-
- // igammac(a = this, x = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived>
- igammac(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>());
- }
-
// comparisons and tests for Scalars
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
@@ -386,6 +400,23 @@ class TensorBase<Derived, ReadOnlyAccessors>
return operator!=(constant(threshold));
}
+ // Checks
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived>
+ (isnan)() const {
+ return unaryExpr(internal::scalar_isnan_op<Scalar>());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived>
+ (isinf)() const {
+ return unaryExpr(internal::scalar_isinf_op<Scalar>());
+ }
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived>
+ (isfinite)() const {
+ return unaryExpr(internal::scalar_isfinite_op<Scalar>());
+ }
+
// Coefficient-wise ternary operators.
template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index f2dee3ee8..a96776a77 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -87,6 +87,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
PacketConverter(const TensorEvaluator& impl)
: m_impl(impl) {}
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index e30ad5b6d..481dfa91a 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -589,23 +589,24 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log,
return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));)
template<typename DerType>
-inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename Eigen::internal::traits<DerType>::Scalar>, const DerType> >
-pow(const Eigen::AutoDiffScalar<DerType>& x, typename Eigen::internal::traits<DerType>::Scalar y)
+inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar>, const typename internal::remove_all<DerType>::type> >
+pow(const Eigen::AutoDiffScalar<DerType>& x, typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar y)
{
using namespace Eigen;
- typedef typename Eigen::internal::traits<DerType>::Scalar Scalar;
- return AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const DerType> >(
+ typedef typename internal::remove_all<DerType>::type DerTypeCleaned;
+ typedef typename Eigen::internal::traits<DerTypeCleaned>::Scalar Scalar;
+ return AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const DerTypeCleaned> >(
std::pow(x.value(),y),
x.derivatives() * (y * std::pow(x.value(),y-1)));
}
template<typename DerTypeA,typename DerTypeB>
-inline const AutoDiffScalar<Matrix<typename internal::traits<DerTypeA>::Scalar,Dynamic,1> >
+inline const AutoDiffScalar<Matrix<typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar,Dynamic,1> >
atan2(const AutoDiffScalar<DerTypeA>& a, const AutoDiffScalar<DerTypeB>& b)
{
using std::atan2;
- typedef typename internal::traits<DerTypeA>::Scalar Scalar;
+ typedef typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar Scalar;
typedef AutoDiffScalar<Matrix<Scalar,Dynamic,1> > PlainADS;
PlainADS ret;
ret.value() = atan2(a.value(), b.value());
diff --git a/unsupported/Eigen/src/Splines/SplineFitting.h b/unsupported/Eigen/src/Splines/SplineFitting.h
index 8e6a5aaed..c761a9b3d 100644
--- a/unsupported/Eigen/src/Splines/SplineFitting.h
+++ b/unsupported/Eigen/src/Splines/SplineFitting.h
@@ -130,12 +130,12 @@ namespace Eigen
ParameterVectorType temporaryParameters(numParameters + 1);
KnotVectorType derivativeKnots(numInternalDerivatives);
- for (unsigned int i = 0; i < numAverageKnots - 1; ++i)
+ for (DenseIndex i = 0; i < numAverageKnots - 1; ++i)
{
temporaryParameters[0] = averageKnots[i];
ParameterVectorType parameterIndices(numParameters);
int temporaryParameterIndex = 1;
- for (int j = 0; j < numParameters; ++j)
+ for (DenseIndex j = 0; j < numParameters; ++j)
{
Scalar parameter = parameters[j];
if (parameter >= averageKnots[i] && parameter < averageKnots[i + 1])
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 6bd8cfb92..c6a92fe73 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -115,6 +115,7 @@ if(EIGEN_TEST_CXX11)
# older compiler that don't support cxx11.
set(CMAKE_CXX_STANDARD 11)
+ ei_add_test(cxx11_float16)
ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry)
@@ -175,7 +176,7 @@ endif()
# These tests needs nvcc
find_package(CUDA 7.0)
-if(CUDA_FOUND AND EIGEN_TEST_NVCC)
+if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 1aa1b3d2d..374f86df9 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -16,7 +16,7 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
- return static_cast<Scalar>(x*2 - pow(x,2) + 2*sqrt(y*y) - 4 * sin(x) + 2 * cos(y) - exp(-0.5*x*x));
+ return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0));
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp
index ba4b5aec4..c631c734a 100644
--- a/unsupported/test/autodiff_scalar.cpp
+++ b/unsupported/test/autodiff_scalar.cpp
@@ -30,6 +30,10 @@ template<typename Scalar> void check_atan2()
VERIFY_IS_APPROX(res.value(), x.value());
VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
+
+ res = atan2(r*s+0, r*c+0);
+ VERIFY_IS_APPROX(res.value(), x.value());
+ VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
}
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
new file mode 100644
index 000000000..2dc0872d8
--- /dev/null
+++ b/unsupported/test/cxx11_float16.cpp
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_float16
+
+#include "main.h"
+#include <Eigen/src/Core/arch/CUDA/Half.h>
+
+using Eigen::half;
+
+void test_conversion()
+{
+ // Conversion from float.
+ VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
+ VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
+ VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
+ VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
+ VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
+ VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity.
+
+ // Denormals.
+ VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
+ VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
+ VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
+
+ // Verify round-to-nearest-even behavior.
+ float val1 = float(half(__half{0x3c00}));
+ float val2 = float(half(__half{0x3c01}));
+ float val3 = float(half(__half{0x3c02}));
+ VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
+
+ // Conversion from int.
+ VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
+ VERIFY_IS_EQUAL(half(0).x, 0x0000);
+ VERIFY_IS_EQUAL(half(1).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(2).x, 0x4000);
+ VERIFY_IS_EQUAL(half(3).x, 0x4200);
+
+ // Conversion from bool.
+ VERIFY_IS_EQUAL(half(false).x, 0x0000);
+ VERIFY_IS_EQUAL(half(true).x, 0x3c00);
+
+ // Conversion to float.
+ VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f);
+ VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f);
+
+ // Denormals.
+ VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f);
+
+ // NaNs and infinities.
+ VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
+ VERIFY(!(numext::isnan)(float(half(0.0f))));
+ VERIFY((numext::isinf)(float(half(__half{0xfc00}))));
+ VERIFY((numext::isnan)(float(half(__half{0xfc01}))));
+ VERIFY((numext::isinf)(float(half(__half{0x7c00}))));
+ VERIFY((numext::isnan)(float(half(__half{0x7c01}))));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
+ VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
+ VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
+ VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
+#endif
+
+ // Exactly same checks as above, just directly on the half representation.
+ VERIFY(!(numext::isinf)(half(__half{0x7bff})));
+ VERIFY(!(numext::isnan)(half(__half{0x0000})));
+ VERIFY((numext::isinf)(half(__half{0xfc00})));
+ VERIFY((numext::isnan)(half(__half{0xfc01})));
+ VERIFY((numext::isinf)(half(__half{0x7c00})));
+ VERIFY((numext::isnan)(half(__half{0x7c01})));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
+ VERIFY((numext::isnan)(half(0.0 / 0.0)));
+ VERIFY((numext::isinf)(half(1.0 / 0.0)));
+ VERIFY((numext::isinf)(half(-1.0 / 0.0)));
+#endif
+}
+
+void test_arithmetic()
+{
+ VERIFY_IS_EQUAL(float(half(2) + half(2)), 4);
+ VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0);
+ VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f);
+ VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f);
+ VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f);
+ VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f);
+ VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f);
+}
+
+void test_comparison()
+{
+ VERIFY(half(1.0f) > half(0.5f));
+ VERIFY(half(0.5f) < half(1.0f));
+ VERIFY(!(half(1.0f) < half(0.5f)));
+ VERIFY(!(half(0.5f) > half(1.0f)));
+
+ VERIFY(!(half(4.0f) > half(4.0f)));
+ VERIFY(!(half(4.0f) < half(4.0f)));
+
+ VERIFY(!(half(0.0f) < half(-0.0f)));
+ VERIFY(!(half(-0.0f) < half(0.0f)));
+ VERIFY(!(half(0.0f) > half(-0.0f)));
+ VERIFY(!(half(-0.0f) > half(0.0f)));
+
+ VERIFY(half(0.2f) > half(-1.0f));
+ VERIFY(half(-1.0f) < half(0.2f));
+ VERIFY(half(-16.0f) < half(-15.0f));
+
+ VERIFY(half(1.0f) == half(1.0f));
+ VERIFY(half(1.0f) != half(2.0f));
+
+ // Comparisons with NaNs and infinities.
+ VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
+ VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
+
+ VERIFY(!(half(1.0) == half(0.0 / 0.0)));
+ VERIFY(!(half(1.0) < half(0.0 / 0.0)));
+ VERIFY(!(half(1.0) > half(0.0 / 0.0)));
+ VERIFY(half(1.0) != half(0.0 / 0.0));
+
+ VERIFY(half(1.0) < half(1.0 / 0.0));
+ VERIFY(half(1.0) > half(-1.0 / 0.0));
+}
+
+void test_functions()
+{
+ VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
+ VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
+
+ VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
+ VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
+
+ VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
+}
+
+void test_cxx11_float16()
+{
+ CALL_SUBTEST(test_conversion());
+ CALL_SUBTEST(test_arithmetic());
+ CALL_SUBTEST(test_comparison());
+ CALL_SUBTEST(test_functions());
+}
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 4d8465756..134359611 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -122,6 +122,43 @@ void test_cuda_elementwise()
cudaFree(d_out);
}
+void test_cuda_props() {
+ Tensor<float, 1> in1(200);
+ Tensor<bool, 1> out(200);
+ in1.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(bool);
+
+ float* d_in1;
+ bool* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
+ d_in1, 200);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_out(
+ d_out, 200);
+
+ gpu_out.device(gpu_device) = (gpu_in1.isnan)();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 200; ++i) {
+ VERIFY_IS_EQUAL(out(i), (std::isnan)(in1(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_out);
+}
+
void test_cuda_reduction()
{
Tensor<float, 4> in1(72,53,97,113);
@@ -627,6 +664,132 @@ void test_cuda_digamma()
}
template <typename Scalar>
+void test_cuda_zeta()
+{
+ Tensor<Scalar, 1> in_x(6);
+ Tensor<Scalar, 1> in_q(6);
+ Tensor<Scalar, 1> out(6);
+ Tensor<Scalar, 1> expected_out(6);
+ out.setZero();
+
+ in_x(0) = Scalar(1);
+ in_x(1) = Scalar(1.5);
+ in_x(2) = Scalar(4);
+ in_x(3) = Scalar(-10.5);
+ in_x(4) = Scalar(10000.5);
+ in_x(5) = Scalar(3);
+
+ in_q(0) = Scalar(1.2345);
+ in_q(1) = Scalar(2);
+ in_q(2) = Scalar(1.5);
+ in_q(3) = Scalar(3);
+ in_q(4) = Scalar(1.0001);
+ in_q(5) = Scalar(-2.5);
+
+ expected_out(0) = std::numeric_limits<Scalar>::infinity();
+ expected_out(1) = Scalar(1.61237534869);
+ expected_out(2) = Scalar(0.234848505667);
+ expected_out(3) = Scalar(1.03086757337e-5);
+ expected_out(4) = Scalar(0.367879440865);
+ expected_out(5) = Scalar(0.054102025820864097);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_q;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_q), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
+
+ gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ VERIFY_IS_EQUAL(out(0), expected_out(0));
+ VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
+
+ for (int i = 1; i < 6; ++i) {
+ if (i != 3) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+}
+
+template <typename Scalar>
+void test_cuda_polygamma()
+{
+ Tensor<Scalar, 1> in_x(7);
+ Tensor<Scalar, 1> in_n(7);
+ Tensor<Scalar, 1> out(7);
+ Tensor<Scalar, 1> expected_out(7);
+ out.setZero();
+
+ in_n(0) = Scalar(1);
+ in_n(1) = Scalar(1);
+ in_n(2) = Scalar(1);
+ in_n(3) = Scalar(17);
+ in_n(4) = Scalar(31);
+ in_n(5) = Scalar(28);
+ in_n(6) = Scalar(8);
+
+ in_x(0) = Scalar(2);
+ in_x(1) = Scalar(3);
+ in_x(2) = Scalar(25.5);
+ in_x(3) = Scalar(4.7);
+ in_x(4) = Scalar(11.8);
+ in_x(5) = Scalar(17.7);
+ in_x(6) = Scalar(30.2);
+
+ expected_out(0) = Scalar(0.644934066848);
+ expected_out(1) = Scalar(0.394934066848);
+ expected_out(2) = Scalar(0.0399946696496);
+ expected_out(3) = Scalar(293.334565435);
+ expected_out(4) = Scalar(0.445487887616);
+ expected_out(5) = Scalar(-2.47810300902e-07);
+ expected_out(6) = Scalar(-8.29668781082e-09);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_n;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_n), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+ gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 7; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+}
+
+template <typename Scalar>
void test_cuda_igamma()
{
Tensor<Scalar, 2> a(6, 6);
@@ -841,6 +1004,7 @@ void test_cxx11_tensor_cuda()
{
CALL_SUBTEST_1(test_cuda_elementwise_small());
CALL_SUBTEST_1(test_cuda_elementwise());
+ CALL_SUBTEST_1(test_cuda_props());
CALL_SUBTEST_1(test_cuda_reduction());
CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
@@ -862,8 +1026,10 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
- CALL_SUBTEST_4(test_cuda_digamma<float>());
-
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
@@ -876,13 +1042,6 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
- CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
- CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
- CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
- CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
-
- CALL_SUBTEST_4(test_cuda_digamma<double>());
-
CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
@@ -894,6 +1053,15 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
+ CALL_SUBTEST_5(test_cuda_digamma<float>());
+ CALL_SUBTEST_5(test_cuda_digamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_polygamma<float>());
+ CALL_SUBTEST_5(test_cuda_polygamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_zeta<float>());
+ CALL_SUBTEST_5(test_cuda_zeta<double>());
+
CALL_SUBTEST_5(test_cuda_igamma<float>());
CALL_SUBTEST_5(test_cuda_igammac<float>());