Diffstat (limited to 'unsupported/test')
 -rw-r--r--  unsupported/test/CMakeLists.txt                  | 167
 -rw-r--r--  unsupported/test/NonLinearOptimization.cpp       |  16
 -rw-r--r--  unsupported/test/autodiff.cpp                    |   2
 -rw-r--r--  unsupported/test/autodiff_scalar.cpp             |   4
 -rw-r--r--  unsupported/test/cxx11_float16.cpp               | 155
 -rw-r--r--  unsupported/test/cxx11_meta.cpp                  |  23
 -rw-r--r--  unsupported/test/cxx11_tensor_argmax_cuda.cu (renamed from unsupported/test/cxx11_tensor_argmax_cuda.cpp)       |  24
 -rw-r--r--  unsupported/test/cxx11_tensor_assign.cpp         |  12
 -rw-r--r--  unsupported/test/cxx11_tensor_cast_float16_cuda.cu |  80
 -rw-r--r--  unsupported/test/cxx11_tensor_casts.cpp          |   4
 -rw-r--r--  unsupported/test/cxx11_tensor_contract_cuda.cu (renamed from unsupported/test/cxx11_tensor_contract_cuda.cpp)   | 108
 -rw-r--r--  unsupported/test/cxx11_tensor_contraction.cpp    |  54
 -rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu (renamed from unsupported/test/cxx11_tensor_cuda.cpp)                     | 618
 -rw-r--r--  unsupported/test/cxx11_tensor_custom_op.cpp      |  12
 -rw-r--r--  unsupported/test/cxx11_tensor_device.cu (renamed from unsupported/test/cxx11_tensor_device.cpp)                 |  22
 -rw-r--r--  unsupported/test/cxx11_tensor_empty.cpp          |  40
 -rw-r--r--  unsupported/test/cxx11_tensor_fft.cpp            |  48
 -rw-r--r--  unsupported/test/cxx11_tensor_notification.cpp   |  81
 -rw-r--r--  unsupported/test/cxx11_tensor_of_complex.cpp     |  24
 -rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu | 256
 -rw-r--r--  unsupported/test/cxx11_tensor_random.cpp         |   2
 -rw-r--r--  unsupported/test/cxx11_tensor_random_cuda.cu (renamed from unsupported/test/cxx11_tensor_random_cuda.cpp)       |   0
 -rw-r--r--  unsupported/test/cxx11_tensor_reduction.cpp      |   1
 -rw-r--r--  unsupported/test/cxx11_tensor_reduction_cuda.cu (renamed from unsupported/test/cxx11_tensor_reduction_cuda.cpp) |   7
 -rw-r--r--  unsupported/test/cxx11_tensor_reverse.cpp        |  16
 -rw-r--r--  unsupported/test/cxx11_tensor_roundings.cpp      |  62
 -rw-r--r--  unsupported/test/cxx11_tensor_sugar.cpp          |  25
 -rw-r--r--  unsupported/test/cxx11_tensor_thread_pool.cpp    |  85
 -rw-r--r--  unsupported/test/cxx11_tensor_uint128.cpp        |  50
 -rw-r--r--  unsupported/test/levenberg_marquardt.cpp         |  28
 -rw-r--r--  unsupported/test/matrix_function.cpp             |   4
 -rw-r--r--  unsupported/test/matrix_power.cpp                |   2
 -rw-r--r--  unsupported/test/mpreal/mpreal.h                 |  10
 -rw-r--r--  unsupported/test/splines.cpp                     |   2
34 files changed, 1760 insertions, 284 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 937acc432..f75bf9798 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -1,3 +1,17 @@
+# generate split test header file only if it does not yet exist
+# in order to prevent a rebuild every time cmake is configured
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
+ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "")
+ foreach(i RANGE 1 999)
+ file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h
+ "#ifdef EIGEN_TEST_PART_${i}\n"
+ "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n"
+ "#else\n"
+ "#define CALL_SUBTEST_${i}(FUNC)\n"
+ "#endif\n\n"
+ )
+ endforeach()
+endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
add_custom_target(BuildUnsupported)
@@ -99,61 +113,104 @@ ei_add_test(kronecker_product)
if(EIGEN_TEST_CXX11)
# It should be safe to always run these tests as there is some fallback code for
# older compilers that don't support C++11.
- ei_add_test(cxx11_meta "-std=c++0x")
- ei_add_test(cxx11_tensor_simple "-std=c++0x")
-# ei_add_test(cxx11_tensor_symmetry "-std=c++0x")
- ei_add_test(cxx11_tensor_assign "-std=c++0x")
- ei_add_test(cxx11_tensor_dimension "-std=c++0x")
- ei_add_test(cxx11_tensor_index_list "-std=c++0x")
- ei_add_test(cxx11_tensor_mixed_indices "-std=c++0x")
- ei_add_test(cxx11_tensor_comparisons "-std=c++0x")
- ei_add_test(cxx11_tensor_contraction "-std=c++0x")
- ei_add_test(cxx11_tensor_convolution "-std=c++0x")
- ei_add_test(cxx11_tensor_expr "-std=c++0x")
- ei_add_test(cxx11_tensor_math "-std=c++0x")
- ei_add_test(cxx11_tensor_forced_eval "-std=c++0x")
- ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
- ei_add_test(cxx11_tensor_const "-std=c++0x")
- ei_add_test(cxx11_tensor_of_const_values "-std=c++0x")
- ei_add_test(cxx11_tensor_of_complex "-std=c++0x")
- ei_add_test(cxx11_tensor_of_strings "-std=c++0x")
- ei_add_test(cxx11_tensor_uint128 "-std=c++0x")
- ei_add_test(cxx11_tensor_intdiv "-std=c++0x")
- ei_add_test(cxx11_tensor_lvalue "-std=c++0x")
- ei_add_test(cxx11_tensor_map "-std=c++0x")
- ei_add_test(cxx11_tensor_broadcasting "-std=c++0x")
- ei_add_test(cxx11_tensor_chipping "-std=c++0x")
- ei_add_test(cxx11_tensor_concatenation "-std=c++0x")
- ei_add_test(cxx11_tensor_inflation "-std=c++0x")
- ei_add_test(cxx11_tensor_morphing "-std=c++0x")
- ei_add_test(cxx11_tensor_padding "-std=c++0x")
- ei_add_test(cxx11_tensor_patch "-std=c++0x")
- ei_add_test(cxx11_tensor_image_patch "-std=c++0x")
- ei_add_test(cxx11_tensor_volume_patch "-std=c++0x")
- ei_add_test(cxx11_tensor_reduction "-std=c++0x")
- ei_add_test(cxx11_tensor_argmax "-std=c++0x")
- ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
- ei_add_test(cxx11_tensor_striding "-std=c++0x")
+ set(CMAKE_CXX_STANDARD 11)
+
+ ei_add_test(cxx11_float16)
+ ei_add_test(cxx11_meta)
+ ei_add_test(cxx11_tensor_simple)
+# ei_add_test(cxx11_tensor_symmetry)
+ ei_add_test(cxx11_tensor_assign)
+ ei_add_test(cxx11_tensor_dimension)
+ ei_add_test(cxx11_tensor_index_list)
+ ei_add_test(cxx11_tensor_mixed_indices)
+ ei_add_test(cxx11_tensor_comparisons)
+ ei_add_test(cxx11_tensor_contraction)
+ ei_add_test(cxx11_tensor_convolution)
+ ei_add_test(cxx11_tensor_expr)
+ ei_add_test(cxx11_tensor_math)
+ ei_add_test(cxx11_tensor_forced_eval)
+ ei_add_test(cxx11_tensor_fixed_size)
+ ei_add_test(cxx11_tensor_const)
+ ei_add_test(cxx11_tensor_of_const_values)
+ ei_add_test(cxx11_tensor_of_complex)
+ ei_add_test(cxx11_tensor_of_strings)
+ ei_add_test(cxx11_tensor_intdiv)
+ ei_add_test(cxx11_tensor_lvalue)
+ ei_add_test(cxx11_tensor_map)
+ ei_add_test(cxx11_tensor_broadcasting)
+ ei_add_test(cxx11_tensor_chipping)
+ ei_add_test(cxx11_tensor_concatenation)
+ ei_add_test(cxx11_tensor_inflation)
+ ei_add_test(cxx11_tensor_morphing)
+ ei_add_test(cxx11_tensor_padding)
+ ei_add_test(cxx11_tensor_patch)
+ ei_add_test(cxx11_tensor_image_patch)
+ ei_add_test(cxx11_tensor_volume_patch)
+ ei_add_test(cxx11_tensor_reduction)
+ ei_add_test(cxx11_tensor_argmax)
+ ei_add_test(cxx11_tensor_shuffling)
+ ei_add_test(cxx11_tensor_striding)
+ ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
- ei_add_test(cxx11_tensor_ref "-std=c++0x")
- ei_add_test(cxx11_tensor_random "-std=c++0x")
- ei_add_test(cxx11_tensor_casts "-std=c++0x")
- ei_add_test(cxx11_tensor_reverse "-std=c++0x")
- ei_add_test(cxx11_tensor_layout_swap "-std=c++0x")
- ei_add_test(cxx11_tensor_io "-std=c++0x")
- ei_add_test(cxx11_tensor_generator "-std=c++0x")
- ei_add_test(cxx11_tensor_custom_op "-std=c++0x")
- ei_add_test(cxx11_tensor_custom_index "-std=c++0x")
- ei_add_test(cxx11_tensor_sugar "-std=c++0x")
- ei_add_test(cxx11_tensor_fft "-std=c++0x")
- ei_add_test(cxx11_tensor_ifft "-std=c++0x")
-
- # These tests needs nvcc
-# ei_add_test(cxx11_tensor_device "-std=c++0x")
-# ei_add_test(cxx11_tensor_cuda "-std=c++0x")
-# ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x")
-# ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x")
-# ei_add_test(cxx11_tensor_random_cuda "-std=c++0x")
-# ei_add_test(cxx11_tensor_argmax_cuda "-std=c++0x")
+ ei_add_test(cxx11_tensor_ref)
+ ei_add_test(cxx11_tensor_random)
+ ei_add_test(cxx11_tensor_casts)
+ ei_add_test(cxx11_tensor_roundings)
+ ei_add_test(cxx11_tensor_reverse)
+ ei_add_test(cxx11_tensor_layout_swap)
+ ei_add_test(cxx11_tensor_io)
+ ei_add_test(cxx11_tensor_generator)
+ ei_add_test(cxx11_tensor_custom_op)
+ ei_add_test(cxx11_tensor_custom_index)
+ ei_add_test(cxx11_tensor_sugar)
+ ei_add_test(cxx11_tensor_fft)
+ ei_add_test(cxx11_tensor_ifft)
+ ei_add_test(cxx11_tensor_empty)
+
+ if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+ # This test requires __uint128_t which is only available on 64-bit systems
+ ei_add_test(cxx11_tensor_uint128)
+ endif()
+
+endif()
+
+# These tests need nvcc
+find_package(CUDA 7.0)
+if(CUDA_FOUND AND EIGEN_TEST_CUDA)
+ # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
+ # and -fno-check-new flags since they trigger thousands of compilation warnings
+ # in the CUDA runtime
+ string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+ message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
+
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
+ endif()
+ if(EIGEN_TEST_CUDA_CLANG)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
+ endif()
+
+ set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
+ cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
+ set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
+
+ ei_add_test(cxx11_tensor_device)
+ ei_add_test(cxx11_tensor_cuda)
+ ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_reduction_cuda)
+ ei_add_test(cxx11_tensor_argmax_cuda)
+ ei_add_test(cxx11_tensor_cast_float16_cuda)
+
+ # The random number generation code requires arch 3.5 or greater.
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
+ ei_add_test(cxx11_tensor_random_cuda)
+ endif()
+
+ ei_add_test(cxx11_tensor_of_float16_cuda)
+ unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
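
Note on the split_test_helper.h generated at the top of this file: it is what makes the numbered CALL_SUBTEST_<i> macros used throughout these tests work. Each test binary is compiled with exactly one EIGEN_TEST_PART_<i> defined, so only that part's subtests expand to real calls and the rest compile away. For part 1 the generated header contains (verbatim, per the foreach loop above):

    #ifdef EIGEN_TEST_PART_1
    #define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
    #else
    #define CALL_SUBTEST_1(FUNC)
    #endif

This lets a single long-running source file, such as cxx11_tensor_contract_cuda.cu below, be split into several executables that build and run in parallel.
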
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index 724ea7b5b..6a5ed057f 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -14,6 +14,9 @@
using std::sqrt;
+// tolerance for checking the number of iterations
+#define LM_EVAL_COUNT_TOL 4/3
+
int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag)
{
/* subroutine fcn for chkder example. */
@@ -1023,7 +1026,8 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.njev, 72);
// check norm^2
std::cout.precision(30);
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ std::cout << lm.fvec.squaredNorm() << "\n";
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1044,7 +1048,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev, 9);
VERIFY_IS_EQUAL(lm.njev, 8);
// check norm^2
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1354,8 +1358,12 @@ void testNistMGH17(void)
// check return value
VERIFY_IS_EQUAL(info, 2);
- VERIFY(lm.nfev < 650); // 602
- VERIFY(lm.njev < 600); // 545
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev, 602); // 602
+ VERIFY_IS_EQUAL(lm.njev, 545); // 545
+ --g_test_level;
+ VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL);
/*
* Second try
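
One subtlety in the new tolerance: LM_EVAL_COUNT_TOL is defined as the token sequence 4/3, not the value 4/3, so `602 * LM_EVAL_COUNT_TOL` expands to `602 * 4 / 3`, which integer arithmetic groups left to right as (602 * 4) / 3 = 802 — roughly a 33% margin over the 602 evaluations seen in practice. A minimal sketch verifying the expansion:

    #include <cassert>

    #define LM_EVAL_COUNT_TOL 4/3   // textual expansion, as in the test

    int main() {
      // (602 * 4) / 3 == 802, not 602 * (4/3) == 602
      assert(602 * LM_EVAL_COUNT_TOL == 802);
      assert(545 * LM_EVAL_COUNT_TOL == 726);
      return 0;
    }

Had the macro been parenthesized as (4/3), it would evaluate to 1 and the bound would collapse to exact equality.
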
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 1aa1b3d2d..374f86df9 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -16,7 +16,7 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
- return static_cast<Scalar>(x*2 - pow(x,2) + 2*sqrt(y*y) - 4 * sin(x) + 2 * cos(y) - exp(-0.5*x*x));
+ return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0));
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
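
The point of the rewritten expression (and of the analogous `r*s+0` / `r*c+0` arguments added to autodiff_scalar.cpp below) appears to be that every math function now receives a compound expression such as 1+x or y*y+0 rather than a plain AutoDiffScalar, exercising the overloads that accept AutoDiffScalar-of-expression types. A minimal sketch of the distinction, assuming the usual AutoDiff header:

    #include <unsupported/Eigen/AutoDiff>

    int main() {
      typedef Eigen::AutoDiffScalar<Eigen::VectorXd> AD;
      AD x(1.5, 2, 0);     // value 1.5, two derivatives, differentiate w.r.t. #0
      AD a = sin(x);       // sin over a plain AutoDiffScalar<VectorXd>
      AD b = sin(0 + x);   // sin over an AutoDiffScalar wrapping a vector
                           // expression: a different template instantiation
      return a.value() == b.value() ? 0 : 1;
    }
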
diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp
index ba4b5aec4..c631c734a 100644
--- a/unsupported/test/autodiff_scalar.cpp
+++ b/unsupported/test/autodiff_scalar.cpp
@@ -30,6 +30,10 @@ template<typename Scalar> void check_atan2()
VERIFY_IS_APPROX(res.value(), x.value());
VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
+
+ res = atan2(r*s+0, r*c+0);
+ VERIFY_IS_APPROX(res.value(), x.value());
+ VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
}
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
new file mode 100644
index 000000000..2dc0872d8
--- /dev/null
+++ b/unsupported/test/cxx11_float16.cpp
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_float16
+
+#include "main.h"
+#include <Eigen/src/Core/arch/CUDA/Half.h>
+
+using Eigen::half;
+
+void test_conversion()
+{
+ // Conversion from float.
+ VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
+ VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
+ VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
+ VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
+ VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
+ VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity.
+
+ // Denormals.
+ VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
+ VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
+ VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
+
+ // Verify round-to-nearest-even behavior.
+ float val1 = float(half(__half{0x3c00}));
+ float val2 = float(half(__half{0x3c01}));
+ float val3 = float(half(__half{0x3c02}));
+ VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
+
+ // Conversion from int.
+ VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
+ VERIFY_IS_EQUAL(half(0).x, 0x0000);
+ VERIFY_IS_EQUAL(half(1).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(2).x, 0x4000);
+ VERIFY_IS_EQUAL(half(3).x, 0x4200);
+
+ // Conversion from bool.
+ VERIFY_IS_EQUAL(half(false).x, 0x0000);
+ VERIFY_IS_EQUAL(half(true).x, 0x3c00);
+
+ // Conversion to float.
+ VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f);
+ VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f);
+
+ // Denormals.
+ VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f);
+
+ // NaNs and infinities.
+ VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
+ VERIFY(!(numext::isnan)(float(half(0.0f))));
+ VERIFY((numext::isinf)(float(half(__half{0xfc00}))));
+ VERIFY((numext::isnan)(float(half(__half{0xfc01}))));
+ VERIFY((numext::isinf)(float(half(__half{0x7c00}))));
+ VERIFY((numext::isnan)(float(half(__half{0x7c01}))));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
+ VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
+ VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
+ VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
+#endif
+
+ // Exactly same checks as above, just directly on the half representation.
+ VERIFY(!(numext::isinf)(half(__half{0x7bff})));
+ VERIFY(!(numext::isnan)(half(__half{0x0000})));
+ VERIFY((numext::isinf)(half(__half{0xfc00})));
+ VERIFY((numext::isnan)(half(__half{0xfc01})));
+ VERIFY((numext::isinf)(half(__half{0x7c00})));
+ VERIFY((numext::isnan)(half(__half{0x7c01})));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
+ VERIFY((numext::isnan)(half(0.0 / 0.0)));
+ VERIFY((numext::isinf)(half(1.0 / 0.0)));
+ VERIFY((numext::isinf)(half(-1.0 / 0.0)));
+#endif
+}
+
+void test_arithmetic()
+{
+ VERIFY_IS_EQUAL(float(half(2) + half(2)), 4);
+ VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0);
+ VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f);
+ VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f);
+ VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f);
+ VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f);
+ VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f);
+}
+
+void test_comparison()
+{
+ VERIFY(half(1.0f) > half(0.5f));
+ VERIFY(half(0.5f) < half(1.0f));
+ VERIFY(!(half(1.0f) < half(0.5f)));
+ VERIFY(!(half(0.5f) > half(1.0f)));
+
+ VERIFY(!(half(4.0f) > half(4.0f)));
+ VERIFY(!(half(4.0f) < half(4.0f)));
+
+ VERIFY(!(half(0.0f) < half(-0.0f)));
+ VERIFY(!(half(-0.0f) < half(0.0f)));
+ VERIFY(!(half(0.0f) > half(-0.0f)));
+ VERIFY(!(half(-0.0f) > half(0.0f)));
+
+ VERIFY(half(0.2f) > half(-1.0f));
+ VERIFY(half(-1.0f) < half(0.2f));
+ VERIFY(half(-16.0f) < half(-15.0f));
+
+ VERIFY(half(1.0f) == half(1.0f));
+ VERIFY(half(1.0f) != half(2.0f));
+
+ // Comparisons with NaNs and infinities.
+ VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
+ VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
+
+ VERIFY(!(half(1.0) == half(0.0 / 0.0)));
+ VERIFY(!(half(1.0) < half(0.0 / 0.0)));
+ VERIFY(!(half(1.0) > half(0.0 / 0.0)));
+ VERIFY(half(1.0) != half(0.0 / 0.0));
+
+ VERIFY(half(1.0) < half(1.0 / 0.0));
+ VERIFY(half(1.0) > half(-1.0 / 0.0));
+}
+
+void test_functions()
+{
+ VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
+ VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
+
+ VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
+ VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
+
+ VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
+}
+
+void test_cxx11_float16()
+{
+ CALL_SUBTEST(test_conversion());
+ CALL_SUBTEST(test_arithmetic());
+ CALL_SUBTEST(test_comparison());
+ CALL_SUBTEST(test_functions());
+}
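
The expected bit patterns above follow directly from the IEEE 754 binary16 layout: 1 sign bit, 5 exponent bits with bias 15, and 10 mantissa bits. A small host-side decoder for normal values, matching the constants checked in the test:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Decode a binary16 bit pattern (normal numbers only) to double.
    double decode_half(uint16_t h) {
      int sign     = (h >> 15) & 0x1;
      int exponent = (h >> 10) & 0x1f;  // 5 bits, bias 15
      int mantissa =  h        & 0x3ff; // 10 bits, implicit leading 1
      double v = (1.0 + mantissa / 1024.0) * std::ldexp(1.0, exponent - 15);
      return sign ? -v : v;
    }

    int main() {
      assert(decode_half(0x3c00) == 1.0);     // exponent 15, mantissa 0
      assert(decode_half(0x3800) == 0.5);     // exponent 14
      assert(decode_half(0xbc00) == -1.0);    // sign bit set
      assert(decode_half(0x7bff) == 65504.0); // largest finite half
      return 0;
    }

Exponent 0x1f with zero mantissa is infinity (0x7c00 / 0xfc00) and with nonzero mantissa is NaN, which is why 0x7c01 and 0xfc01 must register as NaN in the checks above.
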
diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp
index 4f45e1dd3..ecac3add1 100644
--- a/unsupported/test/cxx11_meta.cpp
+++ b/unsupported/test/cxx11_meta.cpp
@@ -9,6 +9,7 @@
#include "main.h"
+#include <array>
#include <Eigen/CXX11/Core>
using Eigen::internal::is_same;
@@ -249,8 +250,8 @@ static void test_is_same_gf()
{
VERIFY((!is_same_gf<dummy_a, dummy_b>::value));
VERIFY((!!is_same_gf<dummy_a, dummy_a>::value));
- VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_b>::global_flags), 0);
- VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_a>::global_flags), 0);
+ VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_b>::global_flags), false);
+ VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_a>::global_flags), false);
}
static void test_apply_op()
@@ -293,8 +294,8 @@ static void test_arg_reductions()
static void test_array_reverse_and_reduce()
{
- std::array<int, 6> a{{4, 8, 15, 16, 23, 42}};
- std::array<int, 6> b{{42, 23, 16, 15, 8, 4}};
+ array<int, 6> a{{4, 8, 15, 16, 23, 42}};
+ array<int, 6> b{{42, 23, 16, 15, 8, 4}};
// there is no operator<< for std::array, so VERIFY_IS_EQUAL will
// not compile
@@ -308,11 +309,11 @@ static void test_array_reverse_and_reduce()
static void test_array_zip_and_apply()
{
- std::array<int, 6> a{{4, 8, 15, 16, 23, 42}};
- std::array<int, 6> b{{0, 1, 2, 3, 4, 5}};
- std::array<int, 6> c{{4, 9, 17, 19, 27, 47}};
- std::array<int, 6> d{{0, 8, 30, 48, 92, 210}};
- std::array<int, 6> e{{0, 2, 4, 6, 8, 10}};
+ array<int, 6> a{{4, 8, 15, 16, 23, 42}};
+ array<int, 6> b{{0, 1, 2, 3, 4, 5}};
+ array<int, 6> c{{4, 9, 17, 19, 27, 47}};
+ array<int, 6> d{{0, 8, 30, 48, 92, 210}};
+ array<int, 6> e{{0, 2, 4, 6, 8, 10}};
VERIFY((array_zip<sum_op>(a, b) == c));
VERIFY((array_zip<product_op>(a, b) == d));
@@ -325,8 +326,8 @@ static void test_array_zip_and_apply()
static void test_array_misc()
{
- std::array<int, 3> a3{{1, 1, 1}};
- std::array<int, 6> a6{{2, 2, 2, 2, 2, 2}};
+ array<int, 3> a3{{1, 1, 1}};
+ array<int, 6> a6{{2, 2, 2, 2, 2, 2}};
VERIFY((repeat<3, int>(1) == a3));
VERIFY((repeat<6, int>(2) == a6));
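
Dropping the std:: qualifier here switches these tests to Eigen's `array` alias, which maps to std::array when the standard library provides it and to Eigen's own replacement otherwise (e.g. in device code); the new `#include <array>` makes the remaining std dependency explicit. A sketch of the intended equivalence, assuming <Eigen/CXX11/Core> as included by the test:

    #include <Eigen/CXX11/Core>

    // With a C++11 standard library, Eigen::array<int, 3> is std::array<int, 3>,
    // so aggregate initialization and comparison work unchanged.
    Eigen::array<int, 3> a3{{1, 1, 1}};
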
diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cpp b/unsupported/test/cxx11_tensor_argmax_cuda.cu
index d37490d15..41ccbe974 100644
--- a/unsupported/test/cxx11_tensor_argmax_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-// TODO(mdevin): Free the cuda memory.
+#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
#define EIGEN_USE_GPU
@@ -56,6 +56,10 @@ void test_cuda_simple_argmax()
VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1);
VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0);
+
+ cudaFree(d_in);
+ cudaFree(d_out_max);
+ cudaFree(d_out_min);
}
template <int DataLayout>
@@ -141,6 +145,9 @@ void test_cuda_argmax_dim()
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
+
+ cudaFree(d_in);
+ cudaFree(d_out);
}
}
@@ -227,15 +234,18 @@ void test_cuda_argmin_dim()
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
+
+ cudaFree(d_in);
+ cudaFree(d_out);
}
}
void test_cxx11_tensor_cuda()
{
- CALL_SUBTEST(test_cuda_simple_argmax<RowMajor>());
- CALL_SUBTEST(test_cuda_simple_argmax<ColMajor>());
- CALL_SUBTEST(test_cuda_argmax_dim<RowMajor>());
- CALL_SUBTEST(test_cuda_argmax_dim<ColMajor>());
- CALL_SUBTEST(test_cuda_argmin_dim<RowMajor>());
- CALL_SUBTEST(test_cuda_argmin_dim<ColMajor>());
+ CALL_SUBTEST_1(test_cuda_simple_argmax<RowMajor>());
+ CALL_SUBTEST_1(test_cuda_simple_argmax<ColMajor>());
+ CALL_SUBTEST_2(test_cuda_argmax_dim<RowMajor>());
+ CALL_SUBTEST_2(test_cuda_argmax_dim<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_argmin_dim<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_argmin_dim<ColMajor>());
}
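
The added cudaFree calls resolve the removed TODO about leaking device memory. A possible alternative shape for the same cleanup — a sketch only, not what these tests use — is an RAII guard, so an early assert cannot skip the frees:

    #include <cuda_runtime.h>
    #include <cstddef>

    // Frees the device allocation when the enclosing scope exits.
    struct DeviceBuffer {
      void* ptr;
      explicit DeviceBuffer(std::size_t bytes) : ptr(0) { cudaMalloc(&ptr, bytes); }
      ~DeviceBuffer() { cudaFree(ptr); }
     private:
      DeviceBuffer(const DeviceBuffer&);            // non-copyable
      DeviceBuffer& operator=(const DeviceBuffer&);
    };
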
diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp
index d16aaf847..e5cf61fe1 100644
--- a/unsupported/test/cxx11_tensor_assign.cpp
+++ b/unsupported/test/cxx11_tensor_assign.cpp
@@ -29,8 +29,8 @@ static void test_1d()
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
- TensorMap<Tensor<int, 1>> vec3(col_major, 6);
- TensorMap<Tensor<int, 1, RowMajor>> vec4(row_major, 6);
+ TensorMap<Tensor<int, 1> > vec3(col_major, 6);
+ TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
vec3 = vec1;
vec4 = vec2;
@@ -92,8 +92,8 @@ static void test_2d()
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
- TensorMap<Tensor<int, 2>> mat3(row_major, 2, 3);
- TensorMap<Tensor<int, 2, RowMajor>> mat4(col_major, 2, 3);
+ TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
+ TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);
mat3 = mat1;
mat4 = mat2;
@@ -152,8 +152,8 @@ static void test_3d()
int row_major[2*3*7];
memset(col_major, 0, 2*3*7*sizeof(int));
memset(row_major, 0, 2*3*7*sizeof(int));
- TensorMap<Tensor<int, 3>> mat3(col_major, 2, 3, 7);
- TensorMap<Tensor<int, 3, RowMajor>> mat4(row_major, 2, 3, 7);
+ TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
+ TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
mat3 = mat1;
mat4 = mat2;
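
The added space in `Tensor<int, 1> >` is not cosmetic: before C++11, two adjacent closing angle brackets are lexed as the right-shift operator, so `TensorMap<Tensor<int, 1>>` is a syntax error on C++03 compilers. For instance:

    #include <vector>

    std::vector<std::vector<int> > ok;      // parses under C++03 and later
    // std::vector<std::vector<int>> oops;  // ">>" is operator>> pre-C++11

Presumably the change keeps these sources valid for pre-C++11 compilers as well.
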
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
new file mode 100644
index 000000000..f22b99de8
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -0,0 +1,80 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_conversion() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ Tensor<float, 1> floats(num_elem);
+ floats.setRandom();
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
+ d_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
+ d_conv, num_elem);
+
+ gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));
+
+ gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
+ gpu_conv.device(gpu_device) = gpu_half.cast<float>();
+
+ Tensor<float, 1> initial(num_elem);
+ Tensor<float, 1> final(num_elem);
+ gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(initial(i), final(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_half);
+ gpu_device.deallocate(d_conv);
+}
+
+
+void test_fallback_conversion() {
+ int num_elem = 101;
+ Tensor<float, 1> floats(num_elem);
+ floats.setRandom();
+
+ Eigen::Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>();
+ Eigen::Tensor<float, 1> conv = halfs.cast<float>();
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(floats(i), conv(i));
+ }
+}
+
+
+void test_cxx11_tensor_cast_float16_cuda()
+{
+ CALL_SUBTEST(test_cuda_conversion());
+ CALL_SUBTEST(test_fallback_conversion());
+}
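
VERIFY_IS_APPROX (rather than exact equality) is the right check for this round trip: binary16 keeps an 11-bit significand, so float -> half -> float incurs a relative rounding error of up to about 2^-11 ≈ 4.9e-4 for values in the normal range. A host-only sketch of the same bound, using the Eigen header the test already includes:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>
    #include <cmath>

    int main() {
      float f = 0.1f;   // not representable exactly in binary16
      float roundtrip = static_cast<float>(Eigen::half(f));
      // Round-to-nearest error is at most half an ulp, i.e. <= |f| * 2^-11.
      assert(std::fabs(f - roundtrip) <= std::fabs(f) * std::ldexp(1.0f, -11));
      return 0;
    }
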
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp
index 729e43327..3c6d0d2ff 100644
--- a/unsupported/test/cxx11_tensor_casts.cpp
+++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -24,12 +24,12 @@ static void test_simple_cast()
cplextensor.setRandom();
chartensor = ftensor.cast<char>();
- cplextensor = ftensor.cast<std::complex<float>>();
+ cplextensor = ftensor.cast<std::complex<float> >();
for (int i = 0; i < 20; ++i) {
for (int j = 0; j < 30; ++j) {
VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
- VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float>>(ftensor(i,j)));
+ VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
}
}
}
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cpp b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 035a093e6..6d1ef07f9 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -22,16 +22,16 @@ using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
-static void test_cuda_contraction(int m_size, int k_size, int n_size)
+void test_cuda_contraction(int m_size, int k_size, int n_size)
{
- cout<<"Calling with ("<<m_size<<","<<k_size<<","<<n_size<<")"<<std::endl;
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
- Tensor<float, 2, DataLayout> t_left(Eigen::array<int, 2>(m_size, k_size));
- Tensor<float, 2, DataLayout> t_right(Eigen::array<int, 2>(k_size, n_size));
- Tensor<float, 2, DataLayout> t_result(Eigen::array<int, 2>(m_size, n_size));
- Tensor<float, 2, DataLayout> t_result_gpu(Eigen::array<int, 2>(m_size, n_size));
+ Tensor<float, 2, DataLayout> t_left(m_size, k_size);
+ Tensor<float, 2, DataLayout> t_right(k_size, n_size);
+ Tensor<float, 2, DataLayout> t_result(m_size, n_size);
+ Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
t_left.setRandom();
@@ -67,12 +67,16 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
t_result = t_left.contract(t_right, dims);
cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
- for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
- if (fabs(t_result.data()[i] - t_result_gpu.data()[i]) >= 1e-4) {
- cout << "mismatch detected at index " << i << ": " << t_result.data()[i]
- << " vs " << t_result_gpu.data()[i] << endl;
- assert(false);
+ for (size_t i = 0; i < t_result.size(); i++) {
+ if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
+ continue;
}
+ if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
+ continue;
+ }
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << t_result_gpu(i) << std::endl;
+ assert(false);
}
cudaFree((void*)d_t_left);
@@ -80,41 +84,69 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
cudaFree((void*)d_t_result);
}
-
-void test_cxx11_tensor_cuda()
-{
- cout<<"Calling contraction tests"<<std::endl;
- CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, 128));
- CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, 128));
+template<int DataLayout>
+void test_cuda_contraction_m() {
for (int k = 32; k < 256; k++) {
- CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, k, 128));
- CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, k, 128));
+ test_cuda_contraction<ColMajor>(k, 128, 128);
+ test_cuda_contraction<RowMajor>(k, 128, 128);
}
+}
+
+template<int DataLayout>
+void test_cuda_contraction_k() {
for (int k = 32; k < 256; k++) {
- CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, k));
- CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, k));
+ test_cuda_contraction<ColMajor>(128, k, 128);
+ test_cuda_contraction<RowMajor>(128, k, 128);
}
+}
+
+template<int DataLayout>
+void test_cuda_contraction_n() {
for (int k = 32; k < 256; k++) {
- CALL_SUBTEST(test_cuda_contraction<ColMajor>(k, 128, 128));
- CALL_SUBTEST(test_cuda_contraction<RowMajor>(k, 128, 128));
+ test_cuda_contraction<ColMajor>(128, 128, k);
+ test_cuda_contraction<RowMajor>(128, 128, k);
}
+}
- int m_sizes[] = {31, 39, 63, 64, 65,
- 127, 129, 255, 257, 511,
- 512, 513, 1023, 1024, 1025 };
- int n_sizes[] = {31, 39, 63, 64, 65,
- 127, 129, 255, 257, 511,
- 512, 513, 1023, 1024, 1025 };
-
- int k_sizes[] = { 31, 39, 63, 64, 65,
- 95, 96, 127, 129, 255,
- 257, 511, 512, 513, 1023,
- 1024, 1025};
- for (int i = 0; i <15; i++)
- for (int j = 0; j < 15; j++)
+template<int DataLayout>
+void test_cuda_contraction_sizes() {
+ int m_sizes[] = { 31, 39, 63, 64, 65,
+ 127, 129, 255, 257 , 511,
+ 512, 513, 1023, 1024, 1025};
+
+ int n_sizes[] = { 31, 39, 63, 64, 65,
+ 127, 129, 255, 257, 511,
+ 512, 513, 1023, 1024, 1025};
+
+ int k_sizes[] = { 31, 39, 63, 64, 65,
+ 95, 96, 127, 129, 255,
+ 257, 511, 512, 513, 1023,
+ 1024, 1025};
+
+ for (int i = 0; i < 15; i++) {
+ for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
- CALL_SUBTEST(test_cuda_contraction<ColMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
- CALL_SUBTEST(test_cuda_contraction<RowMajor>(m_sizes[i], n_sizes[j], k_sizes[k]));
+ test_cuda_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
}
+ }
+ }
+}
+
+void test_cxx11_tensor_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
+
+ CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
+
+ CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>());
+ CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>());
+
+ CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>());
+ CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>());
+
+ CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>());
+ CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>());
}
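
Two things change in this file beyond the .cu rename. First, the long parameter sweeps move out of the single test function into per-axis helpers dispatched through numbered subtests, so the nine parts can build and run concurrently (see the split_test_helper.h note above). Second, the mismatch check now accepts a value when either the absolute error is under 1e-4 or Eigen::internal::isApprox finds it relatively close — small results need the absolute test, large ones the relative test. As a standalone predicate, the logic is roughly:

    #include <cmath>
    #include <algorithm>

    // Accept on small absolute OR small relative error (isApprox scales by
    // the smaller magnitude, roughly as sketched here).
    bool close_enough(float expected, float actual) {
      float diff = std::fabs(expected - actual);
      if (diff < 1e-4f) return true;
      float scale = std::min(std::fabs(expected), std::fabs(actual));
      return diff <= 1e-4f * scale;
    }
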
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index b0d52c6cf..0e16308a2 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -29,7 +29,7 @@ static void test_evals()
Tensor<float, 2, DataLayout> mat4(3,3);
mat4.setZero();
- Eigen::array<DimPair, 1> dims3({{DimPair(0, 0)}});
+ Eigen::array<DimPair, 1> dims3 = {{DimPair(0, 0)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims3)), DefaultDevice> Evaluator;
Evaluator eval(mat1.contract(mat2, dims3), DefaultDevice());
eval.evalTo(mat4.data());
@@ -49,7 +49,7 @@ static void test_evals()
Tensor<float, 2, DataLayout> mat5(2,2);
mat5.setZero();
- Eigen::array<DimPair, 1> dims4({{DimPair(1, 1)}});
+ Eigen::array<DimPair, 1> dims4 = {{DimPair(1, 1)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims4)), DefaultDevice> Evaluator2;
Evaluator2 eval2(mat1.contract(mat2, dims4), DefaultDevice());
eval2.evalTo(mat5.data());
@@ -64,7 +64,7 @@ static void test_evals()
Tensor<float, 2, DataLayout> mat6(2,2);
mat6.setZero();
- Eigen::array<DimPair, 1> dims6({{DimPair(1, 0)}});
+ Eigen::array<DimPair, 1> dims6 = {{DimPair(1, 0)}};
typedef TensorEvaluator<decltype(mat1.contract(mat3, dims6)), DefaultDevice> Evaluator3;
Evaluator3 eval3(mat1.contract(mat3, dims6), DefaultDevice());
eval3.evalTo(mat6.data());
@@ -89,7 +89,7 @@ static void test_scalar()
Tensor<float, 1, DataLayout> scalar(1);
scalar.setZero();
- Eigen::array<DimPair, 1> dims({{DimPair(0, 0)}});
+ Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
typedef TensorEvaluator<decltype(vec1.contract(vec2, dims)), DefaultDevice> Evaluator;
Evaluator eval(vec1.contract(vec2, dims), DefaultDevice());
eval.evalTo(scalar.data());
@@ -113,7 +113,7 @@ static void test_multidims()
Tensor<float, 3, DataLayout> mat3(2, 2, 2);
mat3.setZero();
- Eigen::array<DimPair, 2> dims({{DimPair(1, 2), DimPair(2, 3)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(1, 2), DimPair(2, 3)}};
typedef TensorEvaluator<decltype(mat1.contract(mat2, dims)), DefaultDevice> Evaluator;
Evaluator eval(mat1.contract(mat2, dims), DefaultDevice());
eval.evalTo(mat3.data());
@@ -138,6 +138,26 @@ static void test_multidims()
mat1(1,0,1)*mat2(1,0,0,1) + mat1(1,1,1)*mat2(1,0,1,1));
VERIFY_IS_APPROX(mat3(1,1,1), mat1(1,0,0)*mat2(1,1,0,0) + mat1(1,1,0)*mat2(1,1,1,0) +
mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1));
+
+ Tensor<float, 2, DataLayout> mat4(2, 2);
+ Tensor<float, 3, DataLayout> mat5(2, 2, 2);
+
+ mat4.setRandom();
+ mat5.setRandom();
+
+ Tensor<float, 1, DataLayout> mat6(2);
+ mat6.setZero();
+ Eigen::array<DimPair, 2> dims2({{DimPair(0, 1), DimPair(1, 0)}});
+ typedef TensorEvaluator<decltype(mat4.contract(mat5, dims2)), DefaultDevice> Evaluator2;
+ Evaluator2 eval2(mat4.contract(mat5, dims2), DefaultDevice());
+ eval2.evalTo(mat6.data());
+ EIGEN_STATIC_ASSERT(Evaluator2::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval2.dimensions()[0], 2);
+
+ VERIFY_IS_APPROX(mat6(0), mat4(0,0)*mat5(0,0,0) + mat4(1,0)*mat5(0,1,0) +
+ mat4(0,1)*mat5(1,0,0) + mat4(1,1)*mat5(1,1,0));
+ VERIFY_IS_APPROX(mat6(1), mat4(0,0)*mat5(0,0,1) + mat4(1,0)*mat5(0,1,1) +
+ mat4(0,1)*mat5(1,0,1) + mat4(1,1)*mat5(1,1,1));
}
template<int DataLayout>
@@ -147,7 +167,7 @@ static void test_holes() {
t1.setRandom();
t2.setRandom();
- Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(3, 4)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(3, 4)}};
Tensor<float, 5, DataLayout> result = t1.contract(t2, dims);
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
@@ -182,7 +202,7 @@ static void test_full_redux()
t1.setRandom();
t2.setRandom();
- Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
Tensor<float, 1, DataLayout> result = t1.contract(t2, dims);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(1, 0, 0)
@@ -212,7 +232,7 @@ static void test_contraction_of_contraction()
t3.setRandom();
t4.setRandom();
- Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
auto contract1 = t1.contract(t2, dims);
auto diff = t3 - contract1;
auto contract2 = t1.contract(t4, dims);
@@ -243,7 +263,7 @@ static void test_expr()
Tensor<float, 2, DataLayout> mat3(2,2);
- Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
mat3 = mat1.contract(mat2, dims);
VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
@@ -263,7 +283,7 @@ static void test_out_of_order_contraction()
Tensor<float, 2, DataLayout> mat3(2, 2);
- Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(0, 2)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(0, 2)}};
mat3 = mat1.contract(mat2, dims);
VERIFY_IS_APPROX(mat3(0, 0),
@@ -279,7 +299,7 @@ static void test_out_of_order_contraction()
mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) +
mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1));
- Eigen::array<DimPair, 2> dims2({{DimPair(0, 2), DimPair(2, 0)}});
+ Eigen::array<DimPair, 2> dims2 = {{DimPair(0, 2), DimPair(2, 0)}};
mat3 = mat1.contract(mat2, dims2);
VERIFY_IS_APPROX(mat3(0, 0),
@@ -311,8 +331,8 @@ static void test_consistency()
Tensor<float, 4, DataLayout> mat4(2, 1, 5, 5);
// contract on dimensions of size 4 and 3
- Eigen::array<DimPair, 2> dims1({{DimPair(0, 4), DimPair(1, 0)}});
- Eigen::array<DimPair, 2> dims2({{DimPair(4, 0), DimPair(0, 1)}});
+ Eigen::array<DimPair, 2> dims1 = {{DimPair(0, 4), DimPair(1, 0)}};
+ Eigen::array<DimPair, 2> dims2 = {{DimPair(4, 0), DimPair(0, 1)}};
mat3 = mat1.contract(mat2, dims1);
mat4 = mat2.contract(mat1, dims2);
@@ -354,7 +374,7 @@ static void test_large_contraction()
Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
// this contraction should be equivalent to a single matrix multiplication
- Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
// compute results by separate methods
t_result = t_left.contract(t_right, dims);
@@ -399,10 +419,10 @@ static void test_tensor_vector()
{
Tensor<float, 3, DataLayout> t_left(7, 13, 17);
Tensor<float, 2, DataLayout> t_right(1, 7);
-
+
t_left.setRandom();
t_right.setRandom();
-
+
typedef typename Tensor<float, 1, DataLayout>::DimensionPair DimensionPair;
Eigen::array<DimensionPair, 1> dim_pair01{{{0, 1}}};
Tensor<float, 3, DataLayout> t_result = t_left.contract(t_right, dim_pair01);
@@ -434,7 +454,7 @@ static void test_small_blocking_factors()
Eigen::setCpuCacheSizes(896, 1920, 2944);
// this contraction should be equivalent to a single matrix multiplication
- Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
Tensor<float, 5, DataLayout> t_result;
t_result = t_left.contract(t_right, dims);
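
The recurring change in this file from `Eigen::array<DimPair, N> dims({{...}})` to `dims = {{...}}` swaps direct-initialization with a braced argument for plain copy-list-initialization of an aggregate. The latter needs no constructor at all, while the former resolves as a constructor call through a temporary, which compilers of this era — nvcc in particular — did not always accept; the exact motivation is an assumption here, but the std::array analogue shows the two forms:

    #include <array>

    int main() {
      std::array<int, 3> a = {{1, 2, 3}};  // aggregate init: always fine
      std::array<int, 3> b({{1, 2, 3}});   // constructor-style: goes through
                                           // a temporary; historically flakier
      return a[0] + b[0];
    }
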
diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cu
index 5ff082a3a..134359611 100644
--- a/unsupported/test/cxx11_tensor_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -7,8 +7,6 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-// TODO(mdevin): Free the cuda memory.
-
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
@@ -63,6 +61,10 @@ void test_cuda_elementwise_small() {
out(Eigen::array<int, 1>(i)),
in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
}
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
}
void test_cuda_elementwise()
@@ -113,6 +115,48 @@ void test_cuda_elementwise()
}
}
}
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_in3);
+ cudaFree(d_out);
+}
+
+void test_cuda_props() {
+ Tensor<float, 1> in1(200);
+ Tensor<bool, 1> out(200);
+ in1.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(bool);
+
+ float* d_in1;
+ bool* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
+ d_in1, 200);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_out(
+ d_out, 200);
+
+ gpu_out.device(gpu_device) = (gpu_in1.isnan)();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 200; ++i) {
+ VERIFY_IS_EQUAL(out(i), (std::isnan)(in1(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_out);
}
void test_cuda_reduction()
@@ -131,8 +175,7 @@ void test_cuda_reduction()
cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
@@ -159,10 +202,13 @@ void test_cuda_reduction()
VERIFY_IS_APPROX(out(i,j), expected);
}
}
+
+ cudaFree(d_in1);
+ cudaFree(d_out);
}
template<int DataLayout>
-static void test_cuda_contraction()
+void test_cuda_contraction()
{
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
@@ -189,8 +235,7 @@ static void test_cuda_contraction()
cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31);
@@ -214,14 +259,18 @@ static void test_cuda_contraction()
for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
- cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << endl;
+ std::cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
}
+
+ cudaFree(d_t_left);
+ cudaFree(d_t_right);
+ cudaFree(d_t_result);
}
template<int DataLayout>
-static void test_cuda_convolution_1d()
+void test_cuda_convolution_1d()
{
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 1, DataLayout> kernel(4);
@@ -243,8 +292,7 @@ static void test_cuda_convolution_1d()
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137);
@@ -269,9 +317,13 @@ static void test_cuda_convolution_1d()
}
}
}
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
}
-static void test_cuda_convolution_inner_dim_col_major_1d()
+void test_cuda_convolution_inner_dim_col_major_1d()
{
Tensor<float, 4, ColMajor> input(74,9,11,7);
Tensor<float, 1, ColMajor> kernel(4);
@@ -293,8 +345,7 @@ static void test_cuda_convolution_inner_dim_col_major_1d()
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7);
@@ -319,9 +370,13 @@ static void test_cuda_convolution_inner_dim_col_major_1d()
}
}
}
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
}
-static void test_cuda_convolution_inner_dim_row_major_1d()
+void test_cuda_convolution_inner_dim_row_major_1d()
{
Tensor<float, 4, RowMajor> input(7,9,11,74);
Tensor<float, 1, RowMajor> kernel(4);
@@ -343,8 +398,7 @@ static void test_cuda_convolution_inner_dim_row_major_1d()
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74);
@@ -369,10 +423,14 @@ static void test_cuda_convolution_inner_dim_row_major_1d()
}
}
}
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
}
template<int DataLayout>
-static void test_cuda_convolution_2d()
+void test_cuda_convolution_2d()
{
Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 2, DataLayout> kernel(3,4);
@@ -394,8 +452,7 @@ static void test_cuda_convolution_2d()
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137);
@@ -430,10 +487,14 @@ static void test_cuda_convolution_2d()
}
}
}
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
}
template<int DataLayout>
-static void test_cuda_convolution_3d()
+void test_cuda_convolution_3d()
{
Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2);
@@ -455,8 +516,7 @@ static void test_cuda_convolution_3d()
cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
- cudaStream_t stream;
- assert(cudaStreamCreate(&stream) == cudaSuccess);
+ Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17);
@@ -505,21 +565,507 @@ static void test_cuda_convolution_3d()
}
}
}
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+
+template <typename Scalar>
+void test_cuda_lgamma(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.lgamma();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_digamma()
+{
+ Tensor<Scalar, 1> in(7);
+ Tensor<Scalar, 1> out(7);
+ Tensor<Scalar, 1> expected_out(7);
+ out.setZero();
+
+ in(0) = Scalar(1);
+ in(1) = Scalar(1.5);
+ in(2) = Scalar(4);
+ in(3) = Scalar(-10.5);
+ in(4) = Scalar(10000.5);
+ in(5) = Scalar(0);
+ in(6) = Scalar(-1);
+
+ expected_out(0) = Scalar(-0.5772156649015329);
+ expected_out(1) = Scalar(0.03648997397857645);
+ expected_out(2) = Scalar(1.2561176684318);
+ expected_out(3) = Scalar(2.398239129535781);
+ expected_out(4) = Scalar(9.210340372392849);
+ expected_out(5) = std::numeric_limits<Scalar>::infinity();
+ expected_out(6) = std::numeric_limits<Scalar>::infinity();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+ gpu_out.device(gpu_device) = gpu_in.digamma();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ for (int i = 5; i < 7; ++i) {
+ VERIFY_IS_EQUAL(out(i), expected_out(i));
+ }
+}
+
+template <typename Scalar>
+void test_cuda_zeta()
+{
+ Tensor<Scalar, 1> in_x(6);
+ Tensor<Scalar, 1> in_q(6);
+ Tensor<Scalar, 1> out(6);
+ Tensor<Scalar, 1> expected_out(6);
+ out.setZero();
+
+ in_x(0) = Scalar(1);
+ in_x(1) = Scalar(1.5);
+ in_x(2) = Scalar(4);
+ in_x(3) = Scalar(-10.5);
+ in_x(4) = Scalar(10000.5);
+ in_x(5) = Scalar(3);
+
+ in_q(0) = Scalar(1.2345);
+ in_q(1) = Scalar(2);
+ in_q(2) = Scalar(1.5);
+ in_q(3) = Scalar(3);
+ in_q(4) = Scalar(1.0001);
+ in_q(5) = Scalar(-2.5);
+
+ expected_out(0) = std::numeric_limits<Scalar>::infinity();
+ expected_out(1) = Scalar(1.61237534869);
+ expected_out(2) = Scalar(0.234848505667);
+ expected_out(3) = Scalar(1.03086757337e-5);
+ expected_out(4) = Scalar(0.367879440865);
+ expected_out(5) = Scalar(0.054102025820864097);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_q;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_q), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
+
+ gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ VERIFY_IS_EQUAL(out(0), expected_out(0));
+ VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
+
+ for (int i = 1; i < 6; ++i) {
+ if (i != 3) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+}
+
+template <typename Scalar>
+void test_cuda_polygamma()
+{
+ Tensor<Scalar, 1> in_x(7);
+ Tensor<Scalar, 1> in_n(7);
+ Tensor<Scalar, 1> out(7);
+ Tensor<Scalar, 1> expected_out(7);
+ out.setZero();
+
+ in_n(0) = Scalar(1);
+ in_n(1) = Scalar(1);
+ in_n(2) = Scalar(1);
+ in_n(3) = Scalar(17);
+ in_n(4) = Scalar(31);
+ in_n(5) = Scalar(28);
+ in_n(6) = Scalar(8);
+
+ in_x(0) = Scalar(2);
+ in_x(1) = Scalar(3);
+ in_x(2) = Scalar(25.5);
+ in_x(3) = Scalar(4.7);
+ in_x(4) = Scalar(11.8);
+ in_x(5) = Scalar(17.7);
+ in_x(6) = Scalar(30.2);
+
+ expected_out(0) = Scalar(0.644934066848);
+ expected_out(1) = Scalar(0.394934066848);
+ expected_out(2) = Scalar(0.0399946696496);
+ expected_out(3) = Scalar(293.334565435);
+ expected_out(4) = Scalar(0.445487887616);
+ expected_out(5) = Scalar(-2.47810300902e-07);
+ expected_out(6) = Scalar(-8.29668781082e-09);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_n;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_n), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+ gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 7; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+}
+
+template <typename Scalar>
+void test_cuda_igamma()
+{
+ Tensor<Scalar, 2> a(6, 6);
+ Tensor<Scalar, 2> x(6, 6);
+ Tensor<Scalar, 2> out(6, 6);
+ out.setZero();
+
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ a(i, j) = a_s[i];
+ x(i, j) = x_s[j];
+ }
+ }
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+ Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+ {0.0, 0.6321205588285578, 0.7768698398515702,
+ 0.9816843611112658, 9.999500016666262e-05, 1.0},
+ {0.0, 0.4275932955291202, 0.608374823728911,
+ 0.9539882943107686, 7.522076445089201e-07, 1.0},
+ {0.0, 0.01898815687615381, 0.06564245437845008,
+ 0.5665298796332909, 4.166333347221828e-18, 1.0},
+ {0.0, 0.9999780593618628, 0.9999899967080838,
+ 0.9999996219837988, 0.9991370418689945, 1.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+
+
+
+ std::size_t bytes = a.size() * sizeof(Scalar);
+
+ Scalar* d_a;
+ Scalar* d_x;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_a), bytes);
+ cudaMalloc((void**)(&d_x), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+ gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igamma_s[i][j])) {
+ VERIFY((std::isnan)(out(i, j)));
+ } else {
+ VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]);
+ }
+ }
+ }
+}
+
+template <typename Scalar>
+void test_cuda_igammac()
+{
+ Tensor<Scalar, 2> a(6, 6);
+ Tensor<Scalar, 2> x(6, 6);
+ Tensor<Scalar, 2> out(6, 6);
+ out.setZero();
+
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ a(i, j) = a_s[i];
+ x(i, j) = x_s[j];
+ }
+ }
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+ Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+ {1.0, 0.36787944117144233, 0.22313016014842982,
+ 0.018315638888734182, 0.9999000049998333, 0.0},
+ {1.0, 0.5724067044708798, 0.3916251762710878,
+ 0.04601170568923136, 0.9999992477923555, 0.0},
+ {1.0, 0.9810118431238462, 0.9343575456215499,
+ 0.4334701203667089, 1.0, 0.0},
+ {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+ 3.7801620118431334e-07, 0.0008629581310054535,
+ 0.0},
+ {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+
+ std::size_t bytes = a.size() * sizeof(Scalar);
+
+ Scalar* d_a;
+ Scalar* d_x;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_a), bytes);
+ cudaMalloc((void**)(&d_x), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+ gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igammac_s[i][j])) {
+ VERIFY((std::isnan)(out(i, j)));
+ } else {
+ VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]);
+ }
+ }
+  }
+
+  cudaFree(d_a);
+  cudaFree(d_x);
+  cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_erf(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
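+  // Scale the random inputs by stddev so repeated calls exercise small, moderate, and large arguments.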
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erf();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_erfc(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
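+  // stddev is kept moderate by the callers: CUDA's erfc loses precision for large inputs (see the subtest list below).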
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erfc();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
}
void test_cxx11_tensor_cuda()
{
- CALL_SUBTEST(test_cuda_elementwise_small());
- CALL_SUBTEST(test_cuda_elementwise());
- CALL_SUBTEST(test_cuda_reduction());
- CALL_SUBTEST(test_cuda_contraction<ColMajor>());
- CALL_SUBTEST(test_cuda_contraction<RowMajor>());
- CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
- CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
- CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
- CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
- CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
- CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
- CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
- CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
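+  // Numbered subtests let the build split this test via EIGEN_TEST_PART_<n> and run the parts in parallel.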
+ CALL_SUBTEST_1(test_cuda_elementwise_small());
+ CALL_SUBTEST_1(test_cuda_elementwise());
+ CALL_SUBTEST_1(test_cuda_props());
+ CALL_SUBTEST_1(test_cuda_reduction());
+ CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
+ CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
+ CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
+ CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+
+#if __cplusplus > 199711L
+  // std::erf, std::erfc, and so on were only added in C++11. We use them
+  // as a golden reference to validate the results produced by Eigen. Therefore
+  // we can only run these tests if we use a C++11 compiler.
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+
+ CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
+ // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
+ CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+
+ CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
+ // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
+
+ CALL_SUBTEST_5(test_cuda_digamma<float>());
+ CALL_SUBTEST_5(test_cuda_digamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_polygamma<float>());
+ CALL_SUBTEST_5(test_cuda_polygamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_zeta<float>());
+ CALL_SUBTEST_5(test_cuda_zeta<double>());
+
+ CALL_SUBTEST_5(test_cuda_igamma<float>());
+ CALL_SUBTEST_5(test_cuda_igammac<float>());
+
+ CALL_SUBTEST_5(test_cuda_igamma<double>());
+ CALL_SUBTEST_5(test_cuda_igammac<double>());
+#endif
}
diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp
index 7e33c9580..8baa477cc 100644
--- a/unsupported/test/cxx11_tensor_custom_op.cpp
+++ b/unsupported/test/cxx11_tensor_custom_op.cpp
@@ -25,7 +25,9 @@ struct InsertZeros {
template <typename Output, typename Device>
void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
{
- array<DenseIndex, 2> strides{{2, 2}};
+ array<DenseIndex, 2> strides;
+ strides[0] = 2;
+ strides[1] = 2;
output.stride(strides).device(device) = input;
Eigen::DSizes<DenseIndex, 2> offsets(1,1);
@@ -70,7 +72,8 @@ struct BatchMatMul {
Output& output, const Device& device) const
{
typedef Tensor<float, 3>::DimensionPair DimPair;
- array<DimPair, 1> dims({{DimPair(1, 0)}});
+ array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
for (int i = 0; i < output.dimension(2); ++i) {
output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
}
@@ -88,9 +91,10 @@ static void test_custom_binary_op()
Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
for (int i = 0; i < 5; ++i) {
typedef Tensor<float, 3>::DimensionPair DimPair;
- array<DimPair, 1> dims({{DimPair(1, 0)}});
+ array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
- TensorRef<Tensor<float, 2>> val = result.chip<2>(i);
+ TensorRef<Tensor<float, 2> > val = result.chip<2>(i);
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(val(j, k), reference(j, k));
diff --git a/unsupported/test/cxx11_tensor_device.cpp b/unsupported/test/cxx11_tensor_device.cu
index ed5dd7505..cbe9e6449 100644
--- a/unsupported/test/cxx11_tensor_device.cpp
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -109,19 +109,19 @@ struct GPUContext {
// The actual expression to evaluate
template <typename Context>
-static void test_contextual_eval(Context* context)
+void test_contextual_eval(Context* context)
{
context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
-static void test_forced_contextual_eval(Context* context)
+void test_forced_contextual_eval(Context* context)
{
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
}
template <typename Context>
-static void test_compound_assignment(Context* context)
+void test_compound_assignment(Context* context)
{
context->out().device(context->device()) = context->in1().constant(2.718f);
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
@@ -129,7 +129,7 @@ static void test_compound_assignment(Context* context)
template <typename Context>
-static void test_contraction(Context* context)
+void test_contraction(Context* context)
{
Eigen::array<std::pair<int, int>, 2> dims;
dims[0] = std::make_pair(1, 1);
@@ -145,7 +145,7 @@ static void test_contraction(Context* context)
template <typename Context>
-static void test_1d_convolution(Context* context)
+void test_1d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,70);
@@ -155,7 +155,7 @@ static void test_1d_convolution(Context* context)
}
template <typename Context>
-static void test_2d_convolution(Context* context)
+void test_2d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,69);
@@ -165,7 +165,7 @@ static void test_2d_convolution(Context* context)
}
template <typename Context>
-static void test_3d_convolution(Context* context)
+void test_3d_convolution(Context* context)
{
Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(39,49,69);
@@ -175,7 +175,7 @@ static void test_3d_convolution(Context* context)
}
-static void test_cpu() {
+void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);
@@ -267,7 +267,7 @@ static void test_cpu() {
}
}
-static void test_gpu() {
+void test_gpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70);
@@ -383,6 +383,6 @@ static void test_gpu() {
void test_cxx11_tensor_device()
{
- CALL_SUBTEST(test_cpu());
- CALL_SUBTEST(test_gpu());
+ CALL_SUBTEST_1(test_cpu());
+ CALL_SUBTEST_2(test_gpu());
}
diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp
new file mode 100644
index 000000000..9130fff35
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_empty.cpp
@@ -0,0 +1,40 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+static void test_empty_tensor()
+{
+ Tensor<float, 2> source;
+ Tensor<float, 2> tgt1 = source;
+ Tensor<float, 2> tgt2(source);
+ Tensor<float, 2> tgt3;
+ tgt3 = tgt1;
+ tgt3 = tgt2;
+}
+
+static void test_empty_fixed_size_tensor()
+{
+ TensorFixedSize<float, Sizes<0>> source;
+ TensorFixedSize<float, Sizes<0>> tgt1 = source;
+ TensorFixedSize<float, Sizes<0>> tgt2(source);
+ TensorFixedSize<float, Sizes<0>> tgt3;
+ tgt3 = tgt1;
+ tgt3 = tgt2;
+}
+
+
+void test_cxx11_tensor_empty()
+{
+ CALL_SUBTEST(test_empty_tensor());
+ CALL_SUBTEST(test_empty_fixed_size_tensor());
+}
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp
index 0f6e09106..89874349f 100644
--- a/unsupported/test/cxx11_tensor_fft.cpp
+++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -14,7 +14,7 @@ using Eigen::Tensor;
template <int DataLayout>
static void test_fft_2D_golden() {
- Tensor<float, 2, DataLayout, long> input(2, 3);
+ Tensor<float, 2, DataLayout> input(2, 3);
input(0, 0) = 1;
input(0, 1) = 2;
input(0, 2) = 3;
@@ -22,11 +22,11 @@ static void test_fft_2D_golden() {
input(1, 1) = 5;
input(1, 2) = 6;
- array<int, 2> fft;
+ array<ptrdiff_t, 2> fft;
fft[0] = 0;
fft[1] = 1;
- Tensor<std::complex<float>, 2, DataLayout, long> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+ Tensor<std::complex<float>, 2, DataLayout> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
std::complex<float> output_golden[6]; // in ColMajor order
output_golden[0] = std::complex<float>(21, 0);
@@ -57,24 +57,24 @@ static void test_fft_2D_golden() {
}
static void test_fft_complex_input_golden() {
- Tensor<std::complex<float>, 1, ColMajor, long> input(5);
+ Tensor<std::complex<float>, 1, ColMajor> input(5);
input(0) = std::complex<float>(1, 1);
input(1) = std::complex<float>(2, 2);
input(2) = std::complex<float>(3, 3);
input(3) = std::complex<float>(4, 4);
input(4) = std::complex<float>(5, 5);
- array<int, 1> fft;
+ array<ptrdiff_t, 1> fft;
fft[0] = 0;
- Tensor<std::complex<float>, 1, ColMajor, long> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
- Tensor<std::complex<float>, 1, ColMajor, long> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
- Tensor<float, 1, ColMajor, long> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
- Tensor<float, 1, ColMajor, long> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
+ Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
- Tensor<float, 1, ColMajor, long> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
- Tensor<float, 1, ColMajor, long> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
+ Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
@@ -114,24 +114,24 @@ static void test_fft_complex_input_golden() {
}
static void test_fft_real_input_golden() {
- Tensor<float, 1, ColMajor, long> input(5);
+ Tensor<float, 1, ColMajor> input(5);
input(0) = 1.0;
input(1) = 2.0;
input(2) = 3.0;
input(3) = 4.0;
input(4) = 5.0;
- array<int, 1> fft;
+ array<ptrdiff_t, 1> fft;
fft[0] = 0;
- Tensor<std::complex<float>, 1, ColMajor, long> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
- Tensor<std::complex<float>, 1, ColMajor, long> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
- Tensor<float, 1, ColMajor, long> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
- Tensor<float, 1, ColMajor, long> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
+ Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
- Tensor<float, 1, ColMajor, long> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
- Tensor<float, 1, ColMajor, long> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
+ Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
@@ -178,21 +178,21 @@ static void test_fft_real_input_golden() {
template <int DataLayout, typename RealScalar, bool isComplexInput, int FFTResultType, int FFTDirection, int TensorRank>
static void test_fft_real_input_energy() {
- Eigen::DSizes<long, TensorRank> dimensions;
- int total_size = 1;
+ Eigen::DSizes<ptrdiff_t, TensorRank> dimensions;
+ ptrdiff_t total_size = 1;
for (int i = 0; i < TensorRank; ++i) {
dimensions[i] = rand() % 20 + 1;
total_size *= dimensions[i];
}
- const DSizes<long, TensorRank> arr = dimensions;
+ const DSizes<ptrdiff_t, TensorRank> arr = dimensions;
typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
- Tensor<InputScalar, TensorRank, DataLayout, long> input;
+ Tensor<InputScalar, TensorRank, DataLayout> input;
input.resize(arr);
input.setRandom();
- array<int, TensorRank> fft;
+ array<ptrdiff_t, TensorRank> fft;
for (int i = 0; i < TensorRank; ++i) {
fft[i] = i;
}
diff --git a/unsupported/test/cxx11_tensor_notification.cpp b/unsupported/test/cxx11_tensor_notification.cpp
new file mode 100644
index 000000000..c946007b8
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_notification.cpp
@@ -0,0 +1,81 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Vijay Vasudevan <vrv@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+#include <stdlib.h>
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+#if EIGEN_OS_WIN || EIGEN_OS_WIN64
+#include <windows.h>
+void sleep(int seconds) {
+ Sleep(seconds*1000);
+}
+#else
+#include <unistd.h>
+#endif
+
+
+namespace {
+
+void WaitAndAdd(Eigen::Notification* n, int* counter) {
+ n->Wait();
+ *counter = *counter + 1;
+}
+
+} // namespace
+
+static void test_notification_single()
+{
+ ThreadPool thread_pool(1);
+
+ int counter = 0;
+ Eigen::Notification n;
+ std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
+ thread_pool.Schedule(func);
+ sleep(1);
+
+ // The thread should be waiting for the notification.
+ VERIFY_IS_EQUAL(counter, 0);
+
+ // Unblock the thread
+ n.Notify();
+
+ sleep(1);
+
+ // Verify the counter has been incremented
+ VERIFY_IS_EQUAL(counter, 1);
+}
+
+// Like test_notification_single() but schedules multiple tasks to
+// validate that all the waiters get notified by Notify().
+static void test_notification_multiple()
+{
+ ThreadPool thread_pool(1);
+
+ int counter = 0;
+ Eigen::Notification n;
+ std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ sleep(1);
+ VERIFY_IS_EQUAL(counter, 0);
+ n.Notify();
+ sleep(1);
+ VERIFY_IS_EQUAL(counter, 4);
+}
+
+void test_cxx11_tensor_notification()
+{
+ CALL_SUBTEST(test_notification_single());
+ CALL_SUBTEST(test_notification_multiple());
+}
diff --git a/unsupported/test/cxx11_tensor_of_complex.cpp b/unsupported/test/cxx11_tensor_of_complex.cpp
index 8ad04f699..e9d1b2d3c 100644
--- a/unsupported/test/cxx11_tensor_of_complex.cpp
+++ b/unsupported/test/cxx11_tensor_of_complex.cpp
@@ -48,6 +48,25 @@ static void test_abs()
}
+static void test_conjugate()
+{
+ Tensor<std::complex<float>, 1> data1(3);
+ Tensor<std::complex<double>, 1> data2(3);
+ Tensor<int, 1> data3(3);
+ data1.setRandom();
+ data2.setRandom();
+ data3.setRandom();
+
+ Tensor<std::complex<float>, 1> conj1 = data1.conjugate();
+ Tensor<std::complex<double>, 1> conj2 = data2.conjugate();
+ Tensor<int, 1> conj3 = data3.conjugate();
+ for (int i = 0; i < 3; ++i) {
+ VERIFY_IS_APPROX(conj1(i), std::conj(data1(i)));
+ VERIFY_IS_APPROX(conj2(i), std::conj(data2(i)));
+ VERIFY_IS_APPROX(conj3(i), data3(i));
+ }
+}
+
static void test_contractions()
{
Tensor<std::complex<float>, 4> t_left(30, 50, 8, 31);
@@ -64,7 +83,9 @@ static void test_contractions()
// This contraction should be equivalent to a regular matrix multiplication
typedef Tensor<float, 1>::DimensionPair DimPair;
- Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
+ Eigen::array<DimPair, 2> dims;
+ dims[0] = DimPair(2, 0);
+ dims[1] = DimPair(3, 1);
t_result = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
@@ -77,5 +98,6 @@ void test_cxx11_tensor_of_complex()
{
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_abs());
+ CALL_SUBTEST(test_conjugate());
CALL_SUBTEST(test_contractions());
}
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
new file mode 100644
index 000000000..cb917bb37
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -0,0 +1,256 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_of_float16_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+#ifdef EIGEN_HAS_CUDA_FP16
+
+void test_cuda_conversion() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
+ d_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
+ d_conv, num_elem);
+
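+  // Fill with random floats, cast to half and back; the round trip should match to within the float test tolerance.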
+ gpu_float.device(gpu_device) = gpu_float.random();
+ gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
+ gpu_conv.device(gpu_device) = gpu_half.cast<float>();
+
+ Tensor<float, 1> initial(num_elem);
+ Tensor<float, 1> final(num_elem);
+ gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(initial(i), final(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_half);
+ gpu_device.deallocate(d_conv);
+}
+
+
+void test_cuda_unary() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.abs();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking unary " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+void test_cuda_elementwise() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
+ d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
+ d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random();
+ gpu_float2.device(gpu_device) = gpu_float2.random();
+ gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
+ gpu_res_half.device(gpu_device) = ((gpu_float1.cast<Eigen::half>() + gpu_float2.cast<Eigen::half>()) * gpu_float1.cast<Eigen::half>()).cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+void test_cuda_contractions() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int rows = 23;
+ int cols = 23;
+ int num_elem = rows*cols;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_half(
+ d_res_half, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_float(
+ d_res_float, rows, cols);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+  gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
+
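+  // Contract dimension 1 of the first operand with dimension 0 of the second, i.e. an ordinary matrix product.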
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 1> dims(DimPair(1, 0));
+ gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims);
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims).cast<float>();
+
+ Tensor<float, 2> half_prec(rows, cols);
+ Tensor<float, 2> full_prec(rows, cols);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < rows; ++i) {
+ for (int j = 0; j < cols; ++j) {
+ std::cout << "Checking contract " << i << " " << j << std::endl;
+ VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ }
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+void test_cuda_reductions() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int size = 13;
+ int num_elem = size*size;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(size * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(size * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, size, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, size, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, size);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random();
+ gpu_float2.device(gpu_device) = gpu_float2.random();
+
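+  // Sum over dimension 0: one partial sum per column, computed in both full and half precision.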
+ Eigen::array<int, 1> redux_dim = {{0}};
+ gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim);
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim).cast<float>();
+
+ Tensor<float, 1> half_prec(size);
+ Tensor<float, 1> full_prec(size);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < size; ++i) {
+ std::cout << "Checking redux " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+#endif
+
+
+void test_cxx11_tensor_of_float16_cuda()
+{
+#ifdef EIGEN_HAS_CUDA_FP16
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice device(&stream);
+ if (device.majorDeviceVersion() > 5 ||
+ (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) {
+ std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << std::endl;
+
+ CALL_SUBTEST_1(test_cuda_conversion());
+ CALL_SUBTEST_1(test_cuda_unary());
+ CALL_SUBTEST_1(test_cuda_elementwise());
+ CALL_SUBTEST_2(test_cuda_contractions());
+ CALL_SUBTEST_3(test_cuda_reductions());
+ }
+ else {
+ std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;
+ }
+#else
+ std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
+#endif
+}
diff --git a/unsupported/test/cxx11_tensor_random.cpp b/unsupported/test/cxx11_tensor_random.cpp
index 389896c54..0f3dc5787 100644
--- a/unsupported/test/cxx11_tensor_random.cpp
+++ b/unsupported/test/cxx11_tensor_random.cpp
@@ -48,7 +48,7 @@ struct MyGenerator {
}
// Same as above but generates several numbers at a time.
- typename internal::packet_traits<int>::type packetOp(
+ internal::packet_traits<int>::type packetOp(
Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
const int packetSize = internal::packet_traits<int>::size;
EIGEN_ALIGN_MAX int values[packetSize];
diff --git a/unsupported/test/cxx11_tensor_random_cuda.cpp b/unsupported/test/cxx11_tensor_random_cuda.cu
index 5d091de15..5d091de15 100644
--- a/unsupported/test/cxx11_tensor_random_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_random_cuda.cu
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
index 0ec316991..6a128901a 100644
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -9,6 +9,7 @@
#include "main.h"
#include <limits>
+#include <numeric>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cpp b/unsupported/test/cxx11_tensor_reduction_cuda.cu
index 9e06eb126..cad0c08e0 100644
--- a/unsupported/test/cxx11_tensor_reduction_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -48,9 +48,12 @@ static void test_full_reductions() {
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
+
+ gpu_device.deallocate(gpu_in_ptr);
+ gpu_device.deallocate(gpu_out_ptr);
}
void test_cxx11_tensor_reduction_cuda() {
- CALL_SUBTEST(test_full_reductions<ColMajor>());
- CALL_SUBTEST(test_full_reductions<RowMajor>());
+ CALL_SUBTEST_1(test_full_reductions<ColMajor>());
+ CALL_SUBTEST_2(test_full_reductions<RowMajor>());
}
diff --git a/unsupported/test/cxx11_tensor_reverse.cpp b/unsupported/test/cxx11_tensor_reverse.cpp
index f96c21fa3..b35b8d29e 100644
--- a/unsupported/test/cxx11_tensor_reverse.cpp
+++ b/unsupported/test/cxx11_tensor_reverse.cpp
@@ -114,10 +114,18 @@ static void test_expr_reverse(bool LValue)
Tensor<float, 4, DataLayout> result(2,3,5,7);
- array<ptrdiff_t, 4> src_slice_dim{{2,3,1,7}};
- array<ptrdiff_t, 4> src_slice_start{{0,0,0,0}};
- array<ptrdiff_t, 4> dst_slice_dim{{2,3,1,7}};
- array<ptrdiff_t, 4> dst_slice_start{{0,0,0,0}};
+ array<ptrdiff_t, 4> src_slice_dim;
+ src_slice_dim[0] = 2;
+ src_slice_dim[1] = 3;
+ src_slice_dim[2] = 1;
+ src_slice_dim[3] = 7;
+ array<ptrdiff_t, 4> src_slice_start;
+ src_slice_start[0] = 0;
+ src_slice_start[1] = 0;
+ src_slice_start[2] = 0;
+ src_slice_start[3] = 0;
+ array<ptrdiff_t, 4> dst_slice_dim = src_slice_dim;
+ array<ptrdiff_t, 4> dst_slice_start = src_slice_start;
for (int i = 0; i < 5; ++i) {
if (LValue) {
diff --git a/unsupported/test/cxx11_tensor_roundings.cpp b/unsupported/test/cxx11_tensor_roundings.cpp
new file mode 100644
index 000000000..2c26151ab
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_roundings.cpp
@@ -0,0 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+static void test_float_rounding()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.round();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::round(ftensor(i,j)));
+ }
+ }
+}
+
+static void test_float_flooring()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.floor();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::floor(ftensor(i,j)));
+ }
+ }
+}
+
+static void test_float_ceiling()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.ceil();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::ceil(ftensor(i,j)));
+ }
+ }
+}
+
+void test_cxx11_tensor_roundings()
+{
+ CALL_SUBTEST(test_float_rounding());
+ CALL_SUBTEST(test_float_ceiling());
+ CALL_SUBTEST(test_float_flooring());
+}
diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp
index 98671a986..a03f75cfe 100644
--- a/unsupported/test/cxx11_tensor_sugar.cpp
+++ b/unsupported/test/cxx11_tensor_sugar.cpp
@@ -18,7 +18,7 @@ static void test_comparison_sugar() {
#define TEST_TENSOR_EQUAL(e1, e2) \
b = ((e1) == (e2)).all(); \
- VERIFY(b(0))
+ VERIFY(b())
#define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0))
@@ -32,7 +32,30 @@ static void test_comparison_sugar() {
#undef TEST_TENSOR_EQUAL
}
+
+static void test_scalar_sugar() {
+ Tensor<float, 3> A(6, 7, 5);
+ Tensor<float, 3> B(6, 7, 5);
+ A.setRandom();
+ B.setRandom();
+
+ const float alpha = 0.43f;
+ const float beta = 0.21f;
+
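+  // The scalar overloads should produce the same result as the explicit constant() expressions.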
+ Tensor<float, 3> R = A * A.constant(alpha) + B * B.constant(beta);
+ Tensor<float, 3> S = A * alpha + B * beta;
+
+ // TODO: add enough syntactic sugar to support this
+ // Tensor<float, 3> T = alpha * A + beta * B;
+
+ for (int i = 0; i < 6*7*5; ++i) {
+ VERIFY_IS_APPROX(R(i), S(i));
+ }
+}
+
+
void test_cxx11_tensor_sugar()
{
CALL_SUBTEST(test_comparison_sugar());
+ CALL_SUBTEST(test_scalar_sugar());
}
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index e28cf55e2..e46197464 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -17,7 +17,7 @@
using Eigen::Tensor;
-static void test_multithread_elementwise()
+void test_multithread_elementwise()
{
Tensor<float, 3> in1(2,3,7);
Tensor<float, 3> in2(2,3,7);
@@ -40,7 +40,7 @@ static void test_multithread_elementwise()
}
-static void test_multithread_compound_assignment()
+void test_multithread_compound_assignment()
{
Tensor<float, 3> in1(2,3,7);
Tensor<float, 3> in2(2,3,7);
@@ -64,7 +64,7 @@ static void test_multithread_compound_assignment()
}
template<int DataLayout>
-static void test_multithread_contraction()
+void test_multithread_contraction()
{
Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
@@ -91,15 +91,20 @@ static void test_multithread_contraction()
for (ptrdiff_t i = 0; i < t_result.size(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
- std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
- assert(false);
+ if (fabs(t_result(i) - m_result(i)) < 1e-4) {
+ continue;
+ }
+ if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
+ continue;
}
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << m_result(i) << std::endl;
+ assert(false);
}
}
template<int DataLayout>
-static void test_contraction_corner_cases()
+void test_contraction_corner_cases()
{
Tensor<float, 2, DataLayout> t_left(32, 500);
Tensor<float, 2, DataLayout> t_right(32, 28*28);
@@ -186,7 +191,7 @@ static void test_contraction_corner_cases()
}
template<int DataLayout>
-static void test_multithread_contraction_agrees_with_singlethread() {
+void test_multithread_contraction_agrees_with_singlethread() {
int contract_size = internal::random<int>(1, 5000);
Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
@@ -229,7 +234,7 @@ static void test_multithread_contraction_agrees_with_singlethread() {
template<int DataLayout>
-static void test_multithreaded_reductions() {
+void test_multithreaded_reductions() {
const int num_threads = internal::random<int>(3, 11);
ThreadPool thread_pool(num_threads);
Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
@@ -239,19 +244,19 @@ static void test_multithreaded_reductions() {
Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
t1.setRandom();
- Tensor<float, 1, DataLayout> full_redux(1);
+ Tensor<float, 0, DataLayout> full_redux;
full_redux = t1.sum();
- Tensor<float, 1, DataLayout> full_redux_tp(1);
+ Tensor<float, 0, DataLayout> full_redux_tp;
full_redux_tp.device(thread_pool_device) = t1.sum();
// Check that the single threaded and the multi threaded reductions return
// the same result.
- VERIFY_IS_APPROX(full_redux(0), full_redux_tp(0));
+ VERIFY_IS_APPROX(full_redux(), full_redux_tp());
}
-static void test_memcpy() {
+void test_memcpy() {
for (int i = 0; i < 5; ++i) {
const int num_threads = internal::random<int>(3, 11);
@@ -270,7 +275,7 @@ static void test_memcpy() {
}
-static void test_multithread_random()
+void test_multithread_random()
{
Eigen::ThreadPool tp(2);
Eigen::ThreadPoolDevice device(&tp, 2);
@@ -278,26 +283,52 @@ static void test_multithread_random()
t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
}
+template<int DataLayout>
+void test_multithread_shuffle()
+{
+ Tensor<float, 4, DataLayout> tensor(17,5,7,11);
+ tensor.setRandom();
+
+ const int num_threads = internal::random<int>(2, 11);
+ ThreadPool threads(num_threads);
+ Eigen::ThreadPoolDevice device(&threads, num_threads);
+
+ Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
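+  // Output dimension d takes input dimension shuffles[d]; (17,5,7,11) shuffled by {2,1,3,0} gives (7,5,11,17).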
+ array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
+ shuffle.device(device) = tensor.shuffle(shuffles);
+
+ for (int i = 0; i < 17; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
+ }
+ }
+ }
+ }
+}
+
void test_cxx11_tensor_thread_pool()
{
- CALL_SUBTEST(test_multithread_elementwise());
- CALL_SUBTEST(test_multithread_compound_assignment());
+ CALL_SUBTEST_1(test_multithread_elementwise());
+ CALL_SUBTEST_1(test_multithread_compound_assignment());
- CALL_SUBTEST(test_multithread_contraction<ColMajor>());
- CALL_SUBTEST(test_multithread_contraction<RowMajor>());
+ CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
+ CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
- CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
- CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
+ CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
+ CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
// Exercise various cases that have been problematic in the past.
- CALL_SUBTEST(test_contraction_corner_cases<ColMajor>());
- CALL_SUBTEST(test_contraction_corner_cases<RowMajor>());
-
- CALL_SUBTEST(test_multithreaded_reductions<ColMajor>());
- CALL_SUBTEST(test_multithreaded_reductions<RowMajor>());
+ CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
+ CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
- CALL_SUBTEST(test_memcpy());
+ CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
+ CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
- CALL_SUBTEST(test_multithread_random());
+ CALL_SUBTEST_6(test_memcpy());
+ CALL_SUBTEST_6(test_multithread_random());
+ CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
+ CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
}
diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp
index ee3767e58..d2a1e8673 100644
--- a/unsupported/test/cxx11_tensor_uint128.cpp
+++ b/unsupported/test/cxx11_tensor_uint128.cpp
@@ -11,10 +11,20 @@
#include <Eigen/CXX11/Tensor>
+
+#if EIGEN_COMP_MSVC
+#define EIGEN_NO_INT128
+#else
+typedef __uint128_t uint128_t;
+#endif
+
+// Only run the test on compilers that support 128-bit integers natively
+#ifndef EIGEN_NO_INT128
+
using Eigen::internal::TensorUInt128;
using Eigen::internal::static_val;
-void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, __uint128_t expected) {
+void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, uint128_t expected) {
bool matchl = actual.lower() == static_cast<uint64_t>(expected);
bool matchh = actual.upper() == static_cast<uint64_t>(expected >> 64);
if (!matchl || !matchh) {
@@ -32,13 +42,13 @@ void test_add() {
for (uint64_t i1 = 0; i1 < 100; ++i1) {
for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
TensorUInt128<uint64_t, uint64_t> i(i1, i2);
- __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
for (uint64_t j1 = 0; j1 < 100; ++j1) {
for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
TensorUInt128<uint64_t, uint64_t> j(j1, j2);
- __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
TensorUInt128<uint64_t, uint64_t> actual = i + j;
- __uint128_t expected = a + b;
+ uint128_t expected = a + b;
VERIFY_EQUAL(actual, expected);
}
}
@@ -51,13 +61,13 @@ void test_sub() {
for (uint64_t i1 = 0; i1 < 100; ++i1) {
for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
TensorUInt128<uint64_t, uint64_t> i(i1, i2);
- __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
for (uint64_t j1 = 0; j1 < 100; ++j1) {
for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
TensorUInt128<uint64_t, uint64_t> j(j1, j2);
- __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
TensorUInt128<uint64_t, uint64_t> actual = i - j;
- __uint128_t expected = a - b;
+ uint128_t expected = a - b;
VERIFY_EQUAL(actual, expected);
}
}
@@ -70,13 +80,13 @@ void test_mul() {
for (uint64_t i1 = 0; i1 < 100; ++i1) {
for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
TensorUInt128<uint64_t, uint64_t> i(i1, i2);
- __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
for (uint64_t j1 = 0; j1 < 100; ++j1) {
for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
TensorUInt128<uint64_t, uint64_t> j(j1, j2);
- __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
TensorUInt128<uint64_t, uint64_t> actual = i * j;
- __uint128_t expected = a * b;
+ uint128_t expected = a * b;
VERIFY_EQUAL(actual, expected);
}
}
@@ -89,13 +99,13 @@ void test_div() {
for (uint64_t i1 = 0; i1 < 100; ++i1) {
for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
TensorUInt128<uint64_t, uint64_t> i(i1, i2);
- __uint128_t a = (static_cast<__uint128_t>(i1) << 64) + static_cast<__uint128_t>(i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
for (uint64_t j1 = 0; j1 < 100; ++j1) {
for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
TensorUInt128<uint64_t, uint64_t> j(j1, j2);
- __uint128_t b = (static_cast<__uint128_t>(j1) << 64) + static_cast<__uint128_t>(j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
TensorUInt128<uint64_t, uint64_t> actual = i / j;
- __uint128_t expected = a / b;
+ uint128_t expected = a / b;
VERIFY_EQUAL(actual, expected);
}
}
@@ -107,10 +117,10 @@ void test_misc1() {
uint64_t incr = internal::random<uint64_t>(1, 9999999999);
for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
TensorUInt128<static_val<0>, uint64_t> i(0, i2);
- __uint128_t a = static_cast<__uint128_t>(i2);
+ uint128_t a = static_cast<uint128_t>(i2);
for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
TensorUInt128<static_val<0>, uint64_t> j(0, j2);
- __uint128_t b = static_cast<__uint128_t>(j2);
+ uint128_t b = static_cast<uint128_t>(j2);
uint64_t actual = (i * j).upper();
uint64_t expected = (a * b) >> 64;
VERIFY_IS_EQUAL(actual, expected);
@@ -122,23 +132,29 @@ void test_misc2() {
int64_t incr = internal::random<int64_t>(1, 100);
for (int64_t log_div = 0; log_div < 63; ++log_div) {
for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) {
- uint64_t expected = (static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1;
+ uint64_t expected = (static_cast<uint128_t>(1) << (64+log_div)) / static_cast<uint128_t>(divider) - (static_cast<uint128_t>(1) << 64) + 1;
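+      // i.e. expected = (2^(64+log_div)) / divider - 2^64 + 1; the same value is rebuilt below with TensorUInt128 arithmetic.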
uint64_t shift = 1ULL << log_div;
TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
uint64_t actual = static_cast<uint64_t>(result);
- VERIFY_EQUAL(actual, expected);
+ VERIFY_IS_EQUAL(actual, expected);
}
}
}
+#endif
void test_cxx11_tensor_uint128()
{
+#ifdef EIGEN_NO_INT128
+  // Skip the test on compilers that don't support 128-bit integers natively
+ return;
+#else
CALL_SUBTEST_1(test_add());
CALL_SUBTEST_2(test_sub());
CALL_SUBTEST_3(test_mul());
CALL_SUBTEST_4(test_div());
CALL_SUBTEST_5(test_misc1());
CALL_SUBTEST_6(test_misc2());
+#endif
}
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index a2bdb99e4..6dc17bd17 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -23,6 +23,9 @@
using std::sqrt;
+// tolerance for checking the number of iterations: the observed evaluation
+// counts may exceed the reference counts by up to a third
+#define LM_EVAL_COUNT_TOL (4.0/3.0)
+
struct lmder_functor : DenseFunctor<double>
{
lmder_functor(void): DenseFunctor<double>(3,15) {}
@@ -631,7 +634,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev(), 79);
VERIFY_IS_EQUAL(lm.njev(), 72);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -652,7 +655,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -789,7 +792,8 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
- VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ // was: VERIFY_IS_EQUAL(info, 1);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
@@ -799,9 +803,13 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
- //VERIFY_IS_EQUAL(info, 1);
+
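+  // Raising g_test_level turns the exact historical iteration counts into soft checks;
+  // the LM_EVAL_COUNT_TOL bounds below remain fatal.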
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 284 );
VERIFY_IS_EQUAL(lm.njev(), 249 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
/*
* Second try
@@ -809,7 +817,10 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
+ ++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ // was: VERIFY_IS_EQUAL(info, 1);
+ --g_test_level;
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
@@ -819,9 +830,12 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
- //VERIFY_IS_EQUAL(info, 1);
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 126);
VERIFY_IS_EQUAL(lm.njev(), 116);
+ --g_test_level;
+ VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}
@@ -896,8 +910,12 @@ void testNistBoxBOD(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 16 );
VERIFY_IS_EQUAL(lm.njev(), 15 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
// check x
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 487d5a9b8..9a995f941 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A)
MatrixType scaledA;
RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff();
- if (maxImagPartOfSpectrum >= 0.9 * M_PI)
- scaledA = A * 0.9 * M_PI / maxImagPartOfSpectrum;
+ if (maxImagPartOfSpectrum >= 0.9 * EIGEN_PI)
+ scaledA = A * 0.9 * EIGEN_PI / maxImagPartOfSpectrum;
else
scaledA = A;
diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp
index baf183d12..8e104ed1e 100644
--- a/unsupported/test/matrix_power.cpp
+++ b/unsupported/test/matrix_power.cpp
@@ -24,7 +24,7 @@ void test2dRotation(double tol)
s = std::sin(angle);
B << c, s, -s, c;
- C = Apow(std::ldexp(angle,1) / M_PI);
+ C = Apow(std::ldexp(angle,1) / EIGEN_PI);
std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n';
VERIFY(C.isApprox(B, tol));
}
diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h
index c4f6cf0cb..9b0cf7268 100644
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h
@@ -72,14 +72,12 @@
#define MPREAL_VERSION_STRING "3.6.2"
// Detect compiler using signatures from http://predef.sourceforge.net/
-#if defined(__GNUC__) && defined(__INTEL_COMPILER)
- #define IsInf(x) isinf(x) // Intel ICC compiler on Linux
-
+#if defined(__GNUC__)
+ #define IsInf(x) (isinf)(x) // GNU C++/Intel ICC compiler on Linux
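+ // note: (isinf)(x) suppresses expansion of a function-like isinf macro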
#elif defined(_MSC_VER) // Microsoft Visual C++
#define IsInf(x) (!_finite(x))
-
#else
- #define IsInf(x) std::isinf EIGEN_NOT_A_MACRO (x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance
+ #define IsInf(x) (std::isinf)(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance
#endif
// A Clang feature extension to determine compiler features.
@@ -3103,4 +3101,4 @@ namespace std
}
-#endif /* __MPREAL_H__ */ \ No newline at end of file
+#endif /* __MPREAL_H__ */
diff --git a/unsupported/test/splines.cpp b/unsupported/test/splines.cpp
index 97665af96..3be020434 100644
--- a/unsupported/test/splines.cpp
+++ b/unsupported/test/splines.cpp
@@ -239,7 +239,7 @@ void check_global_interpolation_with_derivatives2d()
typedef Spline2d::PointType PointType;
typedef Spline2d::KnotVectorType KnotVectorType;
- const unsigned int numPoints = 100;
+ const Eigen::DenseIndex numPoints = 100;
const unsigned int dimension = 2;
const unsigned int degree = 3;