Diffstat (limited to 'unsupported/test')
-rw-r--r--  unsupported/test/CMakeLists.txt                    53
-rw-r--r--  unsupported/test/FFTW.cpp                           2
-rw-r--r--  unsupported/test/NonLinearOptimization.cpp         16
-rw-r--r--  unsupported/test/autodiff.cpp                       3
-rw-r--r--  unsupported/test/cxx11_eventcount.cpp             140
-rw-r--r--  unsupported/test/cxx11_float16.cpp (renamed from unsupported/test/float16.cpp)  93
-rw-r--r--  unsupported/test/cxx11_meta.cpp                     2
-rw-r--r--  unsupported/test/cxx11_runqueue.cpp               227
-rw-r--r--  unsupported/test/cxx11_tensor_argmax.cpp            8
-rw-r--r--  unsupported/test/cxx11_tensor_contract_cuda.cu     62
-rw-r--r--  unsupported/test/cxx11_tensor_contraction.cpp       9
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu              31
-rw-r--r--  unsupported/test/cxx11_tensor_device.cu             4
-rw-r--r--  unsupported/test/cxx11_tensor_dimension.cpp        13
-rw-r--r--  unsupported/test/cxx11_tensor_empty.cpp             8
-rw-r--r--  unsupported/test/cxx11_tensor_expr.cpp             24
-rw-r--r--  unsupported/test/cxx11_tensor_fft.cpp               8
-rw-r--r--  unsupported/test/cxx11_tensor_fixed_size.cpp       24
-rw-r--r--  unsupported/test/cxx11_tensor_forced_eval.cpp       9
-rw-r--r--  unsupported/test/cxx11_tensor_image_patch.cpp      49
-rw-r--r--  unsupported/test/cxx11_tensor_map.cpp              36
-rw-r--r--  unsupported/test/cxx11_tensor_math.cpp              4
-rw-r--r--  unsupported/test/cxx11_tensor_mixed_indices.cpp     4
-rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu  226
-rw-r--r--  unsupported/test/cxx11_tensor_simple.cpp            5
-rw-r--r--  unsupported/test/cxx11_tensor_thread_pool.cpp      41
-rw-r--r--  unsupported/test/levenberg_marquardt.cpp           30
-rw-r--r--  unsupported/test/matrix_function.cpp                4
-rw-r--r--  unsupported/test/matrix_power.cpp                   2
29 files changed, 921 insertions, 216 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 96652bfcf..22442b394 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -110,34 +110,48 @@ ei_add_test(minres)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
-ei_add_test(float16)
+# TODO: The following test names are prefixed with the cxx11 string, since historically
+# the tests depended on c++11. This isn't the case anymore, so we ought to rename them.
+ei_add_test(cxx11_float16)
+ei_add_test(cxx11_tensor_dimension)
+ei_add_test(cxx11_tensor_map)
+ei_add_test(cxx11_tensor_assign)
+ei_add_test(cxx11_tensor_comparisons)
+ei_add_test(cxx11_tensor_forced_eval)
+ei_add_test(cxx11_tensor_math)
+ei_add_test(cxx11_tensor_const)
+ei_add_test(cxx11_tensor_intdiv)
+ei_add_test(cxx11_tensor_casts)
+ei_add_test(cxx11_tensor_empty)
+ei_add_test(cxx11_tensor_sugar)
+ei_add_test(cxx11_tensor_roundings)
+ei_add_test(cxx11_tensor_layout_swap)
+ei_add_test(cxx11_tensor_io)
+if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+ # This test requires __uint128_t, which is only available on 64-bit systems
+ ei_add_test(cxx11_tensor_uint128)
+endif()
if(EIGEN_TEST_CXX11)
# It should be safe to always run these tests as there is some fallback code for
# older compilers that don't support cxx11.
set(CMAKE_CXX_STANDARD 11)
+ ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry)
- ei_add_test(cxx11_tensor_assign)
- ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_mixed_indices)
- ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_expr)
- ei_add_test(cxx11_tensor_math)
- ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_fixed_size)
- ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_strings)
- ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_lvalue)
- ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_concatenation)
@@ -155,23 +169,11 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_random)
- ei_add_test(cxx11_tensor_casts)
- ei_add_test(cxx11_tensor_roundings)
- ei_add_test(cxx11_tensor_reverse)
- ei_add_test(cxx11_tensor_layout_swap)
- ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_custom_index)
- ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_ifft)
- ei_add_test(cxx11_tensor_empty)
-
- if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
- # This test requires __uint128_t which is only available on 64bit systems
- ei_add_test(cxx11_tensor_uint128)
- endif()
endif()
@@ -191,6 +193,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
endif()
+ if(EIGEN_TEST_CUDA_CLANG)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
+ endif()
+
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
@@ -207,10 +213,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
ei_add_test(cxx11_tensor_random_cuda)
endif()
- # Operations other that casting of half floats are only supported starting with arch 5.3
- if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52)
- ei_add_test(cxx11_tensor_of_float16_cuda)
- endif()
+ ei_add_test(cxx11_tensor_of_float16_cuda)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index d3718e2d2..1dd6dc97d 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -54,7 +54,7 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
- totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.;
+ totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2;
difpower += numext::abs2(buf1[k] - buf2[k]);
}
return sqrt(difpower/totalpower);
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index 724ea7b5b..6a5ed057f 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -14,6 +14,9 @@
using std::sqrt;
+// tolerance for checking the number of iterations
+#define LM_EVAL_COUNT_TOL 4/3
+
int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag)
{
/* subroutine fcn for chkder example. */
@@ -1023,7 +1026,8 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.njev, 72);
// check norm^2
std::cout.precision(30);
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ std::cout << lm.fvec.squaredNorm() << "\n";
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1044,7 +1048,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev, 9);
VERIFY_IS_EQUAL(lm.njev, 8);
// check norm^2
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1354,8 +1358,12 @@ void testNistMGH17(void)
// check return value
VERIFY_IS_EQUAL(info, 2);
- VERIFY(lm.nfev < 650); // 602
- VERIFY(lm.njev < 600); // 545
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev, 602); // 602
+ VERIFY_IS_EQUAL(lm.njev, 545); // 545
+ --g_test_level;
+ VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL);
/*
* Second try
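Note that LM_EVAL_COUNT_TOL is deliberately left unparenthesized: since * and / have equal precedence and associate left to right, 602 * LM_EVAL_COUNT_TOL expands to 602 * 4/3, which evaluates as (602 * 4) / 3 = 802 in integer arithmetic. Parenthesizing the macro body as (4/3) would collapse it to the integer 1 and make the tolerance a no-op. A minimal sketch of the difference:

    #include <cassert>

    #define LM_EVAL_COUNT_TOL 4/3            // expands textually
    #define LM_EVAL_COUNT_TOL_PAREN (4/3)    // integer division: 1

    int main() {
      assert(602 * LM_EVAL_COUNT_TOL == 802);        // (602 * 4) / 3
      assert(602 * LM_EVAL_COUNT_TOL_PAREN == 602);  // 602 * 1
      return 0;
    }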
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 374f86df9..c4606cd17 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
- return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0));
+ // pow(float, int) promotes to pow(double, double)
+ return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0);
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
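The cast around pow matters because of the additional <cmath> overloads: when any argument to pow has integer type, all arguments are effectively converted to double, so with a float Scalar the subexpression pow(1+x, 2) yields a double and can drag the surrounding expression to the wrong type. A minimal illustration with plain floats (no autodiff types involved):

    #include <cmath>
    #include <type_traits>

    int main() {
      float x = 2.0f;
      // The int exponent triggers promotion: pow(float, int) behaves like
      // pow(double, double) and returns double, not float.
      static_assert(std::is_same<decltype(std::pow(x, 2)), double>::value,
                    "pow(float, int) yields double");
      return 0;
    }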
diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp
new file mode 100644
index 000000000..f16cc6f07
--- /dev/null
+++ b/unsupported/test/cxx11_eventcount.cpp
@@ -0,0 +1,140 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+// Visual Studio doesn't implement a rand_r() function since its
+// implementation of rand() is already thread safe
+int rand_reentrant(unsigned int* s) {
+#ifdef EIGEN_COMP_MSVC_STRICT
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
+static void test_basic_eventcount()
+{
+ std::vector<EventCount::Waiter> waiters(1);
+ EventCount ec(waiters);
+ EventCount::Waiter& w = waiters[0];
+ ec.Notify(false);
+ ec.Prewait(&w);
+ ec.Notify(true);
+ ec.CommitWait(&w);
+ ec.Prewait(&w);
+ ec.CancelWait(&w);
+}
+
+// Fake bounded counter-based queue.
+struct TestQueue {
+ std::atomic<int> val_;
+ static const int kQueueSize = 10;
+
+ TestQueue() : val_() {}
+
+ ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
+
+ bool Push() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == kQueueSize) return false;
+ if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Pop() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == 0) return false;
+ if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
+};
+
+const int TestQueue::kQueueSize;
+
+// A number of producers send messages to a set of consumers using a set of
+// fake queues. Ensure that it does not crash, consumers don't deadlock, and
+// the numbers of blocked and unblocked threads match.
+static void test_stress_eventcount()
+{
+ const int kThreads = std::thread::hardware_concurrency();
+ static const int kEvents = 1 << 16;
+ static const int kQueues = 10;
+
+ std::vector<EventCount::Waiter> waiters(kThreads);
+ EventCount ec(waiters);
+ TestQueue queues[kQueues];
+
+ std::vector<std::unique_ptr<std::thread>> producers;
+ for (int i = 0; i < kThreads; i++) {
+ producers.emplace_back(new std::thread([&ec, &queues]() {
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Push()) {
+ ec.Notify(false);
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ }));
+ }
+
+ std::vector<std::unique_ptr<std::thread>> consumers;
+ for (int i = 0; i < kThreads; i++) {
+ consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
+ EventCount::Waiter& w = waiters[i];
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Pop()) continue;
+ j--;
+ ec.Prewait(&w);
+ bool empty = true;
+ for (int q = 0; q < kQueues; q++) {
+ if (!queues[q].Empty()) {
+ empty = false;
+ break;
+ }
+ }
+ if (!empty) {
+ ec.CancelWait(&w);
+ continue;
+ }
+ ec.CommitWait(&w);
+ }
+ }));
+ }
+
+ for (int i = 0; i < kThreads; i++) {
+ producers[i]->join();
+ consumers[i]->join();
+ }
+}
+
+void test_cxx11_eventcount()
+{
+ CALL_SUBTEST(test_basic_eventcount());
+ CALL_SUBTEST(test_stress_eventcount());
+}
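For context, the Prewait/CommitWait/CancelWait triple used above implements the standard "re-check the condition between registering and blocking" protocol: a consumer announces its intent to sleep, re-checks the queues, and either cancels (work appeared concurrently) or commits to blocking. A minimal sketch of one blocking round, assuming the EventCount interface exercised by the test; work_available() is a hypothetical stand-in for the queue checks:

    #define EIGEN_USE_THREADS
    #include <Eigen/CXX11/ThreadPool>

    // Hypothetical predicate standing in for "some queue is non-empty".
    static bool work_available() { return false; }

    void wait_for_work(Eigen::EventCount& ec, Eigen::EventCount::Waiter& w) {
      ec.Prewait(&w);          // announce intent to block
      if (work_available()) {  // re-check after the announcement
        ec.CancelWait(&w);     // a producer raced us; don't sleep
      } else {
        ec.CommitWait(&w);     // sleep until a producer calls ec.Notify(...)
      }
    }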
diff --git a/unsupported/test/float16.cpp b/unsupported/test/cxx11_float16.cpp
index 13f3ddaca..9141c4820 100644
--- a/unsupported/test/float16.cpp
+++ b/unsupported/test/cxx11_float16.cpp
@@ -7,7 +7,7 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC float16
+#define EIGEN_TEST_FUNC cxx11_float16
#include "main.h"
#include <Eigen/src/Core/arch/CUDA/Half.h>
@@ -31,11 +31,11 @@ void test_conversion()
VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
// Verify round-to-nearest-even behavior.
- float val1 = float(half(__half{0x3c00}));
- float val2 = float(half(__half{0x3c01}));
- float val3 = float(half(__half{0x3c02}));
- VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
- VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
+ float val1 = float(half(__half(0x3c00)));
+ float val2 = float(half(__half(0x3c01)));
+ float val3 = float(half(__half(0x3c02)));
+ VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00);
+ VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02);
// Conversion from int.
VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
@@ -49,35 +49,43 @@ void test_conversion()
VERIFY_IS_EQUAL(half(true).x, 0x3c00);
// Conversion to float.
- VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f);
- VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f);
+ VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f);
+ VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f);
// Denormals.
- VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f);
+ VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f);
+ VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f);
// NaNs and infinities.
VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
VERIFY(!(numext::isnan)(float(half(0.0f))));
- VERIFY((numext::isinf)(float(half(__half{0xfc00}))));
- VERIFY((numext::isnan)(float(half(__half{0xfc01}))));
- VERIFY((numext::isinf)(float(half(__half{0x7c00}))));
- VERIFY((numext::isnan)(float(half(__half{0x7c01}))));
+ VERIFY((numext::isinf)(float(half(__half(0xfc00)))));
+ VERIFY((numext::isnan)(float(half(__half(0xfc01)))));
+ VERIFY((numext::isinf)(float(half(__half(0x7c00)))));
+ VERIFY((numext::isnan)(float(half(__half(0x7c01)))));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
+#endif
// Exactly same checks as above, just directly on the half representation.
- VERIFY(!(numext::isinf)(half(__half{0x7bff})));
- VERIFY(!(numext::isnan)(half(__half{0x0000})));
- VERIFY((numext::isinf)(half(__half{0xfc00})));
- VERIFY((numext::isnan)(half(__half{0xfc01})));
- VERIFY((numext::isinf)(half(__half{0x7c00})));
- VERIFY((numext::isnan)(half(__half{0x7c01})));
+ VERIFY(!(numext::isinf)(half(__half(0x7bff))));
+ VERIFY(!(numext::isnan)(half(__half(0x0000))));
+ VERIFY((numext::isinf)(half(__half(0xfc00))));
+ VERIFY((numext::isnan)(half(__half(0xfc01))));
+ VERIFY((numext::isinf)(half(__half(0x7c00))));
+ VERIFY((numext::isnan)(half(__half(0x7c01))));
+
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
VERIFY((numext::isnan)(half(0.0 / 0.0)));
VERIFY((numext::isinf)(half(1.0 / 0.0)));
VERIFY((numext::isinf)(half(-1.0 / 0.0)));
+#endif
}
void test_arithmetic()
@@ -114,6 +122,8 @@ void test_comparison()
VERIFY(half(1.0f) != half(2.0f));
// Comparisons with NaNs and infinities.
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
@@ -124,13 +134,26 @@ void test_comparison()
VERIFY(half(1.0) < half(1.0 / 0.0));
VERIFY(half(1.0) > half(-1.0 / 0.0));
+#endif
}
-void test_functions()
+void test_basic_functions()
{
VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
+ VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
+ VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
+
+ VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
+ VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
+
+ VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
+
+ VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
+
VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
@@ -138,10 +161,32 @@ void test_functions()
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
}
-void test_float16()
+void test_trigonometric_functions()
+{
+ VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
+ VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
+ //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
+ //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
+
+ VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
+ // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
+ VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
+
+ VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
+ // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
+ // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
+ //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
+}
+
+void test_cxx11_float16()
{
CALL_SUBTEST(test_conversion());
CALL_SUBTEST(test_arithmetic());
CALL_SUBTEST(test_comparison());
- CALL_SUBTEST(test_functions());
+ CALL_SUBTEST(test_basic_functions());
+ CALL_SUBTEST(test_trigonometric_functions());
}
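The hexadecimal constants above are raw IEEE 754 binary16 encodings: 1 sign bit, 5 exponent bits (bias 15), 10 mantissa bits. For example, 0x3c00 is 1.0, 0x3c01 is the next representable value 1 + 2^-10, and 0x7c00/0xfc00 are +inf/-inf (all-ones exponent, zero mantissa). A minimal decoder sketch for normal (finite, non-denormal) values:

    #include <cassert>
    #include <cmath>
    #include <stdint.h>

    // Decode a binary16 bit pattern with a normal exponent (1..30).
    float decode_half_normal(uint16_t h) {
      int sign = (h >> 15) & 0x1;
      int exp  = (h >> 10) & 0x1f;   // biased exponent, bias = 15
      int mant = h & 0x3ff;          // 10 fraction bits
      float v  = std::ldexp(1.0f + mant / 1024.0f, exp - 15);
      return sign ? -v : v;
    }

    int main() {
      assert(decode_half_normal(0x3c00) == 1.0f);                  // 1.0
      assert(decode_half_normal(0x3c01) == 1.0f + 1.0f / 1024.0f); // next value up
      assert(decode_half_normal(0xbc00) == -1.0f);                 // matches half(-1).x
      return 0;
    }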
diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp
index ecac3add1..8911c59d8 100644
--- a/unsupported/test/cxx11_meta.cpp
+++ b/unsupported/test/cxx11_meta.cpp
@@ -10,7 +10,7 @@
#include "main.h"
#include <array>
-#include <Eigen/CXX11/Core>
+#include <Eigen/CXX11/src/util/CXX11Meta.h>
using Eigen::internal::is_same;
using Eigen::internal::type_list;
diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp
new file mode 100644
index 000000000..d20d87111
--- /dev/null
+++ b/unsupported/test/cxx11_runqueue.cpp
@@ -0,0 +1,227 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include <cstdlib>
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+
+// Visual Studio doesn't implement a rand_r() function since its
+// implementation of rand() is already thread safe
+int rand_reentrant(unsigned int* s) {
+#ifdef EIGEN_COMP_MSVC_STRICT
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
+void test_basic_runqueue()
+{
+ RunQueue<int, 4> q;
+ // Check empty state.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ std::vector<int> stolen;
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ // Push one front, pop one front.
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ // Push front to overflow.
+ VERIFY_IS_EQUAL(0, q.PushFront(2));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(3));
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(4));
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(5));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(6, q.PushFront(6));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(5, q.PopFront());
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(4, q.PopFront());
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(3, q.PopFront());
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(2, q.PopFront());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ // Push one back, pop one back.
+ VERIFY_IS_EQUAL(0, q.PushBack(7));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(7, stolen[0]);
+ VERIFY_IS_EQUAL(0u, q.Size());
+ stolen.clear();
+ // Push back to overflow.
+ VERIFY_IS_EQUAL(0, q.PushBack(8));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(9));
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(10));
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(11));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(12, q.PushBack(12));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ // Pop back in halves.
+ VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(2u, stolen.size());
+ VERIFY_IS_EQUAL(10, stolen[0]);
+ VERIFY_IS_EQUAL(11, stolen[1]);
+ VERIFY_IS_EQUAL(2u, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(9, stolen[0]);
+ VERIFY_IS_EQUAL(1u, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(8, stolen[0]);
+ stolen.clear();
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ // Empty again.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+}
+
+// test_empty_runqueue verifies that the queue is not claimed to be empty when it
+// is in fact not. The emptiness property is a crucial part of the thread pool
+// blocking scheme, so we go to great effort to ensure it. We create a queue with
+// 1 element, then repeatedly push 1 element (either front or back at random) and
+// pop 1 element (either front or back at random). So the queue always contains at
+// least 1 element, but otherwise changes chaotically. Another thread constantly
+// tests that the queue is not claimed to be empty.
+void test_empty_runqueue()
+{
+ RunQueue<int, 4> q;
+ q.PushFront(1);
+ std::atomic<bool> done(false);
+ std::thread mutator([&q, &done]() {
+ unsigned rnd = 0;
+ std::vector<int> stolen;
+ for (int i = 0; i < 1 << 18; i++) {
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ else
+ VERIFY_IS_EQUAL(0, q.PushBack(1));
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ else {
+ for (;;) {
+ if (q.PopBackHalf(&stolen) == 1) {
+ stolen.clear();
+ break;
+ }
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ }
+ }
+ }
+ done = true;
+ });
+ while (!done) {
+ VERIFY(!q.Empty());
+ int size = q.Size();
+ VERIFY_GE(size, 1);
+ VERIFY_LE(size, 2);
+ }
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ mutator.join();
+}
+
+// Stress is a chaotic random test.
+// One thread (owner) calls PushFront/PopFront, other threads call PushBack/
+// PopBack. Ensure that we don't crash or deadlock and that all sanity checks pass.
+void test_stress_runqueue()
+{
+ static const int kEvents = 1 << 18;
+ RunQueue<int, 8> q;
+ std::atomic<int> total(0);
+ std::vector<std::unique_ptr<std::thread>> threads;
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ int pushed = 1;
+ int popped = 1;
+ while (pushed < kEvents || popped < kEvents) {
+ if (pushed < kEvents) {
+ if (q.PushFront(pushed) == 0) {
+ sum += pushed;
+ pushed++;
+ }
+ }
+ if (popped < kEvents) {
+ int v = q.PopFront();
+ if (v != 0) {
+ sum -= v;
+ popped++;
+ }
+ }
+ }
+ total += sum;
+ }));
+ for (int i = 0; i < 2; i++) {
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ for (int j = 1; j < kEvents; j++) {
+ if (q.PushBack(j) == 0) {
+ sum += j;
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ total += sum;
+ }));
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ std::vector<int> stolen;
+ for (int j = 1; j < kEvents;) {
+ if (q.PopBackHalf(&stolen) == 0) {
+ EIGEN_THREAD_YIELD();
+ continue;
+ }
+ while (stolen.size() && j < kEvents) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ sum += v;
+ j++;
+ }
+ }
+ while (stolen.size()) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
+ }
+ total -= sum;
+ }));
+ }
+ for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
+ VERIFY(q.Empty());
+ VERIFY(total.load() == 0);
+}
+
+void test_cxx11_runqueue()
+{
+ CALL_SUBTEST_1(test_basic_runqueue());
+ CALL_SUBTEST_2(test_empty_runqueue());
+ CALL_SUBTEST_3(test_stress_runqueue());
+}
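As the assertions above spell out, the RunQueue contract is: PushFront/PushBack return 0 on success and hand the rejected element back when the fixed-capacity queue is full; PopFront returns 0 when the queue is empty; and PopBackHalf transfers up to half of the queued items to a work-stealing thief. A minimal sketch of the owner/thief split, assuming the interface used by the test:

    #define EIGEN_USE_THREADS
    #include <Eigen/CXX11/ThreadPool>
    #include <vector>

    void runqueue_sketch() {
      Eigen::RunQueue<int, 4> q;  // capacity fixed at compile time
      if (q.PushFront(42) != 0) {
        // Queue was full: the element comes back to the caller instead
        // of being silently dropped.
      }
      int v = q.PopFront();       // owner-side pop; 0 signals "empty"

      std::vector<int> stolen;    // thief side: steal up to half the items
      unsigned n = q.PopBackHalf(&stolen);
      (void)v; (void)n;
    }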
diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp
index 482dfa7de..037767270 100644
--- a/unsupported/test/cxx11_tensor_argmax.cpp
+++ b/unsupported/test/cxx11_tensor_argmax.cpp
@@ -64,7 +64,7 @@ static void test_argmax_tuple_reducer()
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
- dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>());
+ dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> maxi = tensor.maximum();
@@ -74,7 +74,7 @@ static void test_argmax_tuple_reducer()
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
- reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>());
+ reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
@@ -96,7 +96,7 @@ static void test_argmin_tuple_reducer()
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
- dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>());
+ dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> mini = tensor.minimum();
@@ -106,7 +106,7 @@ static void test_argmin_tuple_reducer()
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
- reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>());
+ reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
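The whitespace-only changes here (and the matching ones in cxx11_tensor_empty, cxx11_tensor_fixed_size, cxx11_tensor_forced_eval and cxx11_tensor_map below) restore C++03 compatibility: before C++11, a closing >> inside a nested template-id lexes as the right-shift operator, so the template arguments must be separated with a space. A minimal illustration:

    #include <vector>

    // C++11 and later accept both declarations; C++03 rejects the first
    // because ">>" is tokenized as the right-shift operator.
    std::vector<std::vector<int>> a;   // ill-formed in C++03
    std::vector<std::vector<int> > b;  // valid in C++03 and later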
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 6d1ef07f9..98ac180ef 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -84,6 +84,65 @@ void test_cuda_contraction(int m_size, int k_size, int n_size)
cudaFree((void*)d_t_result);
}
+
+template<int DataLayout>
+void test_scalar(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+ // both input dimensions are contracted away here, so the result is a
+ // single scalar accumulated from all m_size * k_size element products
+ Tensor<float, 2, DataLayout> t_left(m_size, k_size);
+ Tensor<float, 2, DataLayout> t_right(k_size, n_size);
+ Tensor<float, 0, DataLayout> t_result;
+ Tensor<float, 0, DataLayout> t_result_gpu;
+ Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1));
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ std::size_t t_left_bytes = t_left.size() * sizeof(float);
+ std::size_t t_right_bytes = t_right.size() * sizeof(float);
+ std::size_t t_result_bytes = sizeof(float);
+
+ float* d_t_left;
+ float* d_t_right;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_left), t_left_bytes);
+ cudaMalloc((void**)(&d_t_right), t_right_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_left(d_t_left, m_size, k_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_right(d_t_right, k_size, n_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> >
+ gpu_t_result(d_t_result);
+
+ gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
+ t_result = t_left.contract(t_right, dims);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+ if (fabs(t_result() - t_result_gpu()) > 1e-4f &&
+ !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) {
+ std::cout << "mismatch detected: " << t_result()
+ << " vs " << t_result_gpu() << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_left);
+ cudaFree((void*)d_t_right);
+ cudaFree((void*)d_t_result);
+}
+
+
template<int DataLayout>
void test_cuda_contraction_m() {
for (int k = 32; k < 256; k++) {
@@ -138,6 +197,9 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128));
+
CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index 0e16308a2..73623b2ed 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -87,19 +87,14 @@ static void test_scalar()
vec1.setRandom();
vec2.setRandom();
- Tensor<float, 1, DataLayout> scalar(1);
- scalar.setZero();
Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
- typedef TensorEvaluator<decltype(vec1.contract(vec2, dims)), DefaultDevice> Evaluator;
- Evaluator eval(vec1.contract(vec2, dims), DefaultDevice());
- eval.evalTo(scalar.data());
- EIGEN_STATIC_ASSERT(Evaluator::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims);
float expected = 0.0f;
for (int i = 0; i < 6; ++i) {
expected += vec1(i) * vec2(i);
}
- VERIFY_IS_APPROX(scalar(0), expected);
+ VERIFY_IS_APPROX(scalar(), expected);
}
template<int DataLayout>
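The rewrite above leans on rank-0 tensors: contracting two rank-1 tensors over their only dimension now yields a Tensor<float, 0>, which is read back by calling it with no indices, scalar(), rather than scalar(0). A standalone sketch of the same dot product, assuming the unsupported Tensor module headers:

    #include <unsupported/Eigen/CXX11/Tensor>

    float dot_product_via_contraction() {
      Eigen::Tensor<float, 1> a(6), b(6);
      a.setRandom();
      b.setRandom();
      typedef Eigen::Tensor<float, 1>::DimensionPair DimPair;
      Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};  // contract dim 0 with dim 0
      Eigen::Tensor<float, 0> r = a.contract(b, dims);    // rank-0 result
      return r();  // a rank-0 tensor is indexed with no arguments
    }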
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 134359611..4026f48f0 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -661,6 +661,9 @@ void test_cuda_digamma()
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
}
+
+ cudaFree(d_in);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -718,13 +721,17 @@ void test_cuda_zeta()
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
- VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
+ VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
}
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_q);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -787,6 +794,10 @@ void test_cuda_polygamma()
for (int i = 0; i < 7; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_n);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -826,9 +837,9 @@ void test_cuda_igamma()
Scalar* d_a;
Scalar* d_x;
Scalar* d_out;
- cudaMalloc((void**)(&d_a), bytes);
- cudaMalloc((void**)(&d_x), bytes);
- cudaMalloc((void**)(&d_out), bytes);
+ assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
@@ -854,6 +865,10 @@ void test_cuda_igamma()
}
}
}
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -920,6 +935,10 @@ void test_cuda_igammac()
}
}
}
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev)
Scalar* d_in;
Scalar* d_out;
- cudaMalloc((void**)(&d_in), bytes);
- cudaMalloc((void**)(&d_out), bytes);
+ assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
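One caveat with the assert(cudaMalloc(...) == cudaSuccess) pattern introduced in this file: assert expands to nothing when NDEBUG is defined, so in a release build the allocation call itself would be compiled out along with the check. A sketch of a checked-call helper that keeps the call in every build mode (the helper name is hypothetical, not part of this patch):

    #include <cassert>
    #include <cstddef>
    #include <cuda_runtime.h>

    // The cudaMalloc call always runs; only the check vanishes under NDEBUG.
    static void* checked_cuda_malloc(std::size_t bytes) {
      void* p = NULL;
      cudaError_t err = cudaMalloc(&p, bytes);
      assert(err == cudaSuccess);
      (void)err;  // avoid an unused-variable warning in NDEBUG builds
      return p;
    }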
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index cbe9e6449..b6ca54d93 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -241,7 +241,7 @@ void test_cpu() {
const float result = out(i,j,k);
const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) +
(in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
- if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) {
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
@@ -258,7 +258,7 @@ void test_cpu() {
in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) +
(in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
- if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) {
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
continue;
}
VERIFY_IS_APPROX(expected, result);
diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp
index ce78efe52..421e73693 100644
--- a/unsupported/test/cxx11_tensor_dimension.cpp
+++ b/unsupported/test/cxx11_tensor_dimension.cpp
@@ -37,7 +37,6 @@ static void test_fixed_size()
VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
}
-
static void test_match()
{
Eigen::DSizes<int, 3> dyn(2,3,7);
@@ -49,10 +48,22 @@ static void test_match()
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false);
}
+static void test_rank_zero()
+{
+ Eigen::Sizes<> scalar;
+ VERIFY_IS_EQUAL(scalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL(scalar.rank(), 0);
+ VERIFY_IS_EQUAL(internal::array_prod(scalar), 1);
+
+ Eigen::DSizes<ptrdiff_t, 0> dscalar;
+ VERIFY_IS_EQUAL(dscalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL(dscalar.rank(), 0);
+}
void test_cxx11_tensor_dimension()
{
CALL_SUBTEST(test_dynamic_size());
CALL_SUBTEST(test_fixed_size());
CALL_SUBTEST(test_match());
+ CALL_SUBTEST(test_rank_zero());
}
diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp
index 9130fff35..d7eea42d7 100644
--- a/unsupported/test/cxx11_tensor_empty.cpp
+++ b/unsupported/test/cxx11_tensor_empty.cpp
@@ -24,10 +24,10 @@ static void test_empty_tensor()
static void test_empty_fixed_size_tensor()
{
- TensorFixedSize<float, Sizes<0>> source;
- TensorFixedSize<float, Sizes<0>> tgt1 = source;
- TensorFixedSize<float, Sizes<0>> tgt2(source);
- TensorFixedSize<float, Sizes<0>> tgt3;
+ TensorFixedSize<float, Sizes<0> > source;
+ TensorFixedSize<float, Sizes<0> > tgt1 = source;
+ TensorFixedSize<float, Sizes<0> > tgt2(source);
+ TensorFixedSize<float, Sizes<0> > tgt3;
tgt3 = tgt1;
tgt3 = tgt2;
}
diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp
index 8389e9840..4dd355e6e 100644
--- a/unsupported/test/cxx11_tensor_expr.cpp
+++ b/unsupported/test/cxx11_tensor_expr.cpp
@@ -112,13 +112,13 @@ static void test_3d()
Tensor<float, 3> mat1(2,3,7);
Tensor<float, 3, RowMajor> mat2(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -142,7 +142,7 @@ static void test_3d()
Tensor<float, 3, RowMajor> mat11(2,3,7);
mat11 = mat2 / 3.14f;
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
@@ -155,7 +155,7 @@ static void test_3d()
VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f);
VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f);
VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f);
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -167,25 +167,25 @@ static void test_constants()
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
mat2 = mat1.constant(3.14f);
mat3 = mat1.cwiseMax(7.3f).exp();
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), 3.14f);
VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f)));
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -228,25 +228,25 @@ static void test_functors()
Tensor<float, 3> mat2(2,3,7);
Tensor<float, 3> mat3(2,3,7);
- float val = 1.0;
+ float val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
mat2 = mat1.inverse().unaryExpr(&asinf);
mat3 = mat1.unaryExpr(&tanhf);
- val = 1.0;
+ val = 1.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k)));
VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k)));
- val += 1.0;
+ val += 1.0f;
}
}
}
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp
index 89874349f..2f14ebc62 100644
--- a/unsupported/test/cxx11_tensor_fft.cpp
+++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -205,15 +205,15 @@ static void test_fft_real_input_energy() {
VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i));
}
- float energy_original = 0.0;
- float energy_after_fft = 0.0;
+ RealScalar energy_original = 0.0;
+ RealScalar energy_after_fft = 0.0;
for (int i = 0; i < total_size; ++i) {
- energy_original += pow(std::abs(input(i)), 2);
+ energy_original += numext::abs2(input(i));
}
for (int i = 0; i < total_size; ++i) {
- energy_after_fft += pow(std::abs(output(i)), 2);
+ energy_after_fft += numext::abs2(output(i));
}
if(FFTDirection == FFT_FORWARD) {
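The energy comparison is Parseval's theorem for the discrete Fourier transform: with the unnormalized forward transform

    X_k = \sum_{n=0}^{N-1} x_n \, e^{-2\pi i k n / N},

the identity

    \sum_{n=0}^{N-1} |x_n|^2 = \frac{1}{N} \sum_{k=0}^{N-1} |X_k|^2

holds, so after a forward FFT the accumulated squared magnitudes of the output should equal N times the input energy (up to the library's normalization convention), which is presumably what the FFT_FORWARD branch checks.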
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index 1c33fefb3..4c660de65 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -20,6 +20,8 @@ static void test_0d()
TensorFixedSize<float, Sizes<> > scalar1;
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
+ VERIFY_IS_EQUAL(scalar1.size(), 1);
+ VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
scalar1() = 7.0;
scalar2() = 13.0;
@@ -128,9 +130,9 @@ static void test_tensor_map()
static void test_2d()
{
float data1[6];
- TensorMap<TensorFixedSize<float, Sizes<2, 3> >> mat1(data1,2,3);
+ TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3);
float data2[6];
- TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor>> mat2(data2,2,3);
+ TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3);
VERIFY_IS_EQUAL((mat1.size()), 2*3);
VERIFY_IS_EQUAL(mat1.rank(), 2);
@@ -151,7 +153,7 @@ static void test_2d()
mat2(1,1) = -4.0;
mat2(1,2) = -5.0;
- TensorFixedSize<float, Sizes<2, 3>> mat3;
+ TensorFixedSize<float, Sizes<2, 3> > mat3;
TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4;
mat3 = mat1.abs();
mat4 = mat2.abs();
@@ -186,13 +188,13 @@ static void test_3d()
// VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
// VERIFY_IS_EQUAL((mat1.dimension(2)), 7);
- float val = 0.0;
+ float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
mat2(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -208,13 +210,13 @@ static void test_3d()
// VERIFY_IS_EQUAL((mat3.dimension(2)), 7);
- val = 0.0;
+ val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val));
VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val));
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -224,12 +226,12 @@ static void test_3d()
static void test_array()
{
TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
- float val = 0.0;
+ float val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
mat1(i,j,k) = val;
- val += 1.0;
+ val += 1.0f;
}
}
}
@@ -237,12 +239,12 @@ static void test_array()
TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
mat3 = mat1.pow(3.5f);
- val = 0.0;
+ val = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f));
- val += 1.0;
+ val += 1.0f;
}
}
}
diff --git a/unsupported/test/cxx11_tensor_forced_eval.cpp b/unsupported/test/cxx11_tensor_forced_eval.cpp
index ad9de867d..45d7345e9 100644
--- a/unsupported/test/cxx11_tensor_forced_eval.cpp
+++ b/unsupported/test/cxx11_tensor_forced_eval.cpp
@@ -22,14 +22,15 @@ static void test_simple()
m1.setRandom();
m2.setRandom();
- TensorMap<Tensor<float, 2>> mat1(m1.data(), 3,3);
- TensorMap<Tensor<float, 2>> mat2(m2.data(), 3,3);
+ TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3);
+ TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3);
Tensor<float, 2> mat3(3,3);
mat3 = mat1;
typedef Tensor<float, 1>::DimensionPair DimPair;
- Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}});
+ Eigen::array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
mat3 = mat3.contract(mat2, dims).eval();
@@ -60,7 +61,7 @@ static void test_const()
Eigen::array<int, 2> bcast;
bcast[0] = 3;
bcast[1] = 1;
- const TensorMap<Tensor<const float, 2>> input_tensor(input.data(), 3, 3);
+ const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3);
Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
for (int i = 0; i < 3; ++i) {
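The .eval() in mat3 = mat3.contract(mat2, dims).eval() is the point of this test: mat3 appears on both sides of the assignment, so the contraction must be forced into a temporary before the copy-back; otherwise the lazily evaluated expression would read from mat3 while it is being overwritten. A minimal sketch of the same aliasing situation, assuming the unsupported Tensor module:

    #include <unsupported/Eigen/CXX11/Tensor>

    void forced_eval_sketch() {
      Eigen::Tensor<float, 2> m(3, 3);
      m.setRandom();
      typedef Eigen::Tensor<float, 2>::DimensionPair DimPair;
      Eigen::array<DimPair, 1> dims;
      dims[0] = DimPair(1, 0);
      // .eval() materializes the product first, so the destination is not
      // read and written at the same time.
      m = m.contract(m, dims).eval();
    }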
diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp
index 5d6a49181..988b01481 100644
--- a/unsupported/test/cxx11_tensor_image_patch.cpp
+++ b/unsupported/test/cxx11_tensor_image_patch.cpp
@@ -568,13 +568,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 16);
// RowMajor
- Tensor<float, 4, RowMajor> l_in_row_major = l_in.swap_layout();
- VERIFY_IS_EQUAL(l_in.dimension(0), l_in_row_major.dimension(3));
- VERIFY_IS_EQUAL(l_in.dimension(1), l_in_row_major.dimension(2));
- VERIFY_IS_EQUAL(l_in.dimension(2), l_in_row_major.dimension(1));
- VERIFY_IS_EQUAL(l_in.dimension(3), l_in_row_major.dimension(0));
-
- Tensor<float, 5, RowMajor> l_out_row_major = l_in_row_major.extract_image_patches(11, 11);
+ Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
@@ -589,10 +583,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 11; ++r) {
for (int d = 0; d < 3; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
expected = l_in(d, r-5+i, c-5+j, b);
- expected_row_major = l_in_row_major(b, c-5+j, r-5+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -601,15 +593,13 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
if (l_out_row_major(b, patchId, c, r, d) !=
- expected_row_major) {
+ expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j
<< " r=" << r << " c=" << c << " d=" << d << " b=" << b
<< std::endl;
}
VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
- expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ expected);
}
}
}
@@ -628,8 +618,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(9, 9);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
@@ -644,10 +633,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 9; ++r) {
for (int d = 0; d < 16; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
expected = l_in(d, r-4+i, c-4+j, b);
- expected_row_major = l_in_row_major(b, c-4+j, r-4+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -655,12 +642,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
@@ -679,8 +664,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(7, 7);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
@@ -695,10 +679,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 7; ++r) {
for (int d = 0; d < 32; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
expected = l_in(d, r-3+i, c-3+j, b);
- expected_row_major = l_in_row_major(b, c-3+j, r-3+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -706,12 +688,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
@@ -730,8 +710,7 @@ static void test_imagenet_patches()
VERIFY_IS_EQUAL(l_out.dimension(4), 32);
// RowMajor
- l_in_row_major = l_in.swap_layout();
- l_out_row_major = l_in_row_major.extract_image_patches(3, 3);
+ l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3);
VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
@@ -746,10 +725,8 @@ static void test_imagenet_patches()
for (int r = 0; r < 3; ++r) {
for (int d = 0; d < 64; ++d) {
float expected = 0.0f;
- float expected_row_major = 0.0f;
if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
expected = l_in(d, r-1+i, c-1+j, b);
- expected_row_major = l_in_row_major(b, c-1+j, r-1+i, d);
}
// ColMajor
if (l_out(d, r, c, patchId, b) != expected) {
@@ -757,12 +734,10 @@ static void test_imagenet_patches()
}
VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
// RowMajor
- if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) {
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
}
- VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major);
- // Check that ColMajor and RowMajor agree.
- VERIFY_IS_EQUAL(expected, expected_row_major);
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
}
}
}
diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp
index a8a095e38..3db0ee7c0 100644
--- a/unsupported/test/cxx11_tensor_map.cpp
+++ b/unsupported/test/cxx11_tensor_map.cpp
@@ -19,8 +19,8 @@ static void test_0d()
Tensor<int, 0> scalar1;
Tensor<int, 0, RowMajor> scalar2;
- TensorMap<Tensor<const int, 0>> scalar3(scalar1.data());
- TensorMap<Tensor<const int, 0, RowMajor>> scalar4(scalar2.data());
+ TensorMap<Tensor<const int, 0> > scalar3(scalar1.data());
+ TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data());
scalar1() = 7;
scalar2() = 13;
@@ -37,8 +37,8 @@ static void test_1d()
Tensor<int, 1> vec1(6);
Tensor<int, 1, RowMajor> vec2(6);
- TensorMap<Tensor<const int, 1>> vec3(vec1.data(), 6);
- TensorMap<Tensor<const int, 1, RowMajor>> vec4(vec2.data(), 6);
+ TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6);
+ TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6);
vec1(0) = 4; vec2(0) = 0;
vec1(1) = 8; vec2(1) = 1;
@@ -85,8 +85,8 @@ static void test_2d()
mat2(1,1) = 4;
mat2(1,2) = 5;
- TensorMap<Tensor<const int, 2>> mat3(mat1.data(), 2, 3);
- TensorMap<Tensor<const int, 2, RowMajor>> mat4(mat2.data(), 2, 3);
+ TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3);
+ TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3);
VERIFY_IS_EQUAL(mat3.rank(), 2);
VERIFY_IS_EQUAL(mat3.size(), 6);
@@ -129,8 +129,8 @@ static void test_3d()
}
}
- TensorMap<Tensor<const int, 3>> mat3(mat1.data(), 2, 3, 7);
- TensorMap<Tensor<const int, 3, RowMajor>> mat4(mat2.data(), array<DenseIndex, 3>{{2, 3, 7}});
+ TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7);
+ TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7);
VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
@@ -173,8 +173,8 @@ static void test_from_tensor()
}
}
- TensorMap<Tensor<int, 3>> mat3(mat1);
- TensorMap<Tensor<int, 3, RowMajor>> mat4(mat2);
+ TensorMap<Tensor<int, 3> > mat3(mat1);
+ TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2);
VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
@@ -199,19 +199,23 @@ static void test_from_tensor()
}
}
- TensorFixedSize<int, Sizes<2,3,7>> mat5;
+ TensorFixedSize<int, Sizes<2,3,7> > mat5;
val = 0;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
- mat5(i,j,k) = val;
+ array<ptrdiff_t, 3> coords;
+ coords[0] = i;
+ coords[1] = j;
+ coords[2] = k;
+ mat5(coords) = val;
val++;
}
}
}
- TensorMap<TensorFixedSize<int, Sizes<2,3,7>>> mat6(mat5);
+ TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5);
VERIFY_IS_EQUAL(mat6.rank(), 3);
VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
@@ -233,8 +237,8 @@ static void test_from_tensor()
static int f(const TensorMap<Tensor<int, 3> >& tensor) {
// Size<0> empty;
- EIGEN_STATIC_ASSERT((internal::array_size<Sizes<>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
Tensor<int, 0> result = tensor.sum();
return result();
}
@@ -253,7 +257,7 @@ static void test_casting()
}
}
- TensorMap<Tensor<int, 3>> map(tensor);
+ TensorMap<Tensor<int, 3> > map(tensor);
int sum1 = f(map);
int sum2 = f(tensor);
diff --git a/unsupported/test/cxx11_tensor_math.cpp b/unsupported/test/cxx11_tensor_math.cpp
index d247bebaa..61c742a16 100644
--- a/unsupported/test/cxx11_tensor_math.cpp
+++ b/unsupported/test/cxx11_tensor_math.cpp
@@ -16,7 +16,7 @@ using Eigen::RowMajor;
static void test_tanh()
{
- Tensor<float, 1> vec1({6});
+ Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.tanh();
@@ -28,7 +28,7 @@ static void test_tanh()
static void test_sigmoid()
{
- Tensor<float, 1> vec1({6});
+ Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.sigmoid();
diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp
index 72f826216..4fba6fdd1 100644
--- a/unsupported/test/cxx11_tensor_mixed_indices.cpp
+++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp
@@ -14,8 +14,8 @@
static void test_simple()
{
- Tensor<float, 1, ColMajor> vec1({6});
- Tensor<float, 1, ColMajor, int> vec2({6});
+ Tensor<float, 1, ColMajor> vec1(6);
+ Tensor<float, 1, ColMajor, int> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index cb917bb37..dceac793e 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -134,6 +134,68 @@ void test_cuda_elementwise() {
gpu_device.deallocate(d_res_float);
}
+void test_cuda_trancendental() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
+ d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
+ d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(
+ d_res1_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(
+ d_res1_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(
+ d_res2_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(
+ d_res2_float, num_elem);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
+ gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
+ gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
+ gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp();
+ gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log();
+
+ Tensor<float, 1> input1(num_elem);
+ Tensor<Eigen::half, 1> half_prec1(num_elem);
+ Tensor<Eigen::half, 1> full_prec1(num_elem);
+ Tensor<float, 1> input2(num_elem);
+ Tensor<Eigen::half, 1> half_prec2(num_elem);
+ Tensor<Eigen::half, 1> full_prec2(num_elem);
+ gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
+ }
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
+ }
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res1_half);
+ gpu_device.deallocate(d_res1_float);
+ gpu_device.deallocate(d_res2_half);
+ gpu_device.deallocate(d_res2_float);
+}
+
void test_cuda_contractions() {
Eigen::CudaStreamDevice stream;
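The new test evaluates exp and log twice: once in float with a final cast to half (the reference), and once directly in half arithmetic, then verifies the two rounded results agree. A CPU-side analogue of the log path (a sketch, assuming Eigen::half supports log() on the host):

    #include <unsupported/Eigen/CXX11/Tensor>
    using namespace Eigen;

    int main() {
      const int n = 101;
      Tensor<float, 1> x(n);
      x.setRandom();
      x = x.abs() + x.constant(0.5f);  // keep log() inputs strictly positive

      Tensor<half, 1> reference = x.log().cast<half>();  // compute in float, round once
      Tensor<half, 1> candidate = x.cast<half>().log();  // round first, compute in half

      for (int i = 0; i < n; ++i) {
        // Both paths should agree to within half-precision rounding.
        if (numext::abs(static_cast<float>(reference(i)) -
                        static_cast<float>(candidate(i))) > 1e-2f) {
          return 1;
        }
      }
      return 0;
    }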
@@ -144,36 +206,38 @@ void test_cuda_contractions() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_half(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half(
d_res_half, rows, cols);
- Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_float(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
- gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
- gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims);
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims).cast<float>();
+ gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
- Tensor<float, 2> half_prec(rows, cols);
- Tensor<float, 2> full_prec(rows, cols);
- gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ Tensor<Eigen::half, 2> half_prec(rows, cols);
+ Tensor<Eigen::half, 2> full_prec(rows, cols);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
- std::cout << "Checking contract " << i << " " << j << std::endl;
- VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ std::cout << "Checking contract " << i << " " << j << " " << full_prec(i, j) << " " << half_prec(i, j) << std::endl;
+ if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) {
+ VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ }
}
}
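The explicit gate before VERIFY_IS_APPROX accounts for half's limited precision: with 10 fraction bits the machine epsilon is 2^-10, and rounding error in a dot product grows roughly with the number of accumulated terms k, so exact agreement between the float and half paths cannot be expected:

    eps_half = 2^-10 ~ 9.8e-4               (10 fraction bits)
    |error| <~ k * eps_half * max|a_i * b_i|
            ~  20 * 9.8e-4 ~ 2e-2           (illustrative k = 20 terms)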
@@ -192,29 +256,29 @@ void test_cuda_reductions() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
- float* d_res_half = (float*)gpu_device.allocate(size * sizeof(float));
- float* d_res_float = (float*)gpu_device.allocate(size * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(size * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(size * sizeof(Eigen::half));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, size, size);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, size, size);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half(
d_res_half, size);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, size);
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float2.device(gpu_device) = gpu_float2.random();
Eigen::array<int, 1> redux_dim = {{0}};
- gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim);
- gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim).cast<float>();
+ gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim);
- Tensor<float, 1> half_prec(size);
- Tensor<float, 1> full_prec(size);
- gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(float));
- gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(float));
+ Tensor<Eigen::half, 1> half_prec(size);
+ Tensor<Eigen::half, 1> full_prec(size);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < size; ++i) {
@@ -222,6 +286,61 @@ void test_cuda_reductions() {
VERIFY_IS_APPROX(full_prec(i), half_prec(i));
}
+ redux_dim = {{1}};
+ gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim);
+
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < size; ++i) {
+ std::cout << "Checking redux " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+
+void test_cuda_full_reductions() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int size = 13;
+ int num_elem = size*size;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, size, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, size, size);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half(
+ d_res_half);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float(
+ d_res_float);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random();
+ gpu_float2.device(gpu_device) = gpu_float2.random();
+
+ gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum();
+
+ Tensor<Eigen::half, 0> half_prec;
+ Tensor<Eigen::half, 0> full_prec;
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ VERIFY_IS_APPROX(full_prec(), half_prec());
+
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_half);
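test_cuda_full_reductions above reduces over all dimensions at once, which produces a rank-0 tensor whose single coefficient is read with operator()(). A minimal CPU sketch of that shape logic (assuming only the Tensor module):

    #include <unsupported/Eigen/CXX11/Tensor>
    using namespace Eigen;

    int main() {
      Tensor<float, 2> m(13, 13);
      m.setConstant(1.0f);
      // sum() with no dimension list reduces everything to a rank-0 tensor.
      Tensor<float, 0> total = m.sum();
      // A rank-0 tensor holds exactly one coefficient, accessed as total().
      return total() == 169.0f ? 0 : 1;
    }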
@@ -229,27 +348,58 @@ void test_cuda_reductions() {
}
+void test_cuda_forced_evals() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.abs();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking unary " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
#endif
void test_cxx11_tensor_of_float16_cuda()
{
#ifdef EIGEN_HAS_CUDA_FP16
- Eigen::CudaStreamDevice stream;
- Eigen::GpuDevice device(&stream);
- if (device.majorDeviceVersion() > 5 ||
- (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) {
- std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << std::endl;
-
- CALL_SUBTEST_1(test_cuda_conversion());
- CALL_SUBTEST_1(test_cuda_unary());
- CALL_SUBTEST_1(test_cuda_elementwise());
- CALL_SUBTEST_2(test_cuda_contractions());
- CALL_SUBTEST_3(test_cuda_reductions());
- }
- else {
- std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;
- }
+ CALL_SUBTEST_1(test_cuda_conversion());
+ CALL_SUBTEST_1(test_cuda_unary());
+ CALL_SUBTEST_1(test_cuda_elementwise());
+ CALL_SUBTEST_1(test_cuda_transcendental());
+ CALL_SUBTEST_2(test_cuda_contractions());
+ CALL_SUBTEST_3(test_cuda_reductions());
+ CALL_SUBTEST_3(test_cuda_full_reductions());
+ CALL_SUBTEST_4(test_cuda_forced_evals());
+
#else
std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
#endif
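Native half arithmetic still requires compute capability 5.3 or newer; the driver above no longer checks this at runtime. For reference, a standalone sketch of the capability query the removed guard performed, using GpuDevice's version accessors:

    #define EIGEN_USE_GPU
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::CudaStreamDevice stream;
      Eigen::GpuDevice device(&stream);
      // Half kernels need compute capability 5.3+.
      const bool fp16_ok =
          device.majorDeviceVersion() > 5 ||
          (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3);
      std::cout << (fp16_ok ? "fp16 supported" : "fp16 unsupported") << std::endl;
      return 0;
    }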
diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp
index 47d4d8636..fe860c970 100644
--- a/unsupported/test/cxx11_tensor_simple.cpp
+++ b/unsupported/test/cxx11_tensor_simple.cpp
@@ -195,7 +195,10 @@ static void test_3d()
VERIFY_IS_EQUAL((epsilon(0,2,1)), -1);
VERIFY_IS_EQUAL((epsilon(1,0,2)), -1);
- array<Eigen::DenseIndex, 3> dims{{2,3,4}};
+ array<Eigen::DenseIndex, 3> dims;
+ dims[0] = 2;
+ dims[1] = 3;
+ dims[2] = 4;
Tensor<int, 3> t1(dims);
Tensor<int, 3, RowMajor> t2(dims);
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index e46197464..423074a38 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -226,7 +226,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
for (ptrdiff_t i = 0; i < st_result.size(); i++) {
// if both of the values are very small, then do nothing (because the test will fail
// due to numerical precision issues when values are small)
- if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) {
+ if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
}
}
@@ -234,6 +234,42 @@ void test_multithread_contraction_agrees_with_singlethread() {
template<int DataLayout>
+void test_full_contraction() {
+ int contract_size1 = internal::random<int>(1, 500);
+ int contract_size2 = internal::random<int>(1, 500);
+
+ Tensor<float, 2, DataLayout> left(contract_size1,
+ contract_size2);
+ Tensor<float, 2, DataLayout> right(contract_size1,
+ contract_size2);
+ left.setRandom();
+ right.setRandom();
+
+ // add constants to shift values away from 0 for more precision
+ left += left.constant(1.5f);
+ right += right.constant(1.5f);
+
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
+
+ Eigen::ThreadPool tp(internal::random<int>(2, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
+
+ Tensor<float, 0, DataLayout> st_result;
+ st_result = left.contract(right, dims);
+
+ Tensor<float, 0, DataLayout> tp_result;
+ tp_result.device(thread_pool_device) = left.contract(right, dims);
+
+ VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
+ // if both of the values are very small, then do nothing (because the test will fail
+ // due to numerical precision issues when values are small)
+ if (fabs(st_result() - tp_result()) >= 1e-4f) {
+ VERIFY_IS_APPROX(st_result(), tp_result());
+ }
+}
+
+template<int DataLayout>
void test_multithreaded_reductions() {
const int num_threads = internal::random<int>(3, 11);
ThreadPool thread_pool(num_threads);
@@ -324,6 +360,9 @@ void test_cxx11_tensor_thread_pool()
CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
+ CALL_SUBTEST_4(test_full_contraction<ColMajor>());
+ CALL_SUBTEST_4(test_full_contraction<RowMajor>());
+
CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
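test_full_contraction contracts over both indices, so the result is a rank-0 tensor, and the same contraction expression is evaluated single-threaded and on a ThreadPoolDevice. A condensed sketch of the added test's core (assuming EIGEN_USE_THREADS provides ThreadPool and ThreadPoolDevice, as in this test file):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cmath>
    using namespace Eigen;

    int main() {
      Tensor<float, 2> left(40, 50), right(40, 50);
      left.setRandom();
      right.setRandom();
      // Shift values away from 0 so the comparison below is meaningful.
      left += left.constant(1.5f);
      right += right.constant(1.5f);

      typedef Tensor<float, 2>::DimensionPair DimPair;
      Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});

      Tensor<float, 0> st_result = left.contract(right, dims);  // single-threaded

      ThreadPool pool(4);
      ThreadPoolDevice device(&pool, 4);
      Tensor<float, 0> tp_result;
      tp_result.device(device) = left.contract(right, dims);    // thread pool

      return std::fabs(st_result() - tp_result()) < 1e-4f * st_result() ? 0 : 1;
    }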
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index a2bdb99e4..64f168c16 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -23,6 +23,9 @@
using std::sqrt;
+// tolerance for checking number of iterations
+#define LM_EVAL_COUNT_TOL 4/3
+
struct lmder_functor : DenseFunctor<double>
{
lmder_functor(void): DenseFunctor<double>(3,15) {}
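Note that LM_EVAL_COUNT_TOL expands to the integer expression 4/3, which only yields the intended one-third slack because it is substituted inside a multiplication, where left-to-right evaluation applies the division last:

    284 * LM_EVAL_COUNT_TOL  ->  284 * 4 / 3  =  1136 / 3  =  378   (integer division last)
    LM_EVAL_COUNT_TOL alone  ->  4 / 3        =  1                  (would truncate)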
@@ -631,7 +634,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev(), 79);
VERIFY_IS_EQUAL(lm.njev(), 72);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -652,7 +655,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -789,7 +792,10 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
- VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
+ ++g_test_level;
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ --g_test_level;
+ // was: VERIFY_IS_EQUAL(info, 1);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
@@ -799,9 +805,13 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
- //VERIFY_IS_EQUAL(info, 1);
+
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 284 );
VERIFY_IS_EQUAL(lm.njev(), 249 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
/*
* Second try
@@ -809,7 +819,10 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
+ ++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ // was: VERIFY_IS_EQUAL(info, 1);
+ --g_test_level;
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
@@ -819,9 +832,12 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
- //VERIFY_IS_EQUAL(info, 1);
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 126);
VERIFY_IS_EQUAL(lm.njev(), 116);
+ --g_test_level;
+ VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}
@@ -896,8 +912,12 @@ void testNistBoxBOD(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
+ ++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 16 );
VERIFY_IS_EQUAL(lm.njev(), 15 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
// check x
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 9a995f941..cd24064ad 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A)
MatrixType scaledA;
RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff();
- if (maxImagPartOfSpectrum >= 0.9 * EIGEN_PI)
- scaledA = A * 0.9 * EIGEN_PI / maxImagPartOfSpectrum;
+ if (maxImagPartOfSpectrum >= RealScalar(0.9 * EIGEN_PI))
+ scaledA = A * RealScalar(0.9 * EIGEN_PI) / maxImagPartOfSpectrum;
else
scaledA = A;
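EIGEN_PI expands to a wide (long double) literal, so 0.9 * EIGEN_PI mixed directly into a float expression promotes the comparison and the scaling beyond RealScalar. Converting the constant once keeps the arithmetic in the matrix's own scalar type; a sketch of the intent, assuming RealScalar = float:

    #include <Eigen/Core>
    #include <iostream>

    int main() {
      typedef float RealScalar;
      // Evaluate the constant in wide precision, convert exactly once,
      // then all further arithmetic stays in RealScalar.
      const RealScalar bound = RealScalar(0.9 * EIGEN_PI);
      std::cout << bound << std::endl;  // ~2.82743
      return 0;
    }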
diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp
index 8e104ed1e..53911370f 100644
--- a/unsupported/test/matrix_power.cpp
+++ b/unsupported/test/matrix_power.cpp
@@ -24,7 +24,7 @@ void test2dRotation(double tol)
s = std::sin(angle);
B << c, s, -s, c;
- C = Apow(std::ldexp(angle,1) / EIGEN_PI);
+ C = Apow(std::ldexp(angle,1) / T(EIGEN_PI));
std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n';
VERIFY(C.isApprox(B, tol));
}
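std::ldexp(angle, 1) scales angle by 2^1, i.e. doubles it exactly in binary floating point; the added T(...) cast keeps the division by EIGEN_PI (a wide literal) in the test's scalar type T instead of promoting the quotient. A sketch with a hypothetical helper:

    #include <cmath>
    #include <Eigen/Core>

    // Hypothetical helper for illustration: returns 2*angle/pi in T.
    template <typename T>
    T power_for_angle(T angle) {
      return std::ldexp(angle, 1) / T(EIGEN_PI);
    }

    int main() {
      return power_for_angle(1.0f) > 0.6f ? 0 : 1;  // 2/pi ~ 0.6366
    }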