Diffstat (limited to 'unsupported/test')
-rw-r--r--  unsupported/test/CMakeLists.txt                  |   2
-rw-r--r--  unsupported/test/cxx11_eventcount.cpp            | 140
-rw-r--r--  unsupported/test/cxx11_float16.cpp               |  41
-rw-r--r--  unsupported/test/cxx11_meta.cpp                  |   2
-rw-r--r--  unsupported/test/cxx11_runqueue.cpp              | 227
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu            |  31
-rw-r--r--  unsupported/test/cxx11_tensor_fixed_size.cpp     |   2
-rw-r--r--  unsupported/test/cxx11_tensor_math.cpp           |   4
-rw-r--r--  unsupported/test/cxx11_tensor_mixed_indices.cpp  |   4
-rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu |  37
-rw-r--r--  unsupported/test/levenberg_marquardt.cpp         |   2
11 files changed, 479 insertions, 13 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index f75bf9798..eed9f079e 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -116,6 +116,8 @@ if(EIGEN_TEST_CXX11)
set(CMAKE_CXX_STANDARD 11)
ei_add_test(cxx11_float16)
+ ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry)
diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp
new file mode 100644
index 000000000..f16cc6f07
--- /dev/null
+++ b/unsupported/test/cxx11_eventcount.cpp
@@ -0,0 +1,140 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+// Visual Studio doesn't provide a rand_r() function since its
+// implementation of rand() is already thread-safe.
+int rand_reentrant(unsigned int* s) {
+#if EIGEN_COMP_MSVC_STRICT  // always defined (possibly to 0), so test its value, not its existence
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
+static void test_basic_eventcount()
+{
+ std::vector<EventCount::Waiter> waiters(1);
+ EventCount ec(waiters);
+ EventCount::Waiter& w = waiters[0];
+ ec.Notify(false);
+ ec.Prewait(&w);
+ ec.Notify(true);
+ ec.CommitWait(&w);
+ ec.Prewait(&w);
+ ec.CancelWait(&w);
+}
+
+// Fake bounded counter-based queue.
+struct TestQueue {
+ std::atomic<int> val_;
+ static const int kQueueSize = 10;
+
+ TestQueue() : val_() {}
+
+ ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
+
+ bool Push() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == kQueueSize) return false;
+ if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Pop() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == 0) return false;
+ if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
+};
+
+const int TestQueue::kQueueSize;
+
+// A number of producers send messages to a set of consumers using a set of
+// fake queues. Ensure that it does not crash, that consumers don't deadlock,
+// and that the numbers of blocked and unblocked threads match.
+static void test_stress_eventcount()
+{
+ const int kThreads = std::thread::hardware_concurrency();
+ static const int kEvents = 1 << 16;
+ static const int kQueues = 10;
+
+ std::vector<EventCount::Waiter> waiters(kThreads);
+ EventCount ec(waiters);
+ TestQueue queues[kQueues];
+
+ std::vector<std::unique_ptr<std::thread>> producers;
+ for (int i = 0; i < kThreads; i++) {
+ producers.emplace_back(new std::thread([&ec, &queues]() {
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Push()) {
+ ec.Notify(false);
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ }));
+ }
+
+ std::vector<std::unique_ptr<std::thread>> consumers;
+ for (int i = 0; i < kThreads; i++) {
+ consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
+ EventCount::Waiter& w = waiters[i];
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Pop()) continue;
+ j--;
+ ec.Prewait(&w);
+ bool empty = true;
+ for (int q = 0; q < kQueues; q++) {
+ if (!queues[q].Empty()) {
+ empty = false;
+ break;
+ }
+ }
+ if (!empty) {
+ ec.CancelWait(&w);
+ continue;
+ }
+ ec.CommitWait(&w);
+ }
+ }));
+ }
+
+ for (int i = 0; i < kThreads; i++) {
+ producers[i]->join();
+ consumers[i]->join();
+ }
+}
+
+void test_cxx11_eventcount()
+{
+ CALL_SUBTEST(test_basic_eventcount());
+ CALL_SUBTEST(test_stress_eventcount());
+}
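The stress test above exercises the standard prewait/recheck/commit blocking
protocol. As a minimal consumer-side sketch, using only the EventCount and
TestQueue calls that appear in the test (the return value is a hypothetical
"did work" flag, not part of the API):

    bool consume_one(EventCount& ec, EventCount::Waiter& w, TestQueue& q) {
      if (q.Pop()) return true;   // fast path: work was available
      ec.Prewait(&w);             // announce intent to block
      if (!q.Empty()) {           // recheck: a Push may have raced with us
        ec.CancelWait(&w);
        return q.Pop();
      }
      ec.CommitWait(&w);          // block until a producer calls Notify()
      return false;               // woken; the caller simply retries
    }

The recheck between Prewait and CommitWait is what makes the scheme lossless:
a push that happens after Prewait is published is either seen by the recheck
or its Notify() wakes the committed waiter.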
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
index 2dc0872d8..273dcbc11 100644
--- a/unsupported/test/cxx11_float16.cpp
+++ b/unsupported/test/cxx11_float16.cpp
@@ -122,6 +122,8 @@ void test_comparison()
VERIFY(half(1.0f) != half(2.0f));
// Comparisons with NaNs and infinities.
+#if !EIGEN_COMP_MSVC
+ // Visual Studio errors out on divisions by 0
VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
@@ -132,13 +134,26 @@ void test_comparison()
VERIFY(half(1.0) < half(1.0 / 0.0));
VERIFY(half(1.0) > half(-1.0 / 0.0));
+#endif
}
-void test_functions()
+void test_basic_functions()
{
VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
+ VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
+ VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
+
+ VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
+ VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
+
+ VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
+
+ VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
+ VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
+
VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
@@ -146,10 +161,32 @@ void test_functions()
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
}
+void test_trigonometric_functions()
+{
+ VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
+ VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
+ //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
+ //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
+
+ VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
+ // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
+ VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
+
+ VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
+ // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
+ // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
+ //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
+ VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
+}
+
void test_cxx11_float16()
{
CALL_SUBTEST(test_conversion());
CALL_SUBTEST(test_arithmetic());
CALL_SUBTEST(test_comparison());
- CALL_SUBTEST(test_functions());
+ CALL_SUBTEST(test_basic_functions());
+ CALL_SUBTEST(test_trigonometric_functions());
}
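A note on the tolerances in this file: Eigen::half uses the IEEE binary16
layout with 10 explicit significand bits, so for values in [2, 4) the spacing
between representable numbers is 2^-9 = 0.001953125. For example,

    log(10) = 2.3025851...  ->  nearest half = 2.302734375, printed as 2.30273

which is exactly the literal VERIFY_IS_APPROX'd in test_basic_functions. The
commented-out trigonometric checks fail for a related reason: half(EIGEN_PI)
is only accurate to about 1e-3, and near the zeros of cos/sin/tan that
absolute input error turns into an arbitrarily large relative error in the
result, so only arguments away from the zeros are checked.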
diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp
index ecac3add1..8911c59d8 100644
--- a/unsupported/test/cxx11_meta.cpp
+++ b/unsupported/test/cxx11_meta.cpp
@@ -10,7 +10,7 @@
#include "main.h"
#include <array>
-#include <Eigen/CXX11/Core>
+#include <Eigen/CXX11/src/util/CXX11Meta.h>
using Eigen::internal::is_same;
using Eigen::internal::type_list;
diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp
new file mode 100644
index 000000000..d1770ee1b
--- /dev/null
+++ b/unsupported/test/cxx11_runqueue.cpp
@@ -0,0 +1,227 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include <cstdlib>
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+
+// Visual Studio doesn't provide a rand_r() function since its
+// implementation of rand() is already thread-safe.
+int rand_reentrant(unsigned int* s) {
+#if EIGEN_COMP_MSVC_STRICT  // always defined (possibly to 0), so test its value, not its existence
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
+void test_basic_runqueue()
+{
+ RunQueue<int, 4> q;
+ // Check empty state.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ std::vector<int> stolen;
+ VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ // Push one front, pop one front.
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ VERIFY_IS_EQUAL(0, q.Size());
+ // Push front to overflow.
+ VERIFY_IS_EQUAL(0, q.PushFront(2));
+ VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(3));
+ VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(4));
+ VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(5));
+ VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(6, q.PushFront(6));
+ VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(5, q.PopFront());
+ VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(4, q.PopFront());
+ VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(3, q.PopFront());
+ VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(2, q.PopFront());
+ VERIFY_IS_EQUAL(0, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ // Push one back, pop one back.
+ VERIFY_IS_EQUAL(0, q.PushBack(7));
+ VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(7, stolen[0]);
+ VERIFY_IS_EQUAL(0, q.Size());
+ stolen.clear();
+ // Push back to overflow.
+ VERIFY_IS_EQUAL(0, q.PushBack(8));
+ VERIFY_IS_EQUAL(1, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(9));
+ VERIFY_IS_EQUAL(2, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(10));
+ VERIFY_IS_EQUAL(3, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(11));
+ VERIFY_IS_EQUAL(4, q.Size());
+ VERIFY_IS_EQUAL(12, q.PushBack(12));
+ VERIFY_IS_EQUAL(4, q.Size());
+ // Pop back in halves.
+ VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(2, stolen.size());
+ VERIFY_IS_EQUAL(10, stolen[0]);
+ VERIFY_IS_EQUAL(11, stolen[1]);
+ VERIFY_IS_EQUAL(2, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(9, stolen[0]);
+ VERIFY_IS_EQUAL(1, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1, stolen.size());
+ VERIFY_IS_EQUAL(8, stolen[0]);
+ stolen.clear();
+ VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0, stolen.size());
+ // Empty again.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0, q.Size());
+}
+
+// Empty tests that the queue is not claimed to be empty when it is in fact not.
+// The emptiness property is a crucial part of the thread pool blocking scheme,
+// so we go to great lengths to ensure it. We create a queue with 1 element and
+// then repeatedly push 1 element (either front or back, at random) and pop
+// 1 element (either front or back, at random). So the queue always contains at
+// least 1 element, but otherwise changes chaotically. Another thread constantly
+// tests that the queue is never claimed to be empty.
+void test_empty_runqueue()
+{
+ RunQueue<int, 4> q;
+ q.PushFront(1);
+ std::atomic<bool> done(false);
+ std::thread mutator([&q, &done]() {
+ unsigned rnd = 0;
+ std::vector<int> stolen;
+ for (int i = 0; i < 1 << 18; i++) {
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ else
+ VERIFY_IS_EQUAL(0, q.PushBack(1));
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ else {
+ for (;;) {
+ if (q.PopBackHalf(&stolen) == 1) {
+ stolen.clear();
+ break;
+ }
+ VERIFY_IS_EQUAL(0, stolen.size());
+ }
+ }
+ }
+ done = true;
+ });
+ while (!done) {
+ VERIFY(!q.Empty());
+ int size = q.Size();
+ VERIFY_GE(size, 1);
+ VERIFY_LE(size, 2);
+ }
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ mutator.join();
+}
+
+// Stress is a chaotic random test.
+// One thread (the owner) calls PushFront/PopFront; other threads call
+// PushBack/PopBackHalf. Ensure that we don't crash or deadlock and that all
+// sanity checks pass.
+void test_stress_runqueue()
+{
+ static const int kEvents = 1 << 18;
+ RunQueue<int, 8> q;
+ std::atomic<int> total(0);
+ std::vector<std::unique_ptr<std::thread>> threads;
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ int pushed = 1;
+ int popped = 1;
+ while (pushed < kEvents || popped < kEvents) {
+ if (pushed < kEvents) {
+ if (q.PushFront(pushed) == 0) {
+ sum += pushed;
+ pushed++;
+ }
+ }
+ if (popped < kEvents) {
+ int v = q.PopFront();
+ if (v != 0) {
+ sum -= v;
+ popped++;
+ }
+ }
+ }
+ total += sum;
+ }));
+ for (int i = 0; i < 2; i++) {
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ for (int j = 1; j < kEvents; j++) {
+ if (q.PushBack(j) == 0) {
+ sum += j;
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ total += sum;
+ }));
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ std::vector<int> stolen;
+ for (int j = 1; j < kEvents;) {
+ if (q.PopBackHalf(&stolen) == 0) {
+ EIGEN_THREAD_YIELD();
+ continue;
+ }
+ while (stolen.size() && j < kEvents) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ sum += v;
+ j++;
+ }
+ }
+ while (stolen.size()) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
+ }
+ total -= sum;
+ }));
+ }
+ for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
+ VERIFY(q.Empty());
+ VERIFY(total.load() == 0);
+}
+
+void test_cxx11_runqueue()
+{
+ CALL_SUBTEST_1(test_basic_runqueue());
+ CALL_SUBTEST_2(test_empty_runqueue());
+ CALL_SUBTEST_3(test_stress_runqueue());
+}
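For orientation, the RunQueue conventions that test_basic_runqueue pins down,
as a minimal sketch (task values must be nonzero, since 0 doubles as the
"empty/failed" return):

    RunQueue<int, 8> q;
    int task = 1;
    // owner thread only:
    if (q.PushFront(task) != 0) { /* queue full: task came back rejected */ }
    int t = q.PopFront();                 // 0 means the queue was empty
    // any other thread (a thief):
    std::vector<int> stolen;
    unsigned n = q.PopBackHalf(&stolen);  // steals up to half the elements

PushFront/PushBack return 0 on success and hand the rejected element back on
overflow, which is why the basic test expects 6 from PushFront(6) and 12 from
PushBack(12) once the 4-slot queue is full.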
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 134359611..4026f48f0 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -661,6 +661,9 @@ void test_cuda_digamma()
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
}
+
+ cudaFree(d_in);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -718,13 +721,17 @@ void test_cuda_zeta()
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
- VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3));
+ VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
}
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_q);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -787,6 +794,10 @@ void test_cuda_polygamma()
for (int i = 0; i < 7; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_n);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -826,9 +837,9 @@ void test_cuda_igamma()
Scalar* d_a;
Scalar* d_x;
Scalar* d_out;
- cudaMalloc((void**)(&d_a), bytes);
- cudaMalloc((void**)(&d_x), bytes);
- cudaMalloc((void**)(&d_out), bytes);
+ assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
@@ -854,6 +865,10 @@ void test_cuda_igamma()
}
}
}
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -920,6 +935,10 @@ void test_cuda_igammac()
}
}
}
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
}
template <typename Scalar>
@@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev)
Scalar* d_in;
Scalar* d_out;
- cudaMalloc((void**)(&d_in), bytes);
- cudaMalloc((void**)(&d_out), bytes);
+ assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
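The recurring pattern in this file's changes: every cudaMalloc result is now
asserted and every device buffer is released once the host-side checks
complete. Schematically (d_in and bytes stand in for the per-test variables):

    Scalar* d_in;
    assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
    cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
    // ... run the expression, synchronize, copy results back, VERIFY ...
    cudaFree(d_in);  // previously leaked on every invocation

Without the cudaFree calls, each subtest leaked its device buffers, which can
exhaust GPU memory when the whole suite runs in one process. One caveat: a
call with side effects wrapped in assert() disappears under NDEBUG, so these
checks rely on the tests being built with assertions enabled.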
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
index 1c33fefb3..5fe164859 100644
--- a/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -20,6 +20,8 @@ static void test_0d()
TensorFixedSize<float, Sizes<> > scalar1;
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
+ VERIFY_IS_EQUAL(scalar1.size(), 1);
+ VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
scalar1() = 7.0;
scalar2() = 13.0;
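Background for the two new checks: a rank-0 tensor holds exactly one scalar,
and array_prod over an empty dimension list is the empty product, i.e. 1, so
both size() and array_prod(dimensions()) must evaluate to 1.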
diff --git a/unsupported/test/cxx11_tensor_math.cpp b/unsupported/test/cxx11_tensor_math.cpp
index d247bebaa..61c742a16 100644
--- a/unsupported/test/cxx11_tensor_math.cpp
+++ b/unsupported/test/cxx11_tensor_math.cpp
@@ -16,7 +16,7 @@ using Eigen::RowMajor;
static void test_tanh()
{
- Tensor<float, 1> vec1({6});
+ Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.tanh();
@@ -28,7 +28,7 @@ static void test_tanh()
static void test_sigmoid()
{
- Tensor<float, 1> vec1({6});
+ Tensor<float, 1> vec1(6);
vec1.setRandom();
Tensor<float, 1> vec2 = vec1.sigmoid();
diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp
index 72f826216..4fba6fdd1 100644
--- a/unsupported/test/cxx11_tensor_mixed_indices.cpp
+++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp
@@ -14,8 +14,8 @@
static void test_simple()
{
- Tensor<float, 1, ColMajor> vec1({6});
- Tensor<float, 1, ColMajor, int> vec2({6});
+ Tensor<float, 1, ColMajor> vec1(6);
+ Tensor<float, 1, ColMajor, int> vec2(6);
vec1(0) = 4.0; vec2(0) = 0.0;
vec1(1) = 8.0; vec2(1) = 1.0;
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index cb917bb37..154a72d5c 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -228,6 +228,42 @@ void test_cuda_reductions() {
gpu_device.deallocate(d_res_float);
}
+void test_cuda_forced_evals() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.abs();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking unary " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
#endif
@@ -246,6 +282,7 @@ void test_cxx11_tensor_of_float16_cuda()
CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST_2(test_cuda_contractions());
CALL_SUBTEST_3(test_cuda_reductions());
+ CALL_SUBTEST_4(test_cuda_forced_evals());
}
else {
std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl;
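The new subtest's name refers to the .eval() in the middle of the
half-precision expression: it inserts a forced-evaluation node that
materializes the intermediate half tensor in device memory, so the test
exercises Eigen's forced-eval machinery (allocation plus half packet
loads/stores) rather than only fused element-wise ops:

    gpu_res_half.device(gpu_device) =
        gpu_float.cast<Eigen::half>().abs().eval().cast<float>();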
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index 6dc17bd17..64f168c16 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -792,7 +792,9 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
+ ++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ --g_test_level;
// was: VERIFY_IS_EQUAL(info, 1);
// check norm^2
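The g_test_level bracket follows the Eigen test harness convention (main.h)
that a failed VERIFY aborts only while g_test_level == 0 and is otherwise
reported as a warning, so a mismatch in the minimizer's return status on this
NIST problem is still checked but no longer fatal:

    ++g_test_level;   // downgrade a failure of the next check to a warning
    VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
    --g_test_level;   // restore fatal checking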