diff options
Diffstat (limited to 'unsupported/test')
-rw-r--r-- | unsupported/test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_eventcount.cpp | 140 | ||||
-rw-r--r-- | unsupported/test/cxx11_float16.cpp | 41 | ||||
-rw-r--r-- | unsupported/test/cxx11_meta.cpp | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_runqueue.cpp | 227 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_cuda.cu | 31 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_fixed_size.cpp | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_math.cpp | 4 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_mixed_indices.cpp | 4 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_of_float16_cuda.cu | 37 | ||||
-rw-r--r-- | unsupported/test/levenberg_marquardt.cpp | 2 |
11 files changed, 479 insertions, 13 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index f75bf9798..eed9f079e 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -116,6 +116,8 @@ if(EIGEN_TEST_CXX11) set(CMAKE_CXX_STANDARD 11) ei_add_test(cxx11_float16) + ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp new file mode 100644 index 000000000..f16cc6f07 --- /dev/null +++ b/unsupported/test/cxx11_eventcount.cpp @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS +#include "main.h" +#include <Eigen/CXX11/ThreadPool> + +// Visual studio doesn't implement a rand_r() function since its +// implementation of rand() is already thread safe +int rand_reentrant(unsigned int* s) { +#ifdef EIGEN_COMP_MSVC_STRICT + EIGEN_UNUSED_VARIABLE(s); + return rand(); +#else + return rand_r(s); +#endif +} + +static void test_basic_eventcount() +{ + std::vector<EventCount::Waiter> waiters(1); + EventCount ec(waiters); + EventCount::Waiter& w = waiters[0]; + ec.Notify(false); + ec.Prewait(&w); + ec.Notify(true); + ec.CommitWait(&w); + ec.Prewait(&w); + ec.CancelWait(&w); +} + +// Fake bounded counter-based queue. +struct TestQueue { + std::atomic<int> val_; + static const int kQueueSize = 10; + + TestQueue() : val_() {} + + ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); } + + bool Push() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == kQueueSize) return false; + if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed)) + return true; + } + } + + bool Pop() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == 0) return false; + if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed)) + return true; + } + } + + bool Empty() { return val_.load(std::memory_order_relaxed) == 0; } +}; + +const int TestQueue::kQueueSize; + +// A number of producers send messages to a set of consumers using a set of +// fake queues. Ensure that it does not crash, consumers don't deadlock and +// number of blocked and unblocked threads match. +static void test_stress_eventcount() +{ + const int kThreads = std::thread::hardware_concurrency(); + static const int kEvents = 1 << 16; + static const int kQueues = 10; + + std::vector<EventCount::Waiter> waiters(kThreads); + EventCount ec(waiters); + TestQueue queues[kQueues]; + + std::vector<std::unique_ptr<std::thread>> producers; + for (int i = 0; i < kThreads; i++) { + producers.emplace_back(new std::thread([&ec, &queues]() { + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Push()) { + ec.Notify(false); + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + })); + } + + std::vector<std::unique_ptr<std::thread>> consumers; + for (int i = 0; i < kThreads; i++) { + consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() { + EventCount::Waiter& w = waiters[i]; + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Pop()) continue; + j--; + ec.Prewait(&w); + bool empty = true; + for (int q = 0; q < kQueues; q++) { + if (!queues[q].Empty()) { + empty = false; + break; + } + } + if (!empty) { + ec.CancelWait(&w); + continue; + } + ec.CommitWait(&w); + } + })); + } + + for (int i = 0; i < kThreads; i++) { + producers[i]->join(); + consumers[i]->join(); + } +} + +void test_cxx11_eventcount() +{ + CALL_SUBTEST(test_basic_eventcount()); + CALL_SUBTEST(test_stress_eventcount()); +} diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp index 2dc0872d8..273dcbc11 100644 --- a/unsupported/test/cxx11_float16.cpp +++ b/unsupported/test/cxx11_float16.cpp @@ -122,6 +122,8 @@ void test_comparison() VERIFY(half(1.0f) != half(2.0f)); // Comparisons with NaNs and infinities. +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); @@ -132,13 +134,26 @@ void test_comparison() VERIFY(half(1.0) < half(1.0 / 0.0)); VERIFY(half(1.0) > half(-1.0 / 0.0)); +#endif } -void test_functions() +void test_basic_functions() { VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); + + VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); + + VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); + + VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI)); @@ -146,10 +161,32 @@ void test_functions() VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); } +void test_trigonometric_functions() +{ + VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); + //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); + + VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); + // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); + VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); + + VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); +} + void test_cxx11_float16() { CALL_SUBTEST(test_conversion()); CALL_SUBTEST(test_arithmetic()); CALL_SUBTEST(test_comparison()); - CALL_SUBTEST(test_functions()); + CALL_SUBTEST(test_basic_functions()); + CALL_SUBTEST(test_trigonometric_functions()); } diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp index ecac3add1..8911c59d8 100644 --- a/unsupported/test/cxx11_meta.cpp +++ b/unsupported/test/cxx11_meta.cpp @@ -10,7 +10,7 @@ #include "main.h" #include <array> -#include <Eigen/CXX11/Core> +#include <Eigen/CXX11/src/util/CXX11Meta.h> using Eigen::internal::is_same; using Eigen::internal::type_list; diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp new file mode 100644 index 000000000..d1770ee1b --- /dev/null +++ b/unsupported/test/cxx11_runqueue.cpp @@ -0,0 +1,227 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS +#include <cstdlib> +#include "main.h" +#include <Eigen/CXX11/ThreadPool> + + +// Visual studio doesn't implement a rand_r() function since its +// implementation of rand() is already thread safe +int rand_reentrant(unsigned int* s) { +#ifdef EIGEN_COMP_MSVC_STRICT + EIGEN_UNUSED_VARIABLE(s); + return rand(); +#else + return rand_r(s); +#endif +} + +void test_basic_runqueue() +{ + RunQueue<int, 4> q; + // Check empty state. + VERIFY(q.Empty()); + VERIFY_IS_EQUAL(0u, q.Size()); + VERIFY_IS_EQUAL(0, q.PopFront()); + std::vector<int> stolen; + VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0u, stolen.size()); + // Push one front, pop one front. + VERIFY_IS_EQUAL(0, q.PushFront(1)); + VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1, q.PopFront()); + VERIFY_IS_EQUAL(0, q.Size()); + // Push front to overflow. + VERIFY_IS_EQUAL(0, q.PushFront(2)); + VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(3)); + VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(4)); + VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(5)); + VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(6, q.PushFront(6)); + VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(5, q.PopFront()); + VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(4, q.PopFront()); + VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(3, q.PopFront()); + VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(2, q.PopFront()); + VERIFY_IS_EQUAL(0, q.Size()); + VERIFY_IS_EQUAL(0, q.PopFront()); + // Push one back, pop one back. + VERIFY_IS_EQUAL(0, q.PushBack(7)); + VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(7, stolen[0]); + VERIFY_IS_EQUAL(0, q.Size()); + stolen.clear(); + // Push back to overflow. + VERIFY_IS_EQUAL(0, q.PushBack(8)); + VERIFY_IS_EQUAL(1, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(9)); + VERIFY_IS_EQUAL(2, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(10)); + VERIFY_IS_EQUAL(3, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(11)); + VERIFY_IS_EQUAL(4, q.Size()); + VERIFY_IS_EQUAL(12, q.PushBack(12)); + VERIFY_IS_EQUAL(4, q.Size()); + // Pop back in halves. + VERIFY_IS_EQUAL(2, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(2, stolen.size()); + VERIFY_IS_EQUAL(10, stolen[0]); + VERIFY_IS_EQUAL(11, stolen[1]); + VERIFY_IS_EQUAL(2, q.Size()); + stolen.clear(); + VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(9, stolen[0]); + VERIFY_IS_EQUAL(1, q.Size()); + stolen.clear(); + VERIFY_IS_EQUAL(1, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1, stolen.size()); + VERIFY_IS_EQUAL(8, stolen[0]); + stolen.clear(); + VERIFY_IS_EQUAL(0, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0, stolen.size()); + // Empty again. + VERIFY(q.Empty()); + VERIFY_IS_EQUAL(0, q.Size()); +} + +// Empty tests that the queue is not claimed to be empty when is is in fact not. +// Emptiness property is crucial part of thread pool blocking scheme, +// so we go to great effort to ensure this property. We create a queue with +// 1 element and then push 1 element (either front or back at random) and pop +// 1 element (either front or back at random). So queue always contains at least +// 1 element, but otherwise changes chaotically. Another thread constantly tests +// that the queue is not claimed to be empty. +void test_empty_runqueue() +{ + RunQueue<int, 4> q; + q.PushFront(1); + std::atomic<bool> done(false); + std::thread mutator([&q, &done]() { + unsigned rnd = 0; + std::vector<int> stolen; + for (int i = 0; i < 1 << 18; i++) { + if (rand_reentrant(&rnd) % 2) + VERIFY_IS_EQUAL(0, q.PushFront(1)); + else + VERIFY_IS_EQUAL(0, q.PushBack(1)); + if (rand_reentrant(&rnd) % 2) + VERIFY_IS_EQUAL(1, q.PopFront()); + else { + for (;;) { + if (q.PopBackHalf(&stolen) == 1) { + stolen.clear(); + break; + } + VERIFY_IS_EQUAL(0, stolen.size()); + } + } + } + done = true; + }); + while (!done) { + VERIFY(!q.Empty()); + int size = q.Size(); + VERIFY_GE(size, 1); + VERIFY_LE(size, 2); + } + VERIFY_IS_EQUAL(1, q.PopFront()); + mutator.join(); +} + +// Stress is a chaotic random test. +// One thread (owner) calls PushFront/PopFront, other threads call PushBack/ +// PopBack. Ensure that we don't crash, deadlock, and all sanity checks pass. +void test_stress_runqueue() +{ + static const int kEvents = 1 << 18; + RunQueue<int, 8> q; + std::atomic<int> total(0); + std::vector<std::unique_ptr<std::thread>> threads; + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + int pushed = 1; + int popped = 1; + while (pushed < kEvents || popped < kEvents) { + if (pushed < kEvents) { + if (q.PushFront(pushed) == 0) { + sum += pushed; + pushed++; + } + } + if (popped < kEvents) { + int v = q.PopFront(); + if (v != 0) { + sum -= v; + popped++; + } + } + } + total += sum; + })); + for (int i = 0; i < 2; i++) { + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + for (int j = 1; j < kEvents; j++) { + if (q.PushBack(j) == 0) { + sum += j; + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + total += sum; + })); + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + std::vector<int> stolen; + for (int j = 1; j < kEvents;) { + if (q.PopBackHalf(&stolen) == 0) { + EIGEN_THREAD_YIELD(); + continue; + } + while (stolen.size() && j < kEvents) { + int v = stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + sum += v; + j++; + } + } + while (stolen.size()) { + int v = stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD(); + } + total -= sum; + })); + } + for (size_t i = 0; i < threads.size(); i++) threads[i]->join(); + VERIFY(q.Empty()); + VERIFY(total.load() == 0); +} + +void test_cxx11_runqueue() +{ + CALL_SUBTEST_1(test_basic_runqueue()); + CALL_SUBTEST_2(test_empty_runqueue()); + CALL_SUBTEST_3(test_stress_runqueue()); +} diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 134359611..4026f48f0 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -661,6 +661,9 @@ void test_cuda_digamma() for (int i = 5; i < 7; ++i) { VERIFY_IS_EQUAL(out(i), expected_out(i)); } + + cudaFree(d_in); + cudaFree(d_out); } template <typename Scalar> @@ -718,13 +721,17 @@ void test_cuda_zeta() assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); VERIFY_IS_EQUAL(out(0), expected_out(0)); - VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3)); + VERIFY((std::isnan)(out(3))); for (int i = 1; i < 6; ++i) { if (i != 3) { VERIFY_IS_APPROX(out(i), expected_out(i)); } } + + cudaFree(d_in_x); + cudaFree(d_in_q); + cudaFree(d_out); } template <typename Scalar> @@ -787,6 +794,10 @@ void test_cuda_polygamma() for (int i = 0; i < 7; ++i) { VERIFY_IS_APPROX(out(i), expected_out(i)); } + + cudaFree(d_in_x); + cudaFree(d_in_n); + cudaFree(d_out); } template <typename Scalar> @@ -826,9 +837,9 @@ void test_cuda_igamma() Scalar* d_a; Scalar* d_x; Scalar* d_out; - cudaMalloc((void**)(&d_a), bytes); - cudaMalloc((void**)(&d_x), bytes); - cudaMalloc((void**)(&d_out), bytes); + assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); @@ -854,6 +865,10 @@ void test_cuda_igamma() } } } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); } template <typename Scalar> @@ -920,6 +935,10 @@ void test_cuda_igammac() } } } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); } template <typename Scalar> @@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev) Scalar* d_in; Scalar* d_out; - cudaMalloc((void**)(&d_in), bytes); - cudaMalloc((void**)(&d_out), bytes); + assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 1c33fefb3..5fe164859 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -20,6 +20,8 @@ static void test_0d() TensorFixedSize<float, Sizes<> > scalar1; TensorFixedSize<float, Sizes<>, RowMajor> scalar2; VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1); scalar1() = 7.0; scalar2() = 13.0; diff --git a/unsupported/test/cxx11_tensor_math.cpp b/unsupported/test/cxx11_tensor_math.cpp index d247bebaa..61c742a16 100644 --- a/unsupported/test/cxx11_tensor_math.cpp +++ b/unsupported/test/cxx11_tensor_math.cpp @@ -16,7 +16,7 @@ using Eigen::RowMajor; static void test_tanh() { - Tensor<float, 1> vec1({6}); + Tensor<float, 1> vec1(6); vec1.setRandom(); Tensor<float, 1> vec2 = vec1.tanh(); @@ -28,7 +28,7 @@ static void test_tanh() static void test_sigmoid() { - Tensor<float, 1> vec1({6}); + Tensor<float, 1> vec1(6); vec1.setRandom(); Tensor<float, 1> vec2 = vec1.sigmoid(); diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp index 72f826216..4fba6fdd1 100644 --- a/unsupported/test/cxx11_tensor_mixed_indices.cpp +++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp @@ -14,8 +14,8 @@ static void test_simple() { - Tensor<float, 1, ColMajor> vec1({6}); - Tensor<float, 1, ColMajor, int> vec2({6}); + Tensor<float, 1, ColMajor> vec1(6); + Tensor<float, 1, ColMajor, int> vec2(6); vec1(0) = 4.0; vec2(0) = 0.0; vec1(1) = 8.0; vec2(1) = 1.0; diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index cb917bb37..154a72d5c 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -228,6 +228,42 @@ void test_cuda_reductions() { gpu_device.deallocate(d_res_float); } +void test_cuda_forced_evals() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.abs(); + gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); + + Tensor<float, 1> half_prec(num_elem); + Tensor<float, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking unary " << i << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} #endif @@ -246,6 +282,7 @@ void test_cxx11_tensor_of_float16_cuda() CALL_SUBTEST_1(test_cuda_elementwise()); CALL_SUBTEST_2(test_cuda_contractions()); CALL_SUBTEST_3(test_cuda_reductions()); + CALL_SUBTEST_4(test_cuda_forced_evals()); } else { std::cout << "Half floats require compute capability of at least 5.3. This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl; diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp index 6dc17bd17..64f168c16 100644 --- a/unsupported/test/levenberg_marquardt.cpp +++ b/unsupported/test/levenberg_marquardt.cpp @@ -792,7 +792,9 @@ void testNistMGH10(void) MGH10_functor functor; LevenbergMarquardt<MGH10_functor> lm(functor); info = lm.minimize(x); + ++g_test_level; VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + --g_test_level; // was: VERIFY_IS_EQUAL(info, 1); // check norm^2 |