diff options
Diffstat (limited to 'unsupported/test')
29 files changed, 921 insertions, 216 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 96652bfcf..22442b394 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -110,34 +110,48 @@ ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) -ei_add_test(float16) +# TODO: The following test names are prefixed with the cxx11 string, since historically +# the tests depended on c++11. This isn't the case anymore so we ought to rename them. +ei_add_test(cxx11_float16) +ei_add_test(cxx11_tensor_dimension) +ei_add_test(cxx11_tensor_map) +ei_add_test(cxx11_tensor_assign) +ei_add_test(cxx11_tensor_comparisons) +ei_add_test(cxx11_tensor_forced_eval) +ei_add_test(cxx11_tensor_math) +ei_add_test(cxx11_tensor_const) +ei_add_test(cxx11_tensor_intdiv) +ei_add_test(cxx11_tensor_casts) +ei_add_test(cxx11_tensor_empty) +ei_add_test(cxx11_tensor_sugar) +ei_add_test(cxx11_tensor_roundings) +ei_add_test(cxx11_tensor_layout_swap) +ei_add_test(cxx11_tensor_io) +if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + # This test requires __uint128_t which is only available on 64bit systems + ei_add_test(cxx11_tensor_uint128) +endif() if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. 
set(CMAKE_CXX_STANDARD 11) + ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) - ei_add_test(cxx11_tensor_assign) - ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_index_list) ei_add_test(cxx11_tensor_mixed_indices) - ei_add_test(cxx11_tensor_comparisons) ei_add_test(cxx11_tensor_contraction) ei_add_test(cxx11_tensor_convolution) ei_add_test(cxx11_tensor_expr) - ei_add_test(cxx11_tensor_math) - ei_add_test(cxx11_tensor_forced_eval) ei_add_test(cxx11_tensor_fixed_size) - ei_add_test(cxx11_tensor_const) ei_add_test(cxx11_tensor_of_const_values) ei_add_test(cxx11_tensor_of_complex) ei_add_test(cxx11_tensor_of_strings) - ei_add_test(cxx11_tensor_intdiv) ei_add_test(cxx11_tensor_lvalue) - ei_add_test(cxx11_tensor_map) ei_add_test(cxx11_tensor_broadcasting) ei_add_test(cxx11_tensor_chipping) ei_add_test(cxx11_tensor_concatenation) @@ -155,23 +169,11 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_tensor_ref) ei_add_test(cxx11_tensor_random) - ei_add_test(cxx11_tensor_casts) - ei_add_test(cxx11_tensor_roundings) - ei_add_test(cxx11_tensor_reverse) - ei_add_test(cxx11_tensor_layout_swap) - ei_add_test(cxx11_tensor_io) ei_add_test(cxx11_tensor_generator) ei_add_test(cxx11_tensor_custom_op) ei_add_test(cxx11_tensor_custom_index) - ei_add_test(cxx11_tensor_sugar) ei_add_test(cxx11_tensor_fft) ei_add_test(cxx11_tensor_ifft) - ei_add_test(cxx11_tensor_empty) - - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - # This test requires __uint128_t which is only available on 64bit systems - ei_add_test(cxx11_tensor_uint128) - endif() endif() @@ -191,6 +193,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + 
if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") + endif() + set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") @@ -207,10 +213,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) ei_add_test(cxx11_tensor_random_cuda) endif() - # Operations other that casting of half floats are only supported starting with arch 5.3 - if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52) - ei_add_test(cxx11_tensor_of_float16_cuda) - endif() + ei_add_test(cxx11_tensor_of_float16_cuda) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp index d3718e2d2..1dd6dc97d 100644 --- a/unsupported/test/FFTW.cpp +++ b/unsupported/test/FFTW.cpp @@ -54,7 +54,7 @@ complex<long double> promote(long double x) { return complex<long double>( x); long double difpower=0; size_t n = (min)( buf1.size(),buf2.size() ); for (size_t k=0;k<n;++k) { - totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.; + totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2; difpower += numext::abs2(buf1[k] - buf2[k]); } return sqrt(difpower/totalpower); diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp index 724ea7b5b..6a5ed057f 100644 --- a/unsupported/test/NonLinearOptimization.cpp +++ b/unsupported/test/NonLinearOptimization.cpp @@ -14,6 +14,9 @@ using std::sqrt; +// tolerance for chekcing number of iterations +#define LM_EVAL_COUNT_TOL 4/3 + int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag) { /* subroutine fcn for chkder example. 
*/ @@ -1023,7 +1026,8 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.njev, 72); // check norm^2 std::cout.precision(30); - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + std::cout << lm.fvec.squaredNorm() << "\n"; + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1044,7 +1048,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev, 9); VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1354,8 +1358,12 @@ void testNistMGH17(void) // check return value VERIFY_IS_EQUAL(info, 2); - VERIFY(lm.nfev < 650); // 602 - VERIFY(lm.njev < 600); // 545 + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev, 602); // 602 + VERIFY_IS_EQUAL(lm.njev, 545); // 545 + --g_test_level; + VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL); /* * Second try diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp index 374f86df9..c4606cd17 100644 --- a/unsupported/test/autodiff.cpp +++ b/unsupported/test/autodiff.cpp @@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y) using namespace std; // return x+std::sin(y); EIGEN_ASM_COMMENT("mybegin"); - return static_cast<Scalar>(x*2 - 1 + pow(1+x,2) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(-0.5*x*x+0)); + // pow(float, int) promotes to pow(double, double) + return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0); //return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2; EIGEN_ASM_COMMENT("myend"); } diff --git 
a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp new file mode 100644 index 000000000..f16cc6f07 --- /dev/null +++ b/unsupported/test/cxx11_eventcount.cpp @@ -0,0 +1,140 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS +#include "main.h" +#include <Eigen/CXX11/ThreadPool> + +// Visual studio doesn't implement a rand_r() function since its +// implementation of rand() is already thread safe +int rand_reentrant(unsigned int* s) { +#ifdef EIGEN_COMP_MSVC_STRICT + EIGEN_UNUSED_VARIABLE(s); + return rand(); +#else + return rand_r(s); +#endif +} + +static void test_basic_eventcount() +{ + std::vector<EventCount::Waiter> waiters(1); + EventCount ec(waiters); + EventCount::Waiter& w = waiters[0]; + ec.Notify(false); + ec.Prewait(&w); + ec.Notify(true); + ec.CommitWait(&w); + ec.Prewait(&w); + ec.CancelWait(&w); +} + +// Fake bounded counter-based queue. 
+struct TestQueue { + std::atomic<int> val_; + static const int kQueueSize = 10; + + TestQueue() : val_() {} + + ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); } + + bool Push() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == kQueueSize) return false; + if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed)) + return true; + } + } + + bool Pop() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == 0) return false; + if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed)) + return true; + } + } + + bool Empty() { return val_.load(std::memory_order_relaxed) == 0; } +}; + +const int TestQueue::kQueueSize; + +// A number of producers send messages to a set of consumers using a set of +// fake queues. Ensure that it does not crash, consumers don't deadlock and +// number of blocked and unblocked threads match. 
+static void test_stress_eventcount() +{ + const int kThreads = std::thread::hardware_concurrency(); + static const int kEvents = 1 << 16; + static const int kQueues = 10; + + std::vector<EventCount::Waiter> waiters(kThreads); + EventCount ec(waiters); + TestQueue queues[kQueues]; + + std::vector<std::unique_ptr<std::thread>> producers; + for (int i = 0; i < kThreads; i++) { + producers.emplace_back(new std::thread([&ec, &queues]() { + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Push()) { + ec.Notify(false); + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + })); + } + + std::vector<std::unique_ptr<std::thread>> consumers; + for (int i = 0; i < kThreads; i++) { + consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() { + EventCount::Waiter& w = waiters[i]; + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Pop()) continue; + j--; + ec.Prewait(&w); + bool empty = true; + for (int q = 0; q < kQueues; q++) { + if (!queues[q].Empty()) { + empty = false; + break; + } + } + if (!empty) { + ec.CancelWait(&w); + continue; + } + ec.CommitWait(&w); + } + })); + } + + for (int i = 0; i < kThreads; i++) { + producers[i]->join(); + consumers[i]->join(); + } +} + +void test_cxx11_eventcount() +{ + CALL_SUBTEST(test_basic_eventcount()); + CALL_SUBTEST(test_stress_eventcount()); +} diff --git a/unsupported/test/float16.cpp b/unsupported/test/cxx11_float16.cpp index 13f3ddaca..9141c4820 100644 --- a/unsupported/test/float16.cpp +++ b/unsupported/test/cxx11_float16.cpp @@ -7,7 +7,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC float16 +#define EIGEN_TEST_FUNC cxx11_float16 #include "main.h" #include 
<Eigen/src/Core/arch/CUDA/Half.h> @@ -31,11 +31,11 @@ void test_conversion() VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); // Verify round-to-nearest-even behavior. - float val1 = float(half(__half{0x3c00})); - float val2 = float(half(__half{0x3c01})); - float val3 = float(half(__half{0x3c02})); - VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); + float val1 = float(half(__half(0x3c00))); + float val2 = float(half(__half(0x3c01))); + float val3 = float(half(__half(0x3c02))); + VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02); // Conversion from int. VERIFY_IS_EQUAL(half(-1).x, 0xbc00); @@ -49,35 +49,43 @@ void test_conversion() VERIFY_IS_EQUAL(half(true).x, 0x3c00); // Conversion to float. - VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f); - VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f); + VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f); + VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f); // Denormals. - VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f); + VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f); // NaNs and infinities. VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. 
VERIFY(!(numext::isnan)(float(half(0.0f)))); - VERIFY((numext::isinf)(float(half(__half{0xfc00})))); - VERIFY((numext::isnan)(float(half(__half{0xfc01})))); - VERIFY((numext::isinf)(float(half(__half{0x7c00})))); - VERIFY((numext::isnan)(float(half(__half{0x7c01})))); + VERIFY((numext::isinf)(float(half(__half(0xfc00))))); + VERIFY((numext::isnan)(float(half(__half(0xfc01))))); + VERIFY((numext::isinf)(float(half(__half(0x7c00))))); + VERIFY((numext::isnan)(float(half(__half(0x7c01))))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); +#endif // Exactly same checks as above, just directly on the half representation. - VERIFY(!(numext::isinf)(half(__half{0x7bff}))); - VERIFY(!(numext::isnan)(half(__half{0x0000}))); - VERIFY((numext::isinf)(half(__half{0xfc00}))); - VERIFY((numext::isnan)(half(__half{0xfc01}))); - VERIFY((numext::isinf)(half(__half{0x7c00}))); - VERIFY((numext::isnan)(half(__half{0x7c01}))); + VERIFY(!(numext::isinf)(half(__half(0x7bff)))); + VERIFY(!(numext::isnan)(half(__half(0x0000)))); + VERIFY((numext::isinf)(half(__half(0xfc00)))); + VERIFY((numext::isnan)(half(__half(0xfc01)))); + VERIFY((numext::isinf)(half(__half(0x7c00)))); + VERIFY((numext::isnan)(half(__half(0x7c01)))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY((numext::isnan)(half(0.0 / 0.0))); VERIFY((numext::isinf)(half(1.0 / 0.0))); VERIFY((numext::isinf)(half(-1.0 / 0.0))); +#endif } void test_arithmetic() @@ -114,6 +122,8 @@ void test_comparison() VERIFY(half(1.0f) != half(2.0f)); // Comparisons with NaNs and infinities. 
+#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); @@ -124,13 +134,26 @@ void test_comparison() VERIFY(half(1.0) < half(1.0 / 0.0)); VERIFY(half(1.0) > half(-1.0 / 0.0)); +#endif } -void test_functions() +void test_basic_functions() { VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); + + VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); + + VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); + + VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI)); @@ -138,10 +161,32 @@ void test_functions() VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); } -void test_float16() +void test_trigonometric_functions() +{ + VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); + //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); + + VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); + // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); + VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); + + 
VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); +} + +void test_cxx11_float16() { CALL_SUBTEST(test_conversion()); CALL_SUBTEST(test_arithmetic()); CALL_SUBTEST(test_comparison()); - CALL_SUBTEST(test_functions()); + CALL_SUBTEST(test_basic_functions()); + CALL_SUBTEST(test_trigonometric_functions()); } diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp index ecac3add1..8911c59d8 100644 --- a/unsupported/test/cxx11_meta.cpp +++ b/unsupported/test/cxx11_meta.cpp @@ -10,7 +10,7 @@ #include "main.h" #include <array> -#include <Eigen/CXX11/Core> +#include <Eigen/CXX11/src/util/CXX11Meta.h> using Eigen::internal::is_same; using Eigen::internal::type_list; diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp new file mode 100644 index 000000000..d20d87111 --- /dev/null +++ b/unsupported/test/cxx11_runqueue.cpp @@ -0,0 +1,227 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_USE_THREADS +#include <cstdlib> +#include "main.h" +#include <Eigen/CXX11/ThreadPool> + + +// Visual studio doesn't implement a rand_r() function since its +// implementation of rand() is already thread safe +int rand_reentrant(unsigned int* s) { +#ifdef EIGEN_COMP_MSVC_STRICT + EIGEN_UNUSED_VARIABLE(s); + return rand(); +#else + return rand_r(s); +#endif +} + +void test_basic_runqueue() +{ + RunQueue<int, 4> q; + // Check empty state. + VERIFY(q.Empty()); + VERIFY_IS_EQUAL(0u, q.Size()); + VERIFY_IS_EQUAL(0, q.PopFront()); + std::vector<int> stolen; + VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0u, stolen.size()); + // Push one front, pop one front. + VERIFY_IS_EQUAL(0, q.PushFront(1)); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(1, q.PopFront()); + VERIFY_IS_EQUAL(0u, q.Size()); + // Push front to overflow. + VERIFY_IS_EQUAL(0, q.PushFront(2)); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(3)); + VERIFY_IS_EQUAL(2u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(4)); + VERIFY_IS_EQUAL(3u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushFront(5)); + VERIFY_IS_EQUAL(4u, q.Size()); + VERIFY_IS_EQUAL(6, q.PushFront(6)); + VERIFY_IS_EQUAL(4u, q.Size()); + VERIFY_IS_EQUAL(5, q.PopFront()); + VERIFY_IS_EQUAL(3u, q.Size()); + VERIFY_IS_EQUAL(4, q.PopFront()); + VERIFY_IS_EQUAL(2u, q.Size()); + VERIFY_IS_EQUAL(3, q.PopFront()); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(2, q.PopFront()); + VERIFY_IS_EQUAL(0u, q.Size()); + VERIFY_IS_EQUAL(0, q.PopFront()); + // Push one back, pop one back. + VERIFY_IS_EQUAL(0, q.PushBack(7)); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); + VERIFY_IS_EQUAL(7, stolen[0]); + VERIFY_IS_EQUAL(0u, q.Size()); + stolen.clear(); + // Push back to overflow. 
+ VERIFY_IS_EQUAL(0, q.PushBack(8)); + VERIFY_IS_EQUAL(1u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(9)); + VERIFY_IS_EQUAL(2u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(10)); + VERIFY_IS_EQUAL(3u, q.Size()); + VERIFY_IS_EQUAL(0, q.PushBack(11)); + VERIFY_IS_EQUAL(4u, q.Size()); + VERIFY_IS_EQUAL(12, q.PushBack(12)); + VERIFY_IS_EQUAL(4u, q.Size()); + // Pop back in halves. + VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(2u, stolen.size()); + VERIFY_IS_EQUAL(10, stolen[0]); + VERIFY_IS_EQUAL(11, stolen[1]); + VERIFY_IS_EQUAL(2u, q.Size()); + stolen.clear(); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); + VERIFY_IS_EQUAL(9, stolen[0]); + VERIFY_IS_EQUAL(1u, q.Size()); + stolen.clear(); + VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(1u, stolen.size()); + VERIFY_IS_EQUAL(8, stolen[0]); + stolen.clear(); + VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen)); + VERIFY_IS_EQUAL(0u, stolen.size()); + // Empty again. + VERIFY(q.Empty()); + VERIFY_IS_EQUAL(0u, q.Size()); +} + +// Empty tests that the queue is not claimed to be empty when is is in fact not. +// Emptiness property is crucial part of thread pool blocking scheme, +// so we go to great effort to ensure this property. We create a queue with +// 1 element and then push 1 element (either front or back at random) and pop +// 1 element (either front or back at random). So queue always contains at least +// 1 element, but otherwise changes chaotically. Another thread constantly tests +// that the queue is not claimed to be empty. 
+void test_empty_runqueue() +{ + RunQueue<int, 4> q; + q.PushFront(1); + std::atomic<bool> done(false); + std::thread mutator([&q, &done]() { + unsigned rnd = 0; + std::vector<int> stolen; + for (int i = 0; i < 1 << 18; i++) { + if (rand_reentrant(&rnd) % 2) + VERIFY_IS_EQUAL(0, q.PushFront(1)); + else + VERIFY_IS_EQUAL(0, q.PushBack(1)); + if (rand_reentrant(&rnd) % 2) + VERIFY_IS_EQUAL(1, q.PopFront()); + else { + for (;;) { + if (q.PopBackHalf(&stolen) == 1) { + stolen.clear(); + break; + } + VERIFY_IS_EQUAL(0u, stolen.size()); + } + } + } + done = true; + }); + while (!done) { + VERIFY(!q.Empty()); + int size = q.Size(); + VERIFY_GE(size, 1); + VERIFY_LE(size, 2); + } + VERIFY_IS_EQUAL(1, q.PopFront()); + mutator.join(); +} + +// Stress is a chaotic random test. +// One thread (owner) calls PushFront/PopFront, other threads call PushBack/ +// PopBack. Ensure that we don't crash, deadlock, and all sanity checks pass. +void test_stress_runqueue() +{ + static const int kEvents = 1 << 18; + RunQueue<int, 8> q; + std::atomic<int> total(0); + std::vector<std::unique_ptr<std::thread>> threads; + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + int pushed = 1; + int popped = 1; + while (pushed < kEvents || popped < kEvents) { + if (pushed < kEvents) { + if (q.PushFront(pushed) == 0) { + sum += pushed; + pushed++; + } + } + if (popped < kEvents) { + int v = q.PopFront(); + if (v != 0) { + sum -= v; + popped++; + } + } + } + total += sum; + })); + for (int i = 0; i < 2; i++) { + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + for (int j = 1; j < kEvents; j++) { + if (q.PushBack(j) == 0) { + sum += j; + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + total += sum; + })); + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + std::vector<int> stolen; + for (int j = 1; j < kEvents;) { + if (q.PopBackHalf(&stolen) == 0) { + EIGEN_THREAD_YIELD(); + continue; + } + while (stolen.size() && j < kEvents) { + int v 
= stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + sum += v; + j++; + } + } + while (stolen.size()) { + int v = stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD(); + } + total -= sum; + })); + } + for (size_t i = 0; i < threads.size(); i++) threads[i]->join(); + VERIFY(q.Empty()); + VERIFY(total.load() == 0); +} + +void test_cxx11_runqueue() +{ + CALL_SUBTEST_1(test_basic_runqueue()); + CALL_SUBTEST_2(test_empty_runqueue()); + CALL_SUBTEST_3(test_stress_runqueue()); +} diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp index 482dfa7de..037767270 100644 --- a/unsupported/test/cxx11_tensor_argmax.cpp +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -64,7 +64,7 @@ static void test_argmax_tuple_reducer() Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; DimensionList<DenseIndex, 4> dims; reduced = index_tuples.reduce( - dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); + dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >()); Tensor<float, 0, DataLayout> maxi = tensor.maximum(); @@ -74,7 +74,7 @@ static void test_argmax_tuple_reducer() for (int d = 0; d < 3; ++d) reduce_dims[d] = d; Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); reduced_by_dims = index_tuples.reduce( - reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); + reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >()); Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims); @@ -96,7 +96,7 @@ static void test_argmin_tuple_reducer() Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; DimensionList<DenseIndex, 4> dims; reduced = index_tuples.reduce( - dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); + dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >()); Tensor<float, 0, DataLayout> mini = tensor.minimum(); @@ -106,7 +106,7 @@ static void 
test_argmin_tuple_reducer() for (int d = 0; d < 3; ++d) reduce_dims[d] = d; Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); reduced_by_dims = index_tuples.reduce( - reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); + reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >()); Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims); diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu index 6d1ef07f9..98ac180ef 100644 --- a/unsupported/test/cxx11_tensor_contract_cuda.cu +++ b/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -84,6 +84,65 @@ void test_cuda_contraction(int m_size, int k_size, int n_size) cudaFree((void*)d_t_result); } + +template<int DataLayout> +void test_scalar(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<float, 2, DataLayout> t_left(m_size, k_size); + Tensor<float, 2, DataLayout> t_right(k_size, n_size); + Tensor<float, 0, DataLayout> t_result; + Tensor<float, 0, DataLayout> t_result_gpu; + Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice 
stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_left(d_t_left, m_size, k_size); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_right(d_t_right, k_size, n_size); + Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> > + gpu_t_result(d_t_result); + + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + t_result = t_left.contract(t_right, dims); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + if (fabs(t_result() - t_result_gpu()) > 1e-4f && + !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) { + std::cout << "mismatch detected: " << t_result() + << " vs " << t_result_gpu() << std::endl; + assert(false); + } + + cudaFree((void*)d_t_left); + cudaFree((void*)d_t_right); + cudaFree((void*)d_t_result); +} + + template<int DataLayout> void test_cuda_contraction_m() { for (int k = 32; k < 256; k++) { @@ -138,6 +197,9 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128)); + CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>()); CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>()); diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp index 0e16308a2..73623b2ed 100644 --- a/unsupported/test/cxx11_tensor_contraction.cpp +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -87,19 +87,14 @@ static void test_scalar() vec1.setRandom(); vec2.setRandom(); - Tensor<float, 1, DataLayout> scalar(1); - scalar.setZero(); Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}}; - typedef TensorEvaluator<decltype(vec1.contract(vec2, dims)), DefaultDevice> Evaluator; - Evaluator eval(vec1.contract(vec2, dims), DefaultDevice()); - eval.evalTo(scalar.data()); - 
EIGEN_STATIC_ASSERT(Evaluator::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims); float expected = 0.0f; for (int i = 0; i < 6; ++i) { expected += vec1(i) * vec2(i); } - VERIFY_IS_APPROX(scalar(0), expected); + VERIFY_IS_APPROX(scalar(), expected); } template<int DataLayout> diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 134359611..4026f48f0 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -661,6 +661,9 @@ void test_cuda_digamma() for (int i = 5; i < 7; ++i) { VERIFY_IS_EQUAL(out(i), expected_out(i)); } + + cudaFree(d_in); + cudaFree(d_out); } template <typename Scalar> @@ -718,13 +721,17 @@ void test_cuda_zeta() assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); VERIFY_IS_EQUAL(out(0), expected_out(0)); - VERIFY_IS_APPROX_OR_LESS_THAN(out(3), expected_out(3)); + VERIFY((std::isnan)(out(3))); for (int i = 1; i < 6; ++i) { if (i != 3) { VERIFY_IS_APPROX(out(i), expected_out(i)); } } + + cudaFree(d_in_x); + cudaFree(d_in_q); + cudaFree(d_out); } template <typename Scalar> @@ -787,6 +794,10 @@ void test_cuda_polygamma() for (int i = 0; i < 7; ++i) { VERIFY_IS_APPROX(out(i), expected_out(i)); } + + cudaFree(d_in_x); + cudaFree(d_in_n); + cudaFree(d_out); } template <typename Scalar> @@ -826,9 +837,9 @@ void test_cuda_igamma() Scalar* d_a; Scalar* d_x; Scalar* d_out; - cudaMalloc((void**)(&d_a), bytes); - cudaMalloc((void**)(&d_x), bytes); - cudaMalloc((void**)(&d_out), bytes); + assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); @@ -854,6 +865,10 @@ void test_cuda_igamma() } } } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); } template <typename 
Scalar> @@ -920,6 +935,10 @@ void test_cuda_igammac() } } } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); } template <typename Scalar> @@ -935,8 +954,8 @@ void test_cuda_erf(const Scalar stddev) Scalar* d_in; Scalar* d_out; - cudaMalloc((void**)(&d_in), bytes); - cudaMalloc((void**)(&d_out), bytes); + assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index cbe9e6449..b6ca54d93 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -241,7 +241,7 @@ void test_cpu() { const float result = out(i,j,k); const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) + (in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); - if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { continue; } VERIFY_IS_APPROX(expected, result); @@ -258,7 +258,7 @@ void test_cpu() { in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) + (in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); - if (fabs(expected) < 1e-4 && fabs(result) < 1e-4) { + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { continue; } VERIFY_IS_APPROX(expected, result); diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp index ce78efe52..421e73693 100644 --- a/unsupported/test/cxx11_tensor_dimension.cpp +++ b/unsupported/test/cxx11_tensor_dimension.cpp @@ -37,7 +37,6 @@ static void test_fixed_size() VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7); } - static void test_match() { Eigen::DSizes<int, 3> dyn(2,3,7); @@ -49,10 +48,22 @@ static void test_match() VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false); } +static void test_rank_zero() +{ + Eigen::Sizes<> scalar; + 
VERIFY_IS_EQUAL(scalar.TotalSize(), 1); + VERIFY_IS_EQUAL(scalar.rank(), 0); + VERIFY_IS_EQUAL(internal::array_prod(scalar), 1); + + Eigen::DSizes<ptrdiff_t, 0> dscalar; + VERIFY_IS_EQUAL(dscalar.TotalSize(), 1); + VERIFY_IS_EQUAL(dscalar.rank(), 0); +} void test_cxx11_tensor_dimension() { CALL_SUBTEST(test_dynamic_size()); CALL_SUBTEST(test_fixed_size()); CALL_SUBTEST(test_match()); + CALL_SUBTEST(test_rank_zero()); } diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp index 9130fff35..d7eea42d7 100644 --- a/unsupported/test/cxx11_tensor_empty.cpp +++ b/unsupported/test/cxx11_tensor_empty.cpp @@ -24,10 +24,10 @@ static void test_empty_tensor() static void test_empty_fixed_size_tensor() { - TensorFixedSize<float, Sizes<0>> source; - TensorFixedSize<float, Sizes<0>> tgt1 = source; - TensorFixedSize<float, Sizes<0>> tgt2(source); - TensorFixedSize<float, Sizes<0>> tgt3; + TensorFixedSize<float, Sizes<0> > source; + TensorFixedSize<float, Sizes<0> > tgt1 = source; + TensorFixedSize<float, Sizes<0> > tgt2(source); + TensorFixedSize<float, Sizes<0> > tgt3; tgt3 = tgt1; tgt3 = tgt2; } diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index 8389e9840..4dd355e6e 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -112,13 +112,13 @@ static void test_3d() Tensor<float, 3> mat1(2,3,7); Tensor<float, 3, RowMajor> mat2(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; mat2(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -142,7 +142,7 @@ static void test_3d() Tensor<float, 3, RowMajor> mat11(2,3,7); mat11 = mat2 / 3.14f; - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { @@ -155,7 +155,7 @@ static void test_3d() VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f); 
VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f); VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f); - val += 1.0; + val += 1.0f; } } } @@ -167,25 +167,25 @@ static void test_constants() Tensor<float, 3> mat2(2,3,7); Tensor<float, 3> mat3(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } mat2 = mat1.constant(3.14f); mat3 = mat1.cwiseMax(7.3f).exp(); - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat2(i,j,k), 3.14f); VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f))); - val += 1.0; + val += 1.0f; } } } @@ -228,25 +228,25 @@ static void test_functors() Tensor<float, 3> mat2(2,3,7); Tensor<float, 3> mat3(2,3,7); - float val = 1.0; + float val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } mat2 = mat1.inverse().unaryExpr(&asinf); mat3 = mat1.unaryExpr(&tanhf); - val = 1.0; + val = 1.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k))); VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k))); - val += 1.0; + val += 1.0f; } } } diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp index 89874349f..2f14ebc62 100644 --- a/unsupported/test/cxx11_tensor_fft.cpp +++ b/unsupported/test/cxx11_tensor_fft.cpp @@ -205,15 +205,15 @@ static void test_fft_real_input_energy() { VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i)); } - float energy_original = 0.0; - float energy_after_fft = 0.0; + RealScalar energy_original = 0.0; + RealScalar energy_after_fft = 0.0; for (int i = 0; i < total_size; ++i) { - energy_original += pow(std::abs(input(i)), 2); + energy_original += numext::abs2(input(i)); } for (int i = 0; i < total_size; 
++i) { - energy_after_fft += pow(std::abs(output(i)), 2); + energy_after_fft += numext::abs2(output(i)); } if(FFTDirection == FFT_FORWARD) { diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 1c33fefb3..4c660de65 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -20,6 +20,8 @@ static void test_0d() TensorFixedSize<float, Sizes<> > scalar1; TensorFixedSize<float, Sizes<>, RowMajor> scalar2; VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1); scalar1() = 7.0; scalar2() = 13.0; @@ -128,9 +130,9 @@ static void test_tensor_map() static void test_2d() { float data1[6]; - TensorMap<TensorFixedSize<float, Sizes<2, 3> >> mat1(data1,2,3); + TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3); float data2[6]; - TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor>> mat2(data2,2,3); + TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3); VERIFY_IS_EQUAL((mat1.size()), 2*3); VERIFY_IS_EQUAL(mat1.rank(), 2); @@ -151,7 +153,7 @@ static void test_2d() mat2(1,1) = -4.0; mat2(1,2) = -5.0; - TensorFixedSize<float, Sizes<2, 3>> mat3; + TensorFixedSize<float, Sizes<2, 3> > mat3; TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4; mat3 = mat1.abs(); mat4 = mat2.abs(); @@ -186,13 +188,13 @@ static void test_3d() // VERIFY_IS_EQUAL((mat1.dimension(1)), 3); // VERIFY_IS_EQUAL((mat1.dimension(2)), 7); - float val = 0.0; + float val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; mat2(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -208,13 +210,13 @@ static void test_3d() // VERIFY_IS_EQUAL((mat3.dimension(2)), 7); - val = 0.0; + val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val)); 
VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val)); - val += 1.0; + val += 1.0f; } } } @@ -224,12 +226,12 @@ static void test_3d() static void test_array() { TensorFixedSize<float, Sizes<2, 3, 7> > mat1; - float val = 0.0; + float val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { mat1(i,j,k) = val; - val += 1.0; + val += 1.0f; } } } @@ -237,12 +239,12 @@ static void test_array() TensorFixedSize<float, Sizes<2, 3, 7> > mat3; mat3 = mat1.pow(3.5f); - val = 0.0; + val = 0.0f; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f)); - val += 1.0; + val += 1.0f; } } } diff --git a/unsupported/test/cxx11_tensor_forced_eval.cpp b/unsupported/test/cxx11_tensor_forced_eval.cpp index ad9de867d..45d7345e9 100644 --- a/unsupported/test/cxx11_tensor_forced_eval.cpp +++ b/unsupported/test/cxx11_tensor_forced_eval.cpp @@ -22,14 +22,15 @@ static void test_simple() m1.setRandom(); m2.setRandom(); - TensorMap<Tensor<float, 2>> mat1(m1.data(), 3,3); - TensorMap<Tensor<float, 2>> mat2(m2.data(), 3,3); + TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3); + TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3); Tensor<float, 2> mat3(3,3); mat3 = mat1; typedef Tensor<float, 1>::DimensionPair DimPair; - Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}}); + Eigen::array<DimPair, 1> dims; + dims[0] = DimPair(1, 0); mat3 = mat3.contract(mat2, dims).eval(); @@ -60,7 +61,7 @@ static void test_const() Eigen::array<int, 2> bcast; bcast[0] = 3; bcast[1] = 1; - const TensorMap<Tensor<const float, 2>> input_tensor(input.data(), 3, 3); + const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3); Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); for (int i = 0; i < 3; ++i) { diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp index 
5d6a49181..988b01481 100644 --- a/unsupported/test/cxx11_tensor_image_patch.cpp +++ b/unsupported/test/cxx11_tensor_image_patch.cpp @@ -568,13 +568,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 16); // RowMajor - Tensor<float, 4, RowMajor> l_in_row_major = l_in.swap_layout(); - VERIFY_IS_EQUAL(l_in.dimension(0), l_in_row_major.dimension(3)); - VERIFY_IS_EQUAL(l_in.dimension(1), l_in_row_major.dimension(2)); - VERIFY_IS_EQUAL(l_in.dimension(2), l_in_row_major.dimension(1)); - VERIFY_IS_EQUAL(l_in.dimension(3), l_in_row_major.dimension(0)); - - Tensor<float, 5, RowMajor> l_out_row_major = l_in_row_major.extract_image_patches(11, 11); + Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11); @@ -589,10 +583,8 @@ static void test_imagenet_patches() for (int r = 0; r < 11; ++r) { for (int d = 0; d < 3; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) { expected = l_in(d, r-5+i, c-5+j, b); - expected_row_major = l_in_row_major(b, c-5+j, r-5+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -601,15 +593,13 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor if (l_out_row_major(b, patchId, c, r, d) != - expected_row_major) { + expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), - expected_row_major); - // Check that ColMajor and RowMajor agree. 
- VERIFY_IS_EQUAL(expected, expected_row_major); + expected); } } } @@ -628,8 +618,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(9, 9); + l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9); @@ -644,10 +633,8 @@ static void test_imagenet_patches() for (int r = 0; r < 9; ++r) { for (int d = 0; d < 16; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) { expected = l_in(d, r-4+i, c-4+j, b); - expected_row_major = l_in_row_major(b, c-4+j, r-4+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -655,12 +642,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. 
- VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } @@ -679,8 +664,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(7, 7); + l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7); @@ -695,10 +679,8 @@ static void test_imagenet_patches() for (int r = 0; r < 7; ++r) { for (int d = 0; d < 32; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) { expected = l_in(d, r-3+i, c-3+j, b); - expected_row_major = l_in_row_major(b, c-3+j, r-3+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -706,12 +688,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. 
- VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } @@ -730,8 +710,7 @@ static void test_imagenet_patches() VERIFY_IS_EQUAL(l_out.dimension(4), 32); // RowMajor - l_in_row_major = l_in.swap_layout(); - l_out_row_major = l_in_row_major.extract_image_patches(3, 3); + l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3); VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13); VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3); @@ -746,10 +725,8 @@ static void test_imagenet_patches() for (int r = 0; r < 3; ++r) { for (int d = 0; d < 64; ++d) { float expected = 0.0f; - float expected_row_major = 0.0f; if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) { expected = l_in(d, r-1+i, c-1+j, b); - expected_row_major = l_in_row_major(b, c-1+j, r-1+i, d); } // ColMajor if (l_out(d, r, c, patchId, b) != expected) { @@ -757,12 +734,10 @@ static void test_imagenet_patches() } VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); // RowMajor - if (l_out_row_major(b, patchId, c, r, d) != expected_row_major) { + if (l_out_row_major(b, patchId, c, r, d) != expected) { std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; } - VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected_row_major); - // Check that ColMajor and RowMajor agree. 
- VERIFY_IS_EQUAL(expected, expected_row_major); + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); } } } diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index a8a095e38..3db0ee7c0 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -19,8 +19,8 @@ static void test_0d() Tensor<int, 0> scalar1; Tensor<int, 0, RowMajor> scalar2; - TensorMap<Tensor<const int, 0>> scalar3(scalar1.data()); - TensorMap<Tensor<const int, 0, RowMajor>> scalar4(scalar2.data()); + TensorMap<Tensor<const int, 0> > scalar3(scalar1.data()); + TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data()); scalar1() = 7; scalar2() = 13; @@ -37,8 +37,8 @@ static void test_1d() Tensor<int, 1> vec1(6); Tensor<int, 1, RowMajor> vec2(6); - TensorMap<Tensor<const int, 1>> vec3(vec1.data(), 6); - TensorMap<Tensor<const int, 1, RowMajor>> vec4(vec2.data(), 6); + TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6); + TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6); vec1(0) = 4; vec2(0) = 0; vec1(1) = 8; vec2(1) = 1; @@ -85,8 +85,8 @@ static void test_2d() mat2(1,1) = 4; mat2(1,2) = 5; - TensorMap<Tensor<const int, 2>> mat3(mat1.data(), 2, 3); - TensorMap<Tensor<const int, 2, RowMajor>> mat4(mat2.data(), 2, 3); + TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3); + TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3); VERIFY_IS_EQUAL(mat3.rank(), 2); VERIFY_IS_EQUAL(mat3.size(), 6); @@ -129,8 +129,8 @@ static void test_3d() } } - TensorMap<Tensor<const int, 3>> mat3(mat1.data(), 2, 3, 7); - TensorMap<Tensor<const int, 3, RowMajor>> mat4(mat2.data(), array<DenseIndex, 3>{{2, 3, 7}}); + TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7); + TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7); VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.size(), 2*3*7); @@ -173,8 +173,8 @@ static void test_from_tensor() } } - 
TensorMap<Tensor<int, 3>> mat3(mat1); - TensorMap<Tensor<int, 3, RowMajor>> mat4(mat2); + TensorMap<Tensor<int, 3> > mat3(mat1); + TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2); VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.size(), 2*3*7); @@ -199,19 +199,23 @@ static void test_from_tensor() } } - TensorFixedSize<int, Sizes<2,3,7>> mat5; + TensorFixedSize<int, Sizes<2,3,7> > mat5; val = 0; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - mat5(i,j,k) = val; + array<ptrdiff_t, 3> coords; + coords[0] = i; + coords[1] = j; + coords[2] = k; + mat5(coords) = val; val++; } } } - TensorMap<TensorFixedSize<int, Sizes<2,3,7>>> mat6(mat5); + TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5); VERIFY_IS_EQUAL(mat6.rank(), 3); VERIFY_IS_EQUAL(mat6.size(), 2*3*7); @@ -233,8 +237,8 @@ static void test_from_tensor() static int f(const TensorMap<Tensor<int, 3> >& tensor) { // Size<0> empty; - EIGEN_STATIC_ASSERT((internal::array_size<Sizes<>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); Tensor<int, 0> result = tensor.sum(); return result(); } @@ -253,7 +257,7 @@ static void test_casting() } } - TensorMap<Tensor<int, 3>> map(tensor); + TensorMap<Tensor<int, 3> > map(tensor); int sum1 = f(map); int sum2 = f(tensor); diff --git a/unsupported/test/cxx11_tensor_math.cpp b/unsupported/test/cxx11_tensor_math.cpp index d247bebaa..61c742a16 100644 --- a/unsupported/test/cxx11_tensor_math.cpp +++ b/unsupported/test/cxx11_tensor_math.cpp @@ -16,7 +16,7 @@ using Eigen::RowMajor; static void test_tanh() { - Tensor<float, 1> vec1({6}); + Tensor<float, 1> vec1(6); vec1.setRandom(); Tensor<float, 1> vec2 = vec1.tanh(); @@ 
-28,7 +28,7 @@ static void test_tanh() static void test_sigmoid() { - Tensor<float, 1> vec1({6}); + Tensor<float, 1> vec1(6); vec1.setRandom(); Tensor<float, 1> vec2 = vec1.sigmoid(); diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp index 72f826216..4fba6fdd1 100644 --- a/unsupported/test/cxx11_tensor_mixed_indices.cpp +++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp @@ -14,8 +14,8 @@ static void test_simple() { - Tensor<float, 1, ColMajor> vec1({6}); - Tensor<float, 1, ColMajor, int> vec2({6}); + Tensor<float, 1, ColMajor> vec1(6); + Tensor<float, 1, ColMajor, int> vec2(6); vec1(0) = 4.0; vec2(0) = 0.0; vec1(1) = 8.0; vec2(1) = 1.0; diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index cb917bb37..dceac793e 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -134,6 +134,68 @@ void test_cuda_elementwise() { gpu_device.deallocate(d_res_float); } +void test_cuda_trancendental() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1( + d_float1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2( + d_float2, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half( + 
d_res1_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float( + d_res1_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half( + d_res2_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float( + d_res2_float, num_elem); + + gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); + gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); + gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>(); + gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>(); + gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp(); + gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log(); + + Tensor<float, 1> input1(num_elem); + Tensor<Eigen::half, 1> half_prec1(num_elem); + Tensor<Eigen::half, 1> full_prec1(num_elem); + Tensor<float, 1> input2(num_elem); + Tensor<Eigen::half, 1> half_prec2(num_elem); + Tensor<Eigen::half, 1> full_prec2(num_elem); + gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl; + VERIFY_IS_APPROX(full_prec1(i), half_prec1(i)); + } + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise log " << i << " input = " 
<< input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; + VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); + } + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res1_half); + gpu_device.deallocate(d_res1_float); + gpu_device.deallocate(d_res2_half); + gpu_device.deallocate(d_res2_float); +} + void test_cuda_contractions() { Eigen::CudaStreamDevice stream; @@ -144,36 +206,38 @@ void test_cuda_contractions() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( d_float1, rows, cols); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( d_float2, rows, cols); - Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_half( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half( d_res_half, rows, cols); - Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_res_float( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float( d_res_float, rows, cols); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); - gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float1.constant(0.5f); + gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f); typedef Tensor<float, 2>::DimensionPair DimPair; Eigen::array<DimPair, 1> dims(DimPair(1, 0)); - gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims); - gpu_res_half.device(gpu_device) = 
gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims).cast<float>(); + gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims); - Tensor<float, 2> half_prec(rows, cols); - Tensor<float, 2> full_prec(rows, cols); - gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + Tensor<Eigen::half, 2> half_prec(rows, cols); + Tensor<Eigen::half, 2> full_prec(rows, cols); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half)); gpu_device.synchronize(); for (int i = 0; i < rows; ++i) { for (int j = 0; j < cols; ++j) { - std::cout << "Checking contract " << i << " " << j << std::endl; - VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j)); + std::cout << "Checking contract " << i << " " << j << full_prec(i, j) << " " << half_prec(i, j) << std::endl; + if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) { + VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j)); + } } } @@ -192,29 +256,29 @@ void test_cuda_reductions() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); - float* d_res_half = (float*)gpu_device.allocate(size * sizeof(float)); - float* d_res_float = (float*)gpu_device.allocate(size * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(size * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(size * sizeof(Eigen::half)); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( d_float1, size, size); Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> 
gpu_float2( d_float2, size, size); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half( d_res_half, size); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float( d_res_float, size); gpu_float1.device(gpu_device) = gpu_float1.random(); gpu_float2.device(gpu_device) = gpu_float2.random(); Eigen::array<int, 1> redux_dim = {{0}}; - gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim); - gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim).cast<float>(); + gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim); - Tensor<float, 1> half_prec(size); - Tensor<float, 1> full_prec(size); - gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(float)); - gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(float)); + Tensor<Eigen::half, 1> half_prec(size); + Tensor<Eigen::half, 1> full_prec(size); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(Eigen::half)); gpu_device.synchronize(); for (int i = 0; i < size; ++i) { @@ -222,6 +286,61 @@ void test_cuda_reductions() { VERIFY_IS_APPROX(full_prec(i), half_prec(i)); } + redux_dim = {{1}}; + gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim); + + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, size*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, size*sizeof(Eigen::half)); + gpu_device.synchronize(); + + for (int i = 0; i < size; ++i) { + std::cout << "Checking redux " << i << 
std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + + + +void test_cuda_full_reductions() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int size = 13; + int num_elem = size*size; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( + d_float1, size, size); + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( + d_float2, size, size); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half( + d_res_half); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float( + d_res_float); + + gpu_float1.device(gpu_device) = gpu_float1.random(); + gpu_float2.device(gpu_device) = gpu_float2.random(); + + gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(); + + Tensor<Eigen::half, 0> half_prec; + Tensor<Eigen::half, 0> full_prec; + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half)); + gpu_device.synchronize(); + + VERIFY_IS_APPROX(full_prec(), half_prec()); + gpu_device.deallocate(d_float1); gpu_device.deallocate(d_float2); gpu_device.deallocate(d_res_half); @@ -229,27 +348,58 @@ void test_cuda_reductions() { } +void test_cuda_forced_evals() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = 
(float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.abs(); + gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); + + Tensor<float, 1> half_prec(num_elem); + Tensor<float, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking unary " << i << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + #endif void test_cxx11_tensor_of_float16_cuda() { #ifdef EIGEN_HAS_CUDA_FP16 - Eigen::CudaStreamDevice stream; - Eigen::GpuDevice device(&stream); - if (device.majorDeviceVersion() > 5 || - (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) { - std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << std::endl; - - CALL_SUBTEST_1(test_cuda_conversion()); - CALL_SUBTEST_1(test_cuda_unary()); - CALL_SUBTEST_1(test_cuda_elementwise()); - CALL_SUBTEST_2(test_cuda_contractions()); - CALL_SUBTEST_3(test_cuda_reductions()); - } - else { - std::cout << "Half floats require compute capability of at least 5.3. 
This device only supports " << device.majorDeviceVersion() << "." << device.minorDeviceVersion() << ". Skipping the test" << std::endl; - } + CALL_SUBTEST_1(test_cuda_conversion()); + CALL_SUBTEST_1(test_cuda_unary()); + CALL_SUBTEST_1(test_cuda_elementwise()); + CALL_SUBTEST_1(test_cuda_trancendental()); + CALL_SUBTEST_2(test_cuda_contractions()); + CALL_SUBTEST_3(test_cuda_reductions()); + CALL_SUBTEST_3(test_cuda_full_reductions()); + CALL_SUBTEST_4(test_cuda_forced_evals()); + #else std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl; #endif diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index 47d4d8636..fe860c970 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -195,7 +195,10 @@ static void test_3d() VERIFY_IS_EQUAL((epsilon(0,2,1)), -1); VERIFY_IS_EQUAL((epsilon(1,0,2)), -1); - array<Eigen::DenseIndex, 3> dims{{2,3,4}}; + array<Eigen::DenseIndex, 3> dims; + dims[0] = 2; + dims[1] = 3; + dims[2] = 4; Tensor<int, 3> t1(dims); Tensor<int, 3, RowMajor> t2(dims); diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index e46197464..423074a38 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -226,7 +226,7 @@ void test_multithread_contraction_agrees_with_singlethread() { for (ptrdiff_t i = 0; i < st_result.size(); i++) { // if both of the values are very small, then do nothing (because the test will fail // due to numerical precision issues when values are small) - if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) { + if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) { VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]); } } @@ -234,6 +234,42 @@ void test_multithread_contraction_agrees_with_singlethread() { template<int DataLayout> +void test_full_contraction() { + int 
contract_size1 = internal::random<int>(1, 500); + int contract_size2 = internal::random<int>(1, 500); + + Tensor<float, 2, DataLayout> left(contract_size1, + contract_size2); + Tensor<float, 2, DataLayout> right(contract_size1, + contract_size2); + left.setRandom(); + right.setRandom(); + + // add constants to shift values away from 0 for more precision + left += left.constant(1.5f); + right += right.constant(1.5f); + + typedef Tensor<float, 2>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}}); + + Eigen::ThreadPool tp(internal::random<int>(2, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); + + Tensor<float, 0, DataLayout> st_result; + st_result = left.contract(right, dims); + + Tensor<float, 0, DataLayout> tp_result; + tp_result.device(thread_pool_device) = left.contract(right, dims); + + VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); + // if both of the values are very small, then do nothing (because the test will fail + // due to numerical precision issues when values are small) + if (fabs(st_result() - tp_result()) >= 1e-4f) { + VERIFY_IS_APPROX(st_result(), tp_result()); + } +} + +template<int DataLayout> void test_multithreaded_reductions() { const int num_threads = internal::random<int>(3, 11); ThreadPool thread_pool(num_threads); @@ -324,6 +360,9 @@ void test_cxx11_tensor_thread_pool() CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>()); CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>()); + CALL_SUBTEST_4(test_full_contraction<ColMajor>()); + CALL_SUBTEST_4(test_full_contraction<RowMajor>()); + CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>()); CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>()); diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp index a2bdb99e4..64f168c16 100644 --- a/unsupported/test/levenberg_marquardt.cpp +++ b/unsupported/test/levenberg_marquardt.cpp @@ -23,6 
+23,9 @@ using std::sqrt; +// tolerance for checking number of iterations +#define LM_EVAL_COUNT_TOL 4/3 + struct lmder_functor : DenseFunctor<double> { lmder_functor(void): DenseFunctor<double>(3,15) {} @@ -631,7 +634,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev(), 79); VERIFY_IS_EQUAL(lm.njev(), 72); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -652,7 +655,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev(), 9); VERIFY_IS_EQUAL(lm.njev(), 8); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -789,7 +792,10 @@ void testNistMGH10(void) MGH10_functor functor; LevenbergMarquardt<MGH10_functor> lm(functor); info = lm.minimize(x); - VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall); + ++g_test_level; + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + --g_test_level; + // was: VERIFY_IS_EQUAL(info, 1); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); @@ -799,9 +805,13 @@ void testNistMGH10(void) VERIFY_IS_APPROX(x[2], 3.4522363462E+02); // check return value - //VERIFY_IS_EQUAL(info, 1); + + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 284 ); VERIFY_IS_EQUAL(lm.njev(), 249 ); + --g_test_level; + VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL); /* * Second try @@ -809,7 +819,10 @@ void testNistMGH10(void) x<< 0.02, 4000., 250.; // do the computation info = lm.minimize(x); + ++g_test_level; VERIFY_IS_EQUAL(info, 
LevenbergMarquardtSpace::RelativeReductionTooSmall); + // was: VERIFY_IS_EQUAL(info, 1); + --g_test_level; // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); @@ -819,9 +832,12 @@ void testNistMGH10(void) VERIFY_IS_APPROX(x[2], 3.4522363462E+02); // check return value - //VERIFY_IS_EQUAL(info, 1); + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 126); VERIFY_IS_EQUAL(lm.njev(), 116); + --g_test_level; + VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL); } @@ -896,8 +912,12 @@ void testNistBoxBOD(void) // check return value VERIFY_IS_EQUAL(info, 1); + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 16 ); VERIFY_IS_EQUAL(lm.njev(), 15 ); + --g_test_level; + VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03); // check x diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp index 9a995f941..cd24064ad 100644 --- a/unsupported/test/matrix_function.cpp +++ b/unsupported/test/matrix_function.cpp @@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A) MatrixType scaledA; RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff(); - if (maxImagPartOfSpectrum >= 0.9 * EIGEN_PI) - scaledA = A * 0.9 * EIGEN_PI / maxImagPartOfSpectrum; + if (maxImagPartOfSpectrum >= RealScalar(0.9 * EIGEN_PI)) + scaledA = A * RealScalar(0.9 * EIGEN_PI) / maxImagPartOfSpectrum; else scaledA = A; diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp index 8e104ed1e..53911370f 100644 --- a/unsupported/test/matrix_power.cpp +++ b/unsupported/test/matrix_power.cpp @@ -24,7 +24,7 @@ void test2dRotation(double tol) s = std::sin(angle); B << c, s, -s, c; - C = Apow(std::ldexp(angle,1) / EIGEN_PI); + C = Apow(std::ldexp(angle,1) / T(EIGEN_PI)); std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n'; 
VERIFY(C.isApprox(B, tol)); } |