From ba32ded021b1cf6224ec7c0b5638af079076e99e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 15:11:09 -0700 Subject: Fixed include path --- unsupported/Eigen/CXX11/src/util/CXX11Meta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h index f479590b9..ec27eddb8 100644 --- a/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/util/CXX11Meta.h @@ -535,7 +535,7 @@ InstType instantiate_by_c_array(ArrType* arr) #else // Non C++11, fallback to emulation mode -#include "src/Core/util/EmulateCXX11Meta.h" +#include "EmulateCXX11Meta.h" #endif -- cgit v1.2.3 From c07404f6a1324f515841695666ca91af70f8b8a5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 15:19:19 -0700 Subject: Restore Tensor support for non c++11 compilers --- unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h | 10 +++++----- unsupported/test/cxx11_tensor_argmax.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h index 4e8f86674..0f6dcedaa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -34,25 +34,25 @@ class TensorOpCost { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() { return internal::functor_traits< - internal::scalar_product_op>::Cost; + internal::scalar_product_op >::Cost; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() { - return internal::functor_traits>::Cost; + return internal::functor_traits >::Cost; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() { return internal::functor_traits< - internal::scalar_quotient_op>::Cost; + internal::scalar_quotient_op >::Cost; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() { - return 
internal::functor_traits>::Cost; + return internal::functor_traits >::Cost; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() { return internal::functor_traits< - internal::scalar_cast_op>::Cost; + internal::scalar_cast_op >::Cost; } TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp index 482dfa7de..037767270 100644 --- a/unsupported/test/cxx11_tensor_argmax.cpp +++ b/unsupported/test/cxx11_tensor_argmax.cpp @@ -64,7 +64,7 @@ static void test_argmax_tuple_reducer() Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = index_tuples.reduce( - dims, internal::ArgMaxTupleReducer>()); + dims, internal::ArgMaxTupleReducer >()); Tensor maxi = tensor.maximum(); @@ -74,7 +74,7 @@ static void test_argmax_tuple_reducer() for (int d = 0; d < 3; ++d) reduce_dims[d] = d; Tensor, 1, DataLayout> reduced_by_dims(7); reduced_by_dims = index_tuples.reduce( - reduce_dims, internal::ArgMaxTupleReducer>()); + reduce_dims, internal::ArgMaxTupleReducer >()); Tensor max_by_dims = tensor.maximum(reduce_dims); @@ -96,7 +96,7 @@ static void test_argmin_tuple_reducer() Tensor, 0, DataLayout> reduced; DimensionList dims; reduced = index_tuples.reduce( - dims, internal::ArgMinTupleReducer>()); + dims, internal::ArgMinTupleReducer >()); Tensor mini = tensor.minimum(); @@ -106,7 +106,7 @@ static void test_argmin_tuple_reducer() for (int d = 0; d < 3; ++d) reduce_dims[d] = d; Tensor, 1, DataLayout> reduced_by_dims(7); reduced_by_dims = index_tuples.reduce( - reduce_dims, internal::ArgMinTupleReducer>()); + reduce_dims, internal::ArgMinTupleReducer >()); Tensor min_by_dims = tensor.minimum(reduce_dims); -- cgit v1.2.3 From 46bcb70969829f243eaae0360b70859e68450dee Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 15:20:59 -0700 Subject: Don't turn on const expressions when compiling with gcc >= 4.8 unless the -std=c++11 option has 
been used --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index de1265af3..69863d826 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -375,7 +375,7 @@ #define EIGEN_HAS_CONSTEXPR 1 #endif #elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - EIGEN_GNUC_AT_LEAST(4,8) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) #define EIGEN_HAS_CONSTEXPR 1 #endif -- cgit v1.2.3 From 1131a984a67b3ed33333c3f253cd418a57c02f20 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 15:48:59 -0700 Subject: Made the cxx11_tensor_forced_eval compile without c++11. --- unsupported/test/cxx11_tensor_forced_eval.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_tensor_forced_eval.cpp b/unsupported/test/cxx11_tensor_forced_eval.cpp index ad9de867d..45d7345e9 100644 --- a/unsupported/test/cxx11_tensor_forced_eval.cpp +++ b/unsupported/test/cxx11_tensor_forced_eval.cpp @@ -22,14 +22,15 @@ static void test_simple() m1.setRandom(); m2.setRandom(); - TensorMap> mat1(m1.data(), 3,3); - TensorMap> mat2(m2.data(), 3,3); + TensorMap > mat1(m1.data(), 3,3); + TensorMap > mat2(m2.data(), 3,3); Tensor mat3(3,3); mat3 = mat1; typedef Tensor::DimensionPair DimPair; - Eigen::array dims({{DimPair(1, 0)}}); + Eigen::array dims; + dims[0] = DimPair(1, 0); mat3 = mat3.contract(mat2, dims).eval(); @@ -60,7 +61,7 @@ static void test_const() Eigen::array bcast; bcast[0] = 3; bcast[1] = 1; - const TensorMap> input_tensor(input.data(), 3, 3); + const TensorMap > input_tensor(input.data(), 3, 3); Tensor output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); for (int i = 0; i < 3; ++i) { -- cgit v1.2.3 From 4f53178e62f82b95c28ed67d507e6632515cfca6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 
Apr 2016 16:09:54 -0700 Subject: Made a couple of tensor tests compile without requiring c++11 support. --- unsupported/test/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index b9e1b34bf..485e4e1e0 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -110,12 +110,17 @@ ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) +# TODO: The following tests are prefix with the cxx11 string, since historically +# they depended on c++11. This isn't the case anymore so we ought to rename them. +ei_add_test(cxx11_float16) +ei_add_test(cxx11_tensor_forced_eval) + + if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) - ei_add_test(cxx11_float16) ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_meta) @@ -130,7 +135,6 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_convolution) ei_add_test(cxx11_tensor_expr) ei_add_test(cxx11_tensor_math) - ei_add_test(cxx11_tensor_forced_eval) ei_add_test(cxx11_tensor_fixed_size) ei_add_test(cxx11_tensor_const) ei_add_test(cxx11_tensor_of_const_values) -- cgit v1.2.3 From a8c0405cf5eb0e8def89686fa616a40e5df2d602 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 16:34:43 -0700 Subject: Deleted unused default values for template parameters --- unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index 392cb6e3d..36298cb60 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -233,7 +233,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper::half HalfPacket; - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { // whole method makes column major assumption @@ -276,7 +276,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper(data); } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { // whole method makes column major assumption -- cgit v1.2.3 From 9d1dbd1ec08e8086e6eb5eca0bc7f4db555ac925 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 16:53:55 -0700 Subject: Fixed the cxx11_tensor_empty test to compile without requiring cxx11 support --- unsupported/test/cxx11_tensor_empty.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp index 9130fff35..d7eea42d7 100644 --- a/unsupported/test/cxx11_tensor_empty.cpp +++ b/unsupported/test/cxx11_tensor_empty.cpp @@ -24,10 +24,10 @@ static void test_empty_tensor() static void test_empty_fixed_size_tensor() { - TensorFixedSize> source; - TensorFixedSize> tgt1 = source; - TensorFixedSize> tgt2(source); - TensorFixedSize> tgt3; + TensorFixedSize > source; + TensorFixedSize > tgt1 = source; + TensorFixedSize > tgt2(source); + TensorFixedSize > tgt3; tgt3 = tgt1; tgt3 = tgt2; } -- cgit v1.2.3 From c0882ef4d921068bc29dd2ca9acdd0edeac560c2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 17:13:51 -0700 Subject: Moved a number of tensor tests that don't require cxx11 to work properly outside the EIGEN_TEST_CXX11 test section --- unsupported/test/CMakeLists.txt | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 
485e4e1e0..d244132a7 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -110,11 +110,24 @@ ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) -# TODO: The following tests are prefix with the cxx11 string, since historically -# they depended on c++11. This isn't the case anymore so we ought to rename them. +# TODO: The following test names are prefixed with the cxx11 string, since historically +# the tests depended on c++11. This isn't the case anymore so we ought to rename them. ei_add_test(cxx11_float16) +ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_forced_eval) - +ei_add_test(cxx11_tensor_math) +ei_add_test(cxx11_tensor_const) +ei_add_test(cxx11_tensor_intdiv) +ei_add_test(cxx11_tensor_casts) +ei_add_test(cxx11_tensor_empty) +ei_add_test(cxx11_tensor_sugar) +ei_add_test(cxx11_tensor_roundings) +ei_add_test(cxx11_tensor_layout_swap) +ei_add_test(cxx11_tensor_io) +if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + # This test requires __uint128_t which is only available on 64bit systems + ei_add_test(cxx11_tensor_uint128) +endif() if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for @@ -127,20 +140,16 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) ei_add_test(cxx11_tensor_assign) - ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_index_list) ei_add_test(cxx11_tensor_mixed_indices) ei_add_test(cxx11_tensor_comparisons) ei_add_test(cxx11_tensor_contraction) ei_add_test(cxx11_tensor_convolution) ei_add_test(cxx11_tensor_expr) - ei_add_test(cxx11_tensor_math) ei_add_test(cxx11_tensor_fixed_size) - ei_add_test(cxx11_tensor_const) ei_add_test(cxx11_tensor_of_const_values) ei_add_test(cxx11_tensor_of_complex) ei_add_test(cxx11_tensor_of_strings) - ei_add_test(cxx11_tensor_intdiv) ei_add_test(cxx11_tensor_lvalue) ei_add_test(cxx11_tensor_map) ei_add_test(cxx11_tensor_broadcasting) @@ -160,23 
+169,11 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_tensor_ref) ei_add_test(cxx11_tensor_random) - ei_add_test(cxx11_tensor_casts) - ei_add_test(cxx11_tensor_roundings) - ei_add_test(cxx11_tensor_reverse) - ei_add_test(cxx11_tensor_layout_swap) - ei_add_test(cxx11_tensor_io) ei_add_test(cxx11_tensor_generator) ei_add_test(cxx11_tensor_custom_op) ei_add_test(cxx11_tensor_custom_index) - ei_add_test(cxx11_tensor_sugar) ei_add_test(cxx11_tensor_fft) ei_add_test(cxx11_tensor_ifft) - ei_add_test(cxx11_tensor_empty) - - if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - # This test requires __uint128_t which is only available on 64bit systems - ei_add_test(cxx11_tensor_uint128) - endif() endif() -- cgit v1.2.3 From d14105f158a731fb3b02650dde4df58935abd71e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 17:22:37 -0700 Subject: Made several tensor tests compatible with cxx03 --- unsupported/test/CMakeLists.txt | 6 ++--- unsupported/test/cxx11_tensor_fixed_size.cpp | 6 ++--- unsupported/test/cxx11_tensor_map.cpp | 36 +++++++++++++++------------- unsupported/test/cxx11_tensor_simple.cpp | 5 +++- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index d244132a7..22442b394 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -114,6 +114,9 @@ ei_add_test(kronecker_product) # the tests depended on c++11. This isn't the case anymore so we ought to rename them. 
ei_add_test(cxx11_float16) ei_add_test(cxx11_tensor_dimension) +ei_add_test(cxx11_tensor_map) +ei_add_test(cxx11_tensor_assign) +ei_add_test(cxx11_tensor_comparisons) ei_add_test(cxx11_tensor_forced_eval) ei_add_test(cxx11_tensor_math) ei_add_test(cxx11_tensor_const) @@ -139,10 +142,8 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) - ei_add_test(cxx11_tensor_assign) ei_add_test(cxx11_tensor_index_list) ei_add_test(cxx11_tensor_mixed_indices) - ei_add_test(cxx11_tensor_comparisons) ei_add_test(cxx11_tensor_contraction) ei_add_test(cxx11_tensor_convolution) ei_add_test(cxx11_tensor_expr) @@ -151,7 +152,6 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_of_complex) ei_add_test(cxx11_tensor_of_strings) ei_add_test(cxx11_tensor_lvalue) - ei_add_test(cxx11_tensor_map) ei_add_test(cxx11_tensor_broadcasting) ei_add_test(cxx11_tensor_chipping) ei_add_test(cxx11_tensor_concatenation) diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index 5fe164859..46d741b05 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -130,9 +130,9 @@ static void test_tensor_map() static void test_2d() { float data1[6]; - TensorMap >> mat1(data1,2,3); + TensorMap > > mat1(data1,2,3); float data2[6]; - TensorMap, RowMajor>> mat2(data2,2,3); + TensorMap, RowMajor> > mat2(data2,2,3); VERIFY_IS_EQUAL((mat1.size()), 2*3); VERIFY_IS_EQUAL(mat1.rank(), 2); @@ -153,7 +153,7 @@ static void test_2d() mat2(1,1) = -4.0; mat2(1,2) = -5.0; - TensorFixedSize> mat3; + TensorFixedSize > mat3; TensorFixedSize, RowMajor> mat4; mat3 = mat1.abs(); mat4 = mat2.abs(); diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp index a8a095e38..3db0ee7c0 100644 --- a/unsupported/test/cxx11_tensor_map.cpp +++ b/unsupported/test/cxx11_tensor_map.cpp @@ -19,8 +19,8 @@ static void test_0d() Tensor scalar1; Tensor 
scalar2; - TensorMap> scalar3(scalar1.data()); - TensorMap> scalar4(scalar2.data()); + TensorMap > scalar3(scalar1.data()); + TensorMap > scalar4(scalar2.data()); scalar1() = 7; scalar2() = 13; @@ -37,8 +37,8 @@ static void test_1d() Tensor vec1(6); Tensor vec2(6); - TensorMap> vec3(vec1.data(), 6); - TensorMap> vec4(vec2.data(), 6); + TensorMap > vec3(vec1.data(), 6); + TensorMap > vec4(vec2.data(), 6); vec1(0) = 4; vec2(0) = 0; vec1(1) = 8; vec2(1) = 1; @@ -85,8 +85,8 @@ static void test_2d() mat2(1,1) = 4; mat2(1,2) = 5; - TensorMap> mat3(mat1.data(), 2, 3); - TensorMap> mat4(mat2.data(), 2, 3); + TensorMap > mat3(mat1.data(), 2, 3); + TensorMap > mat4(mat2.data(), 2, 3); VERIFY_IS_EQUAL(mat3.rank(), 2); VERIFY_IS_EQUAL(mat3.size(), 6); @@ -129,8 +129,8 @@ static void test_3d() } } - TensorMap> mat3(mat1.data(), 2, 3, 7); - TensorMap> mat4(mat2.data(), array{{2, 3, 7}}); + TensorMap > mat3(mat1.data(), 2, 3, 7); + TensorMap > mat4(mat2.data(), 2, 3, 7); VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.size(), 2*3*7); @@ -173,8 +173,8 @@ static void test_from_tensor() } } - TensorMap> mat3(mat1); - TensorMap> mat4(mat2); + TensorMap > mat3(mat1); + TensorMap > mat4(mat2); VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.size(), 2*3*7); @@ -199,19 +199,23 @@ static void test_from_tensor() } } - TensorFixedSize> mat5; + TensorFixedSize > mat5; val = 0; for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - mat5(i,j,k) = val; + array coords; + coords[0] = i; + coords[1] = j; + coords[2] = k; + mat5(coords) = val; val++; } } } - TensorMap>> mat6(mat5); + TensorMap > > mat6(mat5); VERIFY_IS_EQUAL(mat6.rank(), 3); VERIFY_IS_EQUAL(mat6.size(), 2*3*7); @@ -233,8 +237,8 @@ static void test_from_tensor() static int f(const TensorMap >& tensor) { // Size<0> empty; - EIGEN_STATIC_ASSERT((internal::array_size>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((internal::array_size>::value == 0), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); Tensor result = tensor.sum(); return result(); } @@ -253,7 +257,7 @@ static void test_casting() } } - TensorMap> map(tensor); + TensorMap > map(tensor); int sum1 = f(map); int sum2 = f(tensor); diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index 47d4d8636..fe860c970 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -195,7 +195,10 @@ static void test_3d() VERIFY_IS_EQUAL((epsilon(0,2,1)), -1); VERIFY_IS_EQUAL((epsilon(1,0,2)), -1); - array dims{{2,3,4}}; + array dims; + dims[0] = 2; + dims[1] = 3; + dims[2] = 4; Tensor t1(dims); Tensor t2(dims); -- cgit v1.2.3 From f100d1494ced3fdfb2c6a364596fc251a9e6cecc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:14:33 -0700 Subject: Return the proper size (ie 1) for tensors of rank 0 --- unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 977dcafb0..f0b8ac958 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -275,7 +275,7 @@ struct DSizes : array { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { - return internal::array_prod(*static_cast(this)); + return (NumDims == 0) ? 
1 : internal::array_prod(*static_cast(this)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { -- cgit v1.2.3 From d2172178425ef3963a9b9696d5071b808cc33902 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:15:34 -0700 Subject: Added a few tests to ensure that the dimensions of rank 0 tensors are correctly computed --- unsupported/test/cxx11_tensor_dimension.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp index ce78efe52..421e73693 100644 --- a/unsupported/test/cxx11_tensor_dimension.cpp +++ b/unsupported/test/cxx11_tensor_dimension.cpp @@ -37,7 +37,6 @@ static void test_fixed_size() VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7); } - static void test_match() { Eigen::DSizes dyn(2,3,7); @@ -49,10 +48,22 @@ static void test_match() VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false); } +static void test_rank_zero() +{ + Eigen::Sizes<> scalar; + VERIFY_IS_EQUAL(scalar.TotalSize(), 1); + VERIFY_IS_EQUAL(scalar.rank(), 0); + VERIFY_IS_EQUAL(internal::array_prod(scalar), 1); + + Eigen::DSizes dscalar; + VERIFY_IS_EQUAL(dscalar.TotalSize(), 1); + VERIFY_IS_EQUAL(dscalar.rank(), 0); +} void test_cxx11_tensor_dimension() { CALL_SUBTEST(test_dynamic_size()); CALL_SUBTEST(test_fixed_size()); CALL_SUBTEST(test_match()); + CALL_SUBTEST(test_rank_zero()); } -- cgit v1.2.3 From 2b890ae618bb440a0d2826d204ba9ab6b22fbcfa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:30:40 -0700 Subject: Fixed compilation errors generated by clang --- Eigen/src/Core/arch/CUDA/Half.h | 4 +++- unsupported/test/cxx11_float16.cpp | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 6387f2870..c2a61f9ce 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -46,6 +46,8 @@ 
// Make our own __half definition that is similar to CUDA's. struct __half { + __half() {} + explicit __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -292,7 +294,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) const FP32 f16max = { (127 + 16) << 23 }; const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; unsigned int sign_mask = 0x80000000u; - __half o = { 0 }; + __half o(0); unsigned int sign = f.u & sign_mask; f.u ^= sign; diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp index 273dcbc11..9a813653c 100644 --- a/unsupported/test/cxx11_float16.cpp +++ b/unsupported/test/cxx11_float16.cpp @@ -31,9 +31,9 @@ void test_conversion() VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); // Verify round-to-nearest-even behavior. - float val1 = float(half(__half{0x3c00})); - float val2 = float(half(__half{0x3c01})); - float val3 = float(half(__half{0x3c02})); + float val1 = float(half(__half(0x3c00))); + float val2 = float(half(__half(0x3c01))); + float val3 = float(half(__half(0x3c02))); VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); @@ -49,21 +49,21 @@ void test_conversion() VERIFY_IS_EQUAL(half(true).x, 0x3c00); // Conversion to float. - VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f); - VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f); + VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f); + VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f); // Denormals. - VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f); - VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f); + VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f); // NaNs and infinities. 
VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. VERIFY(!(numext::isnan)(float(half(0.0f)))); - VERIFY((numext::isinf)(float(half(__half{0xfc00})))); - VERIFY((numext::isnan)(float(half(__half{0xfc01})))); - VERIFY((numext::isinf)(float(half(__half{0x7c00})))); - VERIFY((numext::isnan)(float(half(__half{0x7c01})))); + VERIFY((numext::isinf)(float(half(__half(0xfc00))))); + VERIFY((numext::isnan)(float(half(__half(0xfc01))))); + VERIFY((numext::isinf)(float(half(__half(0x7c00))))); + VERIFY((numext::isnan)(float(half(__half(0x7c01))))); #if !EIGEN_COMP_MSVC // Visual Studio errors out on divisions by 0 @@ -73,12 +73,12 @@ void test_conversion() #endif // Exactly same checks as above, just directly on the half representation. - VERIFY(!(numext::isinf)(half(__half{0x7bff}))); - VERIFY(!(numext::isnan)(half(__half{0x0000}))); - VERIFY((numext::isinf)(half(__half{0xfc00}))); - VERIFY((numext::isnan)(half(__half{0xfc01}))); - VERIFY((numext::isinf)(half(__half{0x7c00}))); - VERIFY((numext::isnan)(half(__half{0x7c01}))); + VERIFY(!(numext::isinf)(half(__half(0x7bff)))); + VERIFY(!(numext::isnan)(half(__half(0x0000)))); + VERIFY((numext::isinf)(half(__half(0xfc00)))); + VERIFY((numext::isnan)(half(__half(0xfc01)))); + VERIFY((numext::isinf)(half(__half(0x7c00)))); + VERIFY((numext::isnan)(half(__half(0x7c01)))); #if !EIGEN_COMP_MSVC // Visual Studio errors out on divisions by 0 -- cgit v1.2.3 From 44f592dcebffaf741ce40050a8efc0e1911e56b8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:33:46 -0700 Subject: Deleted unnecessary trailing commas. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 2f06f8442..b7597b3a5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -40,7 +40,7 @@ class compute_tensor_flags }; public: - enum { ret = packet_access_bit}; + enum { ret = packet_access_bit }; }; @@ -54,7 +54,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit), + Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit) }; }; @@ -69,7 +69,7 @@ struct traits > static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; enum { Options = Options_, - Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit), + Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit) }; }; @@ -86,7 +86,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = Options_, - Flags = BaseTraits::Flags, + Flags = BaseTraits::Flags }; }; @@ -102,7 +102,7 @@ struct traits > static const int Layout = BaseTraits::Layout; enum { Options = BaseTraits::Options, - Flags = BaseTraits::Flags, + Flags = BaseTraits::Flags }; }; @@ -253,7 +253,7 @@ struct nested > // Pc=0. 
typedef enum { PADDING_VALID = 1, - PADDING_SAME = 2, + PADDING_SAME = 2 } PaddingType; } // end namespace Eigen -- cgit v1.2.3 From e5f71aa6b232f08748af17cd85e63da9759fda1e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:36:10 -0700 Subject: Deleted useless trailing commas --- unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 4 ++-- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 5abff0800..cb615c75b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -36,7 +36,7 @@ struct traits > static const int Layout = internal::traits::Layout; enum { - Flags = 0, + Flags = 0 }; }; @@ -100,7 +100,7 @@ struct TensorEvaluator, Device> IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, Layout = TensorEvaluator::Layout, - RawAccess = TensorEvaluator::RawAccess, + RawAccess = TensorEvaluator::RawAccess }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 97182258d..8b8a1d2c3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -41,7 +41,7 @@ struct traits > static const int Layout = traits::Layout; enum { - Flags = 0, + Flags = 0 }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h index 36298cb60..b27e1a1b4 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -16,7 +16,7 @@ namespace internal { enum { Rhs = 0, - Lhs = 1, + Lhs = 1 }; /* diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 49d849e23..8491c4ca2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -40,7 +40,7 @@ struct traits > static const int Layout = XprTraits::Layout; enum { - Flags = 0, + Flags = 0 }; }; @@ -163,7 +163,7 @@ struct traits > static const int Layout = XprTraits::Layout; enum { - Flags = 0, + Flags = 0 }; }; -- cgit v1.2.3 From 17fe7f354e679110f5a62e67c2883f1f9378115d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:39:01 -0700 Subject: Deleted trailing commas --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 8b8a1d2c3..6f113b903 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -588,7 +588,7 @@ struct TensorEvaluator::type PacketReturnType; enum { - Layout = TensorEvaluator::Layout, + Layout = TensorEvaluator::Layout }; // Most of the code is assuming that both input tensors are ColMajor. 
If the diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index ff3c5662d..091007ab7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -233,7 +233,7 @@ struct traits > static const int Layout = traits::Layout; enum { - Flags = 0, + Flags = 0 }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 5c6748a43..c556fec0f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -34,7 +34,7 @@ struct traits > static const int Layout = XprTraits::Layout; enum { - Flags = 0, + Flags = 0 }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 1ce53ad69..7ec757519 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -34,7 +34,7 @@ struct traits > static const int Layout = XprTraits::Layout; enum { - Flags = 0, + Flags = 0 }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index 63a8476ef..cd0109ef4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -181,7 +181,7 @@ template IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? 
RowMajor : ColMajor, - CoordAccess = false, // to be implemented + CoordAccess = false // to be implemented }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) -- cgit v1.2.3 From d6c9596fd8693b29b94fed984824480f285336ea Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Apr 2016 18:51:33 -0700 Subject: Added missing accessors to fixed sized tensors --- .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 92 +++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 9c0ed43b7..b27ee0084 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -128,7 +128,6 @@ class TensorFixedSize : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const @@ -137,8 +136,54 @@ class TensorFixedSize : public TensorBaseoperator()(array{{firstIndex, otherIndices...}}); } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (Options&RowMajor) { + const Index 
index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } #endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const { @@ -176,6 +221,51 @@ class TensorFixedSize : public TensorBase{{firstIndex, otherIndices...}}); } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (Options&RowMajor) { + const Index index = i3 + 
m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } #endif EIGEN_DEVICE_FUNC -- cgit v1.2.3 From b1bd53aa6bec39c53de475c90987eece86c206d2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 1 May 2016 23:25:06 +0200 Subject: Fix performance regression: with AVX, unaligned stores were emitted instead of aligned ones for fixed size assignment. 
--- Eigen/src/Core/AssignEvaluator.h | 15 +++++++++++---- Eigen/src/Core/CoreEvaluators.h | 4 ++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 9d4b315a0..b1193e421 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -256,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, - JointAlignment = Kernel::AssignmentTraits::JointAlignment + JointAlignment = Kernel::AssignmentTraits::JointAlignment, + DefaultAlignment = unpacket_traits::alignment }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + unpacket_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } @@ -277,9 +278,12 @@ template struct copy_using_evaluator_innervec_InnerUnrolling { typedef typename Kernel::PacketType PacketType; + enum { + DefaultAlignment = unpacket_traits::alignment + }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) { - kernel.template assignPacketByOuterInner(outer, Index_); + kernel.template assignPacketByOuterInner(outer, Index_); enum { NextIndex = Index_ + unpacket_traits::size }; copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } @@ -433,6 +437,9 @@ template struct dense_assignment_loop { typedef typename Kernel::PacketType PacketType; + enum { + DefaultAlignment = unpacket_traits::alignment + }; EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) { const Index innerSize = kernel.innerSize(); @@ -440,7 +447,7 @@ struct dense_assignment_loop const Index packetSize = unpacket_traits::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; 
inner+=packetSize) - kernel.template assignPacketByOuterInner(outer, inner); + kernel.template assignPacketByOuterInner(outer, inner); } }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 388805f0d..932178f53 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -850,14 +850,14 @@ struct unary_evaluator, IndexBa template EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) - { + { return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); } template EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) - { + { return writePacket(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, x); -- cgit v1.2.3 From 8a9228ed9b4d38bbd8630474f6779a4d3847b153 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 1 May 2016 14:48:01 -0700 Subject: Fixed compilation error --- .../Eigen/CXX11/src/Tensor/TensorContractionCuda.h | 58 +++++++++++----------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index dbff660a9..6a3ef14ef 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -543,12 +543,12 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh #define prefetch_lhs(reg, row, col) \ if (!CHECK_LHS_BOUNDARY) { \ if (col < k_size) { \ - reg =lhs.loadPacket(row, col); \ + reg =lhs.loadPacket(row, col); \ } \ } else { \ if (col < k_size) { \ if (row + 3 < m_size) { \ - reg =lhs.loadPacket(row, col); \ + reg =lhs.loadPacket(row, col); \ } else if (row + 2 < m_size) { \ reg.x =lhs(row + 0, col); \ reg.y =lhs(row + 1, col); \ @@ -578,7 +578,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh if (!CHECK_RHS_BOUNDARY) { if ((rhs_vert + 3) < 
k_size) { // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); } else if (rhs_vert + 2 < k_size) { // just CHECK_RHS_BOUNDARY rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); @@ -593,7 +593,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh } else { if (rhs_horiz0 < n_size) { if ((rhs_vert + 3) < k_size) { - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); } else if ((rhs_vert + 2) < k_size) { rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); @@ -790,37 +790,37 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, if (!CHECK_LHS_BOUNDARY) { if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, 
(threadIdx.y/4+k)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); } } else { // just CHECK_LHS_BOUNDARY if (lhs_vert + 3 < m_size) { if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); } } else if (lhs_vert + 2 < m_size) { if ((threadIdx.y/4+k+24) < k_size) { @@ -909,8 +909,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, if (!CHECK_RHS_BOUNDARY) { if ((rhs_vert + 3) < k_size) { // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); } else if (rhs_vert + 2 < k_size) { // just CHECK_RHS_BOUNDARY rhs_pf0.x = 
rhs(rhs_vert, rhs_horiz0); @@ -932,8 +932,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, if (rhs_horiz1 < n_size) { if ((rhs_vert + 3) < k_size) { // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); } else if (rhs_vert + 2 < k_size) { // just CHECK_RHS_BOUNDARY rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); @@ -954,7 +954,7 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, } else if (rhs_horiz0 < n_size) { if ((rhs_vert + 3) < k_size) { // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); } else if ((rhs_vert + 2) < k_size) { // just CHECK_RHS_BOUNDARY rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); -- cgit v1.2.3 From da50419df8787d5de914092f604b643806862eeb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 2 May 2016 19:50:22 -0700 Subject: Made a cast explicit --- Eigen/src/Core/arch/CUDA/Half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index c2a61f9ce..f6dfaff53 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -294,7 +294,7 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) const FP32 f16max = { (127 + 16) << 23 }; const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; unsigned int sign_mask = 0x80000000u; - __half o(0); + __half o(static_cast(0x0u)); unsigned int sign = f.u & sign_mask; f.u ^= sign; -- cgit v1.2.3 From aad9a04da4e58b16010268d58ef92f4a1141fbf4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 3 May 2016 09:37:19 -0700 Subject: Deleted superfluous explicit keyword. 
--- unsupported/Eigen/CXX11/src/util/MaxSizeVector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h index 551124bae..961456f10 100644 --- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h @@ -41,7 +41,7 @@ class MaxSizeVector { // Construct a new MaxSizeVector, reserve and resize to n. // Copy the init value to all elements. EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit MaxSizeVector(size_t n, const T& init) + MaxSizeVector(size_t n, const T& init) : reserve_(n), size_(n), data_(static_cast(internal::aligned_malloc(n * sizeof(T)))) { for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } -- cgit v1.2.3 From 6c3e5b85bc543ba428725479c0e55345f1a02461 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 3 May 2016 09:38:42 -0700 Subject: Fixed compilation error with cuda >= 7.5 --- Eigen/src/Core/arch/CUDA/Half.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index f6dfaff53..060c2c805 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -294,7 +294,8 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) const FP32 f16max = { (127 + 16) << 23 }; const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; unsigned int sign_mask = 0x80000000u; - __half o(static_cast(0x0u)); + __half o; + o.x = static_cast(0x0u); unsigned int sign = f.u & sign_mask; f.u ^= sign; -- cgit v1.2.3 From 2c5568a757e75b1e8dd6b8754ea3d13a95be96ce Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 3 May 2016 12:06:07 -0700 Subject: Added a test to validate the computation of exp and log on 16bit floats --- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 63 ++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git 
a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 154a72d5c..37fe3e9a4 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -134,6 +134,68 @@ void test_cuda_elementwise() { gpu_device.deallocate(d_res_float); } +void test_cuda_trancendental() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res1_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res1_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res2_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res2_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap, Eigen::Aligned> gpu_float1( + d_float1, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_float2( + d_float2, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res1_half( + d_res1_half, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res1_float( + d_res1_float, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res2_half( + d_res2_half, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res2_float( + d_res2_float, num_elem); + + gpu_float1.device(gpu_device) = gpu_float1.random(); + gpu_float2.device(gpu_device) = gpu_float2.random(); + gpu_res1_float.device(gpu_device) = gpu_float1.exp(); + gpu_res2_float.device(gpu_device) = gpu_float2.log(); + gpu_res1_half.device(gpu_device) = gpu_float1.cast().exp().cast(); + gpu_res2_half.device(gpu_device) = gpu_float2.cast().log().cast(); + + Tensor input1(num_elem); + Tensor half_prec1(num_elem); + Tensor full_prec1(num_elem); + Tensor input2(num_elem); + Tensor half_prec2(num_elem); + Tensor full_prec2(num_elem); + gpu_device.memcpyDeviceToHost(input1.data(), d_float1, 
num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl; + VERIFY_IS_APPROX(full_prec1(i), half_prec1(i)); + } + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; + VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); + } + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res1_half); + gpu_device.deallocate(d_res1_float); + gpu_device.deallocate(d_res2_half); + gpu_device.deallocate(d_res2_float); +} + void test_cuda_contractions() { Eigen::CudaStreamDevice stream; @@ -280,6 +342,7 @@ void test_cxx11_tensor_of_float16_cuda() CALL_SUBTEST_1(test_cuda_conversion()); CALL_SUBTEST_1(test_cuda_unary()); CALL_SUBTEST_1(test_cuda_elementwise()); + CALL_SUBTEST_1(test_cuda_trancendental()); CALL_SUBTEST_2(test_cuda_contractions()); CALL_SUBTEST_3(test_cuda_reductions()); CALL_SUBTEST_4(test_cuda_forced_evals()); -- cgit v1.2.3