diff options
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h | 133 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8 | ||||
-rw-r--r-- | unsupported/test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_fixed_size.cpp | 2 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_shuffling.cpp | 9 |
5 files changed, 141 insertions, 13 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 11590b474..732c6b344 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -37,8 +37,7 @@ template <typename Index> struct IndexPair { Index second; }; - -// Boiler plate code +// Boilerplate code namespace internal { template<std::size_t n, typename Dimension> struct dget { @@ -110,6 +109,11 @@ struct Sizes : internal::numeric_list<std::size_t, Indices...> { } }; +template <typename std::size_t... Indices> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<Indices...>&) { + return Sizes<Indices...>::total_size; +} + #else template <std::size_t n> @@ -136,9 +140,21 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 // todo: add assertion } #ifdef EIGEN_HAS_VARIADIC_TEMPLATES + template <typename... DenseIndex> Sizes(DenseIndex... indices) { } explicit Sizes(std::initializer_list<std::size_t> l) { // todo: add assertion } +#else + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0) { + } + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1) { + } + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + } + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + } + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + } #endif template <typename T> Sizes& operator = (const T& other) { @@ -156,9 +172,14 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 } }; +template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { + return Sizes<V1, V2, V3, V4, V5>::total_size; +}; + #endif -// Boiler plate +// Boilerplate namespace internal { template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> struct tensor_index_linearization_helper @@ -243,6 +264,112 @@ struct DSizes : array<DenseIndex, NumDims> { }; + + +// Boilerplate +namespace internal { +template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions) + { + return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + + array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * + tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); + } +}; + +template<typename Index, std::size_t NumIndices, bool RowMajor> +struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&) + { + return array_get<RowMajor ? 0 : NumIndices - 1>(indices); + } +}; +} // end namespace internal + +template <typename DenseIndex> +struct VSizes : std::vector<DenseIndex> { + typedef std::vector<DenseIndex> Base; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const { + return internal::array_prod(*static_cast<const Base*>(this)); + } + + EIGEN_DEVICE_FUNC VSizes() { } + EIGEN_DEVICE_FUNC explicit VSizes(const std::vector<DenseIndex>& a) : Base(a) { } + + template <std::size_t NumDims> + EIGEN_DEVICE_FUNC explicit VSizes(const array<DenseIndex, NumDims>& a) { + this->resize(NumDims); + for (int i = 0; i < NumDims; ++i) { + (*this)[i] = a[i]; + } + } + + EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0) { + this->resize(1); + (*this)[0] = i0; + } + EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1) { + this->resize(2); + (*this)[0] = i0; + (*this)[1] = i1; + } + EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + this->resize(3); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + } + EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + this->resize(4); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + } + EIGEN_DEVICE_FUNC explicit VSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + this->resize(5); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + (*this)[4] = i4; + } + + VSizes& operator = (const std::vector<DenseIndex>& other) { + *static_cast<Base*>(this) = other; + return *this; + } + + // A constexpr would be so much better here + template <std::size_t NumDims> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const { + return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this)); + } + template <std::size_t NumDims> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const { + return internal::tensor_vsize_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this)); + } +}; + + +// Boilerplate +namespace internal { +template <typename DenseIndex> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex array_prod(const VSizes<DenseIndex>& sizes) { + DenseIndex total_size = 1; + for (int i = 0; i < sizes.size(); ++i) { + total_size *= sizes[i]; + } + return total_size; +} +} + namespace internal { template <typename DenseIndex, std::size_t NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 84768ca09..10f5a5ee7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -39,7 +39,7 @@ class TensorExecutor const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const Index size = evaluator.dimensions().TotalSize(); + const Index size = array_prod(evaluator.dimensions()); for (Index i = 0; i < size; ++i) { evaluator.evalScalar(i); } @@ -60,7 +60,7 @@ class TensorExecutor<Expression, DefaultDevice, true> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const Index size = evaluator.dimensions().TotalSize(); + const Index size = array_prod(evaluator.dimensions()); static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size; const int VectorizedSize = (size / PacketSize) * PacketSize; @@ -122,7 +122,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const Index size = evaluator.dimensions().TotalSize(); + const Index size = array_prod(evaluator.dimensions()); static const int PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1; @@ -176,7 +176,7 @@ class TensorExecutor<Expression, GpuDevice, Vectorizable> const int num_blocks = getNumCudaMultiProcessors() * maxCudaThreadsPerMultiProcessor() / maxCudaThreadsPerBlock(); const int block_size = maxCudaThreadsPerBlock(); - const Index size = evaluator.dimensions().TotalSize(); + const Index size = array_prod(evaluator.dimensions()); EigenMetaKernel<TensorEvaluator<Expression, GpuDevice> > <<<num_blocks, block_size, 0, device.stream()>>>(evaluator, size); assert(cudaGetLastError() == cudaSuccess); } diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 1c4d0838a..ac2ccaf27 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -119,7 +119,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_morphing "-std=c++0x") ei_add_test(cxx11_tensor_padding "-std=c++0x") ei_add_test(cxx11_tensor_reduction "-std=c++0x") -# ei_add_test(cxx11_tensor_shuffling "-std=c++0x") + ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") # ei_add_test(cxx11_tensor_device "-std=c++0x") ei_add_test(cxx11_tensor_thread_pool "-std=c++0x") diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp index d270486f2..b0501aaa3 100644 --- a/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -179,7 +179,7 @@ static void test_array() for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { - VERIFY_IS_APPROX(mat3(array<ptrdiff_t, 3>(i,j,k)), powf(val, 3.5f)); + VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f)); val += 1.0; } } diff --git a/unsupported/test/cxx11_tensor_shuffling.cpp b/unsupported/test/cxx11_tensor_shuffling.cpp index 5ab8b6821..39c623499 100644 --- a/unsupported/test/cxx11_tensor_shuffling.cpp +++ b/unsupported/test/cxx11_tensor_shuffling.cpp @@ -12,6 +12,7 @@ #include <Eigen/CXX11/Tensor> using Eigen::Tensor; +using Eigen::array; static void test_simple_shuffling() { @@ -80,10 +81,10 @@ static void test_expr_shuffling() Tensor<float, 4> result(5,7,3,2); - array<int, 4> src_slice_dim(Eigen::array<int, 4>(2,3,1,7)); - array<int, 4> src_slice_start(Eigen::array<int, 4>(0,0,0,0)); - array<int, 4> dst_slice_dim(Eigen::array<int, 4>(1,7,3,2)); - array<int, 4> dst_slice_start(Eigen::array<int, 4>(0,0,0,0)); + array<int, 4> src_slice_dim{{2,3,1,7}}; + array<int, 4> src_slice_start{{0,0,0,0}}; + array<int, 4> dst_slice_dim{{1,7,3,2}}; + array<int, 4> dst_slice_start{{0,0,0,0}}; for (int i = 0; i < 5; ++i) { result.slice(dst_slice_start, dst_slice_dim) = |