diff options
Diffstat (limited to 'unsupported')
10 files changed, 151 insertions, 45 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index e22dd4de0..b2b28826a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -555,6 +555,11 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA chip(const Index offset, const Index dim) const { return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim); } + template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReverseOp<const ReverseDimensions, Derived> + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev); + } template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp<const Shuffle, Derived> shuffle(const Shuffle& shuffle) const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5790e19d6..055a7d407 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -249,7 +249,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> innermostLoc = index; } else { if (internal::index_statically_eq<InputDimensions>()(0, 1)) { - eigen_assert(innermostLoc % m_impl.dimensions()[0] == 0); + eigen_assert(index % m_impl.dimensions()[0] == 0); innermostLoc = 0; } else { innermostLoc = index % m_impl.dimensions()[0]; @@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> innermostLoc = index; } else { if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) { - eigen_assert(innermostLoc % m_impl.dimensions()[NumDims-1] == 0); + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); innermostLoc = 0; } else { innermostLoc = index % m_impl.dimensions()[NumDims-1]; diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 8b87f1045..9259c864e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -174,8 +174,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT OutputMapper output(buffer, m); - LhsPacker pack_lhs; - // compute block sizes (which depend on number of threads) const Index num_threads = this->m_device.numThreads(); Index mc = m; @@ -190,8 +188,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT const Index k_blocks = CEIL_DIV(k, kc); const Index n_blocks = CEIL_DIV(n, nc); const Index m_blocks = CEIL_DIV(m, mc); - const int sizeA = mc * kc; - const int sizeB = kc * nc; + const Index sizeA = mc * kc; + const Index sizeB = kc * nc; /* cout << "m: " << m << " n: " << n << " k: " << k << endl; cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl; @@ -228,7 +226,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT const Index num_kernel_promises = num_threads * n_blocks; std::vector<Promise> kernel_promises(num_kernel_promises); std::vector<Future> kernel_futures(num_kernel_promises); - for (int i = 0; i < kernel_promises.size(); ++i) { + for (std::size_t i = 0; i < kernel_promises.size(); ++i) { kernel_promises[i].set_value(); kernel_futures[i] = kernel_promises[i].get_future(); } @@ -239,16 +237,16 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT const Index actual_kc = (std::min)(k_start + kc, k) - k_start; for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) { - const int num_blocks = (std::min)(m_blocks-m_block_idx, numBlockAs); + const Index num_blocks = (std::min)(m_blocks-m_block_idx, numBlockAs); for (Index mt_block_idx = m_block_idx; mt_block_idx < 
m_block_idx+num_blocks; mt_block_idx++) { const Index m_start = mt_block_idx * mc; const Index actual_mc = (std::min)(m_start + mc, m) - m_start; eigen_assert(actual_mc > 0); - int blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; + Index blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; for (int i = 0; i < n_blocks; ++i) { - int future_id = (blockAId * n_blocks + i); + Index future_id = (blockAId * n_blocks + i); wait_until_ready(&kernel_futures[future_id]); kernel_promises[future_id] = Promise(); kernel_futures[future_id] = kernel_promises[future_id].get_future(); @@ -277,9 +275,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT // first make sure the previous kernels are all done before overwriting rhs. Also wait if // we're going to start new k. In both cases need_to_pack is true. if (need_to_pack) { - for (int i = num_blocks; i < num_threads; ++i) { - int blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; - int future_id = (blockAId * n_blocks + n_block_idx); + for (Index i = num_blocks; i < num_threads; ++i) { + Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; + Index future_id = (blockAId * n_blocks + n_block_idx); wait_until_ready(&kernel_futures[future_id]); } } @@ -361,7 +359,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) { const Index m_base_start = arg.m + arg.mc*mt_block_idx; if (m_base_start < arg.max_m) { - int blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; + Index blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; wait_until_ready(&(*arg.lhs_futures)[blockAId]); const Index actual_mc = (std::min)(m_base_start + arg.mc, arg.max_m) - m_base_start; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 2ad52b2f9..5e805fd95 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -230,7 +230,7 @@ struct DSizes : array<DenseIndex, NumDims> { } EIGEN_DEVICE_FUNC DSizes() { - for (int i = 0 ; i < NumDims; ++i) { + for (std::size_t i = 0 ; i < NumDims; ++i) { (*this)[i] = 0; } } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 05ac9bd2f..bb2f8b977 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -97,7 +97,7 @@ struct EvalRange<Evaluator, Index, true> { Index i = first; static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; - if (last - first > PacketSize) { + if (last - first >= PacketSize) { eigen_assert(first % PacketSize == 0); Index lastPacket = last - (last % PacketSize); for (; i < lastPacket; i += PacketSize) { @@ -131,7 +131,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> const Index blocksize = std::max<Index>(PacketSize, (blocksz - (blocksz % PacketSize))); const Index numblocks = size / blocksize; - Index i = 0; std::vector<Future> results; results.reserve(numblocks); for (int i = 0; i < numblocks; ++i) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 2714117ab..11c7ce443 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -28,6 +28,23 @@ namespace Eigen { namespace internal { +namespace { + // Note: result is undefined if val == 0 + template <typename T> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clz(val); +#elif EIGEN_COMP_MSVC + DWORD leading_zero = 0; + _BitScanReverse( &leading_zero, val); + return 
31 - leading_zero; +#else + return __builtin_clz(static_cast<uint32_t>(val)); +#endif + } +} + template <typename T> struct TensorIntDivisor { public: @@ -44,11 +61,7 @@ struct TensorIntDivisor { eigen_assert(divider <= (1<<(N-1)) - 1); // fast ln2 -#ifndef __CUDA_ARCH__ - const int leading_zeros = __builtin_clz(divider); -#else - const int leading_zeros = __clz(divider); -#endif + const int leading_zeros = count_leading_zeros(divider); const int log_div = N - (leading_zeros+1); multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index c119b30e2..054ecf7b5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -85,6 +85,15 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA const typename internal::remove_all<typename XprType::Nested>::type& expression() const { return m_xpr; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) + { + typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign; + Assign assign(*this, other); + internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + return *this; + } + template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index a93f48ccb..01ba0a80f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& 
device) : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) { - for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) { + for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index ad21e966b..16bef2ad3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -49,12 +49,9 @@ struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1, } // end namespace internal - - - template<typename ReverseDimensions, typename XprType> class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, - XprType>, ReadOnlyAccessors> + XprType>, WriteAccessors> { public: typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar; @@ -67,8 +64,8 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, StorageKind; typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(const XprType& expr, - const ReverseDimensions& reverse_dims) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp( + const XprType& expr, const ReverseDimensions& reverse_dims) : m_xpr(expr), m_reverse_dims(reverse_dims) {} EIGEN_DEVICE_FUNC @@ -78,12 +75,30 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, const typename internal::remove_all<typename XprType::Nested>::type& expression() const { return m_xpr; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other) + { + typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign; + Assign assign(*this, other); + internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + return 
*this; + } + + template<typename OtherDerived> + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign; + Assign assign(*this, other); + internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice()); + return *this; + } + protected: typename XprType::Nested m_xpr; const ReverseDimensions m_reverse_dims; }; - // Eval as rvalue template<typename ReverseDimensions, typename ArgType, typename Device> struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> @@ -134,8 +149,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device m_impl.cleanup(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex( + Index index) const { eigen_assert(index < dimensions().TotalSize()); Index inputIndex = 0; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { @@ -152,7 +167,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device } else { inputIndex += index; } - return m_impl.coeff(inputIndex); } else { for (int i = 0; i < NumDims - 1; ++i) { Index idx = index / m_strides[i]; @@ -167,8 +181,13 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device } else { inputIndex += index; } - return m_impl.coeff(inputIndex); } + return inputIndex; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( + Index index) const { + return m_impl.coeff(reverseIndex(index)); } template<int LoadMode> @@ -199,9 +218,57 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device ReverseDimensions m_reverse; }; +// Eval as lvalue + +template <typename ReverseDimensions, typename ArgType, typename Device> +struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device> + : public TensorEvaluator<const 
TensorReverseOp<ReverseDimensions, ArgType>, + Device> { + typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, + Device> Base; + typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size<ReverseDimensions>::value; + typedef DSizes<Index, NumDims> Dimensions; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + }; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, + const Device& device) + : Base(op, device) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dimensions& dimensions() const { return this->m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return this->m_impl.coeffRef(this->reverseIndex(index)); + } + + template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) { + const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + // This code is pilfered from TensorMorphing.h + EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; + internal::pstore<CoeffReturnType, PacketReturnType>(values, x); + for (int i = 0; i < packetSize; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + +}; -} // end namespace Eigen +} // end namespace Eigen #endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H diff --git a/unsupported/test/cxx11_tensor_reverse.cpp b/unsupported/test/cxx11_tensor_reverse.cpp index 4c0be35da..f96c21fa3 100644 --- a/unsupported/test/cxx11_tensor_reverse.cpp 
+++ b/unsupported/test/cxx11_tensor_reverse.cpp @@ -94,7 +94,7 @@ static void test_simple_reverse() template <int DataLayout> -static void test_expr_reverse() +static void test_expr_reverse(bool LValue) { Tensor<float, 4, DataLayout> tensor(2,3,5,7); tensor.setRandom(); @@ -105,9 +105,12 @@ static void test_expr_reverse() dim_rev[2] = false; dim_rev[3] = true; - - Tensor<float, 4, DataLayout> expected; - expected = tensor.reverse(dim_rev); + Tensor<float, 4, DataLayout> expected(2, 3, 5, 7); + if (LValue) { + expected.reverse(dim_rev) = tensor; + } else { + expected = tensor.reverse(dim_rev); + } Tensor<float, 4, DataLayout> result(2,3,5,7); @@ -117,8 +120,13 @@ static void test_expr_reverse() array<ptrdiff_t, 4> dst_slice_start{{0,0,0,0}}; for (int i = 0; i < 5; ++i) { - result.slice(dst_slice_start, dst_slice_dim) = - tensor.slice(src_slice_start, src_slice_dim).reverse(dim_rev); + if (LValue) { + result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) = + tensor.slice(src_slice_start, src_slice_dim); + } else { + result.slice(dst_slice_start, dst_slice_dim) = + tensor.slice(src_slice_start, src_slice_dim).reverse(dim_rev); + } src_slice_start[2] += 1; dst_slice_start[2] += 1; } @@ -141,8 +149,13 @@ static void test_expr_reverse() dst_slice_start[2] = 0; result.setRandom(); for (int i = 0; i < 5; ++i) { - result.slice(dst_slice_start, dst_slice_dim) = - tensor.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); + if (LValue) { + result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) = + tensor.slice(dst_slice_start, dst_slice_dim); + } else { + result.slice(dst_slice_start, dst_slice_dim) = + tensor.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); + } dst_slice_start[2] += 1; } @@ -162,6 +175,8 @@ void test_cxx11_tensor_reverse() { CALL_SUBTEST(test_simple_reverse<ColMajor>()); CALL_SUBTEST(test_simple_reverse<RowMajor>()); - CALL_SUBTEST(test_expr_reverse<ColMajor>()); - CALL_SUBTEST(test_expr_reverse<RowMajor>()); + 
CALL_SUBTEST(test_expr_reverse<ColMajor>(true)); + CALL_SUBTEST(test_expr_reverse<RowMajor>(true)); + CALL_SUBTEST(test_expr_reverse<ColMajor>(false)); + CALL_SUBTEST(test_expr_reverse<RowMajor>(false)); } |