diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-01-14 15:38:48 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2015-01-14 15:38:48 -0800 |
commit | f697df723798779bc29d9f7299bb5398767d5db0 (patch) | |
tree | c155c21ad9ef0e6269f6af83fe2f29f97a0c0e21 /unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | |
parent | 6559d09c60fb4acfc7ee5197284f576ac14926f1 (diff) |
Improved support for RowMajor tensors
Misc fixes and API cleanups.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 166 |
1 files changed, 150 insertions, 16 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 8cb41aec8..ef134adf2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -30,6 +30,8 @@ struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType typedef typename XprTraits::Index Index; typedef typename XprType::Nested Nested; typedef typename remove_reference<Nested>::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; }; template<typename Broadcast, typename XprType> @@ -91,6 +93,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> enum { IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + Layout = TensorEvaluator<ArgType, Device>::Layout, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -103,11 +106,20 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> m_dimensions[i] = input_dims[i] * broadcast[i]; } - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + if (Layout == ColMajor) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + m_inputStrides[NumDims-1] = 1; + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } } } @@ -125,16 +137,30 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> m_impl.cleanup(); } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const + { + if (Layout == ColMajor) { + return coeffColMajor(index); + } else { + return coeffRowMajor(index); + } + } + // TODO: attempt to speed this up. The integer divisions and modulo are slow - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const { Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); + if (internal::index_statically_eq<Broadcast>()(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + if (internal::index_statically_eq<InputDimensions>()(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } } index -= idx * m_outputStrides[i]; } @@ -142,15 +168,59 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> eigen_assert(index < m_impl.dimensions()[0]); inputIndex += index; } else { - inputIndex += (index % m_impl.dimensions()[0]); + if (internal::index_statically_eq<InputDimensions>()(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[0]); + } } return m_impl.coeff(inputIndex); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const + { + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq<Broadcast>()(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq<InputDimensions>()(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + inputIndex += index; + } else { + if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[NumDims-1]); + } + } + return m_impl.coeff(inputIndex); + } + + template<int LoadMode> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const + { + if (Layout == ColMajor) { + return packetColMajor<LoadMode>(index); + } else { + return packetRowMajor<LoadMode>(index); + } + } + // Ignore the LoadMode and always use unaligned loads since we can't guarantee // the alignment at compile time. template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const { const int packetSize = internal::unpacket_traits<PacketReturnType>::size; EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -161,10 +231,15 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> Index inputIndex = 0; for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<InputDimensions>()(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); + if (internal::index_statically_eq<Broadcast>()(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + if (internal::index_statically_eq<InputDimensions>()(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } } index -= idx * m_outputStrides[i]; } @@ -173,7 +248,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> eigen_assert(index < m_impl.dimensions()[0]); innermostLoc = index; } else { - innermostLoc = index % m_impl.dimensions()[0]; + if (internal::index_statically_eq<InputDimensions>()(0, 1)) { + eigen_assert(innermostLoc % m_impl.dimensions()[0] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[0]; + } } inputIndex += innermostLoc; @@ -185,13 +265,67 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; values[0] = m_impl.coeff(inputIndex); for (int i = 1; i < packetSize; ++i) { - values[i] = coeff(originalIndex+i); + values[i] = coeffColMajor(originalIndex+i); } PacketReturnType rslt = internal::pload<PacketReturnType>(values); return rslt; } } + template<int LoadMode> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq<Broadcast>()(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq<InputDimensions>()(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq<Broadcast>()(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + innermostLoc = index; + } else { + if (internal::index_statically_eq<InputDimensions>()(NumDims-1, 1)) { + eigen_assert(innermostLoc % m_impl.dimensions()[NumDims-1] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[NumDims-1]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. + if (innermostLoc + packetSize <= m_impl.dimensions()[NumDims-1]) { + return m_impl.template packet<Unaligned>(inputIndex); + } else { + EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; + values[0] = m_impl.coeff(inputIndex); + for (int i = 1; i < packetSize; ++i) { + values[i] = coeffRowMajor(originalIndex+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + } + + Scalar* data() const { return NULL; } protected: |