diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-07-10 11:09:46 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2014-07-10 11:09:46 -0700 |
commit | ffd3654f6738bab79db010e02cd67660ecca62c1 (patch) | |
tree | 596f3e24ac87793e10c00b08b0a00229b0d80e01 /unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | |
parent | 25b2f6624d092ed99d0c4936de0c83c9ea4a024d (diff) |
Vectorized the evaluation of expressions involving tensor slices.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 108 |
1 files changed, 98 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index f6f67afa7..3b42c8514 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -273,8 +273,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi static const int NumDims = internal::array_size<Sizes>::value; enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false, + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. + IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -329,11 +331,40 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi return m_impl.coeff(inputIndex); } - /* template<int LoadMode> + template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_impl.template packet<LoadMode>(index); - }*/ + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); + return rslt; + } + else { + CoeffReturnType values[packetSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + } private: Dimensions m_dimensions; @@ -353,8 +384,8 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> static const int NumDims = internal::array_size<Sizes>::value; enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false, + IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -409,11 +440,38 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> return m_impl.coeff(inputIndex); } - /* template<int LoadMode> + template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_impl.template packet<LoadMode>(index); - }*/ + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet<LoadMode>(inputIndices[0]); + return rslt; + } + else { + CoeffReturnType values[packetSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { @@ -427,6 +485,36 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> return m_impl.coeffRef(inputIndex); } + template <int StoreMode> EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStrides[i]; + const Index idx1 = indices[1] / m_outputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + m_impl.template writePacket<StoreMode>(inputIndices[0], x); + } + else { + CoeffReturnType values[packetSize]; + internal::pstore<CoeffReturnType, PacketReturnType>(values, x); + m_impl.coeffRef(inputIndices[0]) = values[0]; + m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; + for (int i = 1; i < packetSize-1; ++i) { + coeffRef(index+i) = values[i]; + } + } + } + private: Dimensions m_dimensions; array<Index, NumDims> m_outputStrides; |