From 2959045f2fe111f93b23517fd6f7afe49720a290 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 26 Aug 2014 09:47:18 -0700 Subject: Optimized the tensor padding code. --- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 95 ++++++++++++++++++---- 1 file changed, 81 insertions(+), 14 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 45558d7dd..4482c0992 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -87,7 +87,7 @@ struct TensorEvaluator, Device enum { IsAligned = false, - PacketAccess = /*TensorEvaluator::PacketAccess*/false, + PacketAccess = TensorEvaluator::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -100,15 +100,13 @@ struct TensorEvaluator, Device } const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - if (i > 0) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } else { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - } + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; } + m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; } typedef typename XprType::Scalar Scalar; @@ -128,7 +126,7 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { Index inputIndex = 0; - for (int i = NumDims - 1; i >= 0; --i) { + for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { return Scalar(0); @@ -136,21 +134,90 @@ struct TensorEvaluator, Device inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i]; } + if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) { + return Scalar(0); + } + inputIndex += (index - m_padding[0].first); return m_impl.coeff(inputIndex); } - /* template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_impl.template packet(index); - }*/ + static const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const int first = index; + const int last = index + packetSize - 1; + const int lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; + const int firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; + const int lastPaddedRight = m_outputStrides[i+1]; + + if (last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(Scalar(0)); + } + else if (first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(Scalar(0)); + } + else if (first >= lastPaddedLeft && last < firstPaddedRight) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + packetSize - 1; + const Index first = index; + const int lastPaddedLeft = m_padding[0].first; + const int firstPaddedRight = (m_dimensions[0] - m_padding[0].second); + const int lastPaddedRight = m_outputStrides[1]; + + if (last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(Scalar(0)); + } + else if (first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(Scalar(0)); + } + else if (first >= lastPaddedLeft && last < firstPaddedRight) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[0].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } Scalar* data() const { return NULL; } protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + static const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + PaddingDimensions m_padding; Dimensions m_dimensions; - array m_outputStrides; + array m_outputStrides; array m_inputStrides; TensorEvaluator m_impl; }; -- cgit v1.2.3