diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 95 |
1 files changed, 81 insertions, 14 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 45558d7dd..4482c0992 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device enum { IsAligned = false, - PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) @@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device } const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - if (i > 0) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } else { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - } + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; } + m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; } typedef typename XprType::Scalar Scalar; @@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { Index inputIndex = 0; - for (int i = NumDims - 1; i >= 0; --i) { + for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { return Scalar(0); @@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i]; } + if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) { + return Scalar(0); + } + inputIndex += (index - m_padding[0].first); return m_impl.coeff(inputIndex); } - /* template<int LoadMode> + template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_impl.template packet<LoadMode>(index); - }*/ + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const int first = index; + const int last = index + packetSize - 1; + const int lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; + const int firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; + const int lastPaddedRight = m_outputStrides[i+1]; + + if (last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1<PacketReturnType>(Scalar(0)); + } + else if (first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1<PacketReturnType>(Scalar(0)); + } + else if (first >= lastPaddedLeft && last < firstPaddedRight) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + packetSize - 1; + const Index first = index; + const int lastPaddedLeft = m_padding[0].first; + const int firstPaddedRight = (m_dimensions[0] - m_padding[0].second); + const int lastPaddedRight = m_outputStrides[1]; + + if (last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1<PacketReturnType>(Scalar(0)); + } + else if (first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1<PacketReturnType>(Scalar(0)); + } + else if (first >= lastPaddedLeft && last < firstPaddedRight) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[0].first); + return m_impl.template packet<Unaligned>(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } Scalar* data() const { return NULL; } protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + static const int packetSize = internal::unpacket_traits<PacketReturnType>::size; + EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + PaddingDimensions m_padding; Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; + array<Index, NumDims+1> m_outputStrides; array<Index, NumDims> m_inputStrides; TensorEvaluator<ArgType, Device> m_impl; }; |