From 034aa3b2c0fa55cdf7d4bf052a067989218565c0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 May 2016 11:43:08 -0700 Subject: Improved the performance of tensor padding --- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 102 ++++++++++----------- 1 file changed, 48 insertions(+), 54 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index 266eea0a0..1f066b7c7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -150,27 +150,26 @@ struct TensorEvaluator, Device if (static_cast(Layout) == static_cast(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { const Index idx = index / m_outputStrides[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { + if (isPaddingAtIndexForDim(idx, i)) { return m_paddingValue; } inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i]; } - if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) { + if (isPaddingAtIndexForDim(index, 0)) { return m_paddingValue; } inputIndex += (index - m_padding[0].first); } else { for (int i = 0; i < NumDims - 1; ++i) { const Index idx = index / m_outputStrides[i+1]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { + if (isPaddingAtIndexForDim(idx, i)) { return m_paddingValue; } inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; index -= idx * m_outputStrides[i+1]; } - if (index < m_padding[NumDims-1].first || - index >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { + if (isPaddingAtIndexForDim(index, NumDims-1)) { return m_paddingValue; } inputIndex += (index - m_padding[NumDims-1].first); @@ -187,43 +186,6 @@ struct TensorEvaluator, Device return packetRowMajor(index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const - { - Index inputIndex; - if (static_cast(Layout) == static_cast(ColMajor)) { - { - const Index idx = coords[0]; - if (idx < m_padding[0].first || idx >= m_dimensions[0] - m_padding[0].second) { - return m_paddingValue; - } - inputIndex = idx - m_padding[0].first; - } - for (int i = 1; i < NumDims; ++i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } else { - { - const Index idx = coords[NumDims-1]; - if (idx < m_padding[NumDims-1].first || idx >= m_dimensions[NumDims-1] - m_padding[NumDims-1].second) { - return m_paddingValue; - } - inputIndex = idx - m_padding[NumDims-1].first; - } - for (int i = NumDims - 2; i >= 0; --i) { - const Index idx = coords[i]; - if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - } - } - return m_impl.coeff(inputIndex); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { TensorOpCost cost = m_impl.costPerCoeff(vectorized); if (static_cast(Layout) == static_cast(ColMajor)) { @@ -239,6 +201,38 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } private: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( + Index index, int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return (!internal::index_pair_first_statically_eq(dim_index, 0) && + index < m_padding[dim_index].first) || + (!internal::index_pair_second_statically_eq(dim_index, 0) && + index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#else + return (index < m_padding[dim_index].first) || + (index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_first_statically_eq(dim_index, 0); +#else + return false; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_second_statically_eq(dim_index, 0); +#else + return false; +#endif + } + + void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { const double in = static_cast(m_impl.dimensions()[i]); const double out = in + m_padding[i].first + m_padding[i].second; @@ -273,15 +267,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; const Index lastPaddedRight = m_outputStrides[i+1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. const Index idx = index / m_outputStrides[i]; inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; @@ -299,15 +293,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); const Index lastPaddedRight = m_outputStrides[1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. inputIndex += (index - m_padding[0].first); return m_impl.template packet(inputIndex); @@ -331,15 +325,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; const Index lastPaddedRight = m_outputStrides[i]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. const Index idx = index / m_outputStrides[i+1]; inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; @@ -357,15 +351,15 @@ struct TensorEvaluator, Device const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); const Index lastPaddedRight = m_outputStrides[NumDims-1]; - if (last < lastPaddedLeft) { + if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= firstPaddedRight && last < lastPaddedRight) { + else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { // all the coefficient are in the padding zone. return internal::pset1(m_paddingValue); } - else if (first >= lastPaddedLeft && last < firstPaddedRight) { + else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { // all the coefficient are between the 2 padding zones. inputIndex += (index - m_padding[NumDims-1].first); return m_impl.template packet(inputIndex); -- cgit v1.2.3