From 229db815721f5589dfc039b74d11c93237028dcf Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 25 Jun 2019 15:41:37 -0700 Subject: Optimize evaluation strategy for TensorSlicingOp and TensorChippingOp --- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 45 +++++++++++++--------- 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 8c0644925..ebc611d7d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -145,10 +145,18 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> // Alignment can't be guaranteed at compile time since it depends on the // slice offsets. IsAligned = false, + Layout = TensorEvaluator<ArgType, Device>::Layout, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - PreferBlockAccess = true, - Layout = TensorEvaluator<ArgType, Device>::Layout, + // Chipping of outer-most dimension is a trivial operation, because we can + // read and write directly from the underlying tensor using single offset. + IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) || + (static_cast<int>(Layout) == RowMajor && DimId == 0), + // Chipping inner-most dimension. + IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) || + (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1), + // Do not choose block access if chipping is trivial. 
+ PreferBlockAccess = !IsOuterChipping, CoordAccess = false, // to be implemented RawAccess = false }; @@ -230,8 +238,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { + if (IsInnerChipping) { // m_stride is equal to 1, so let's avoid the integer division. eigen_assert(m_stride == 1); Index inputIndex = index * m_inputStride + m_inputOffset; @@ -242,8 +249,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> } PacketReturnType rslt = internal::pload<PacketReturnType>(values); return rslt; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { + } else if (IsOuterChipping) { // m_stride is always greater than index, so let's avoid the integer division. 
eigen_assert(m_stride > index); return m_impl.template packet<LoadMode>(index + m_inputOffset); @@ -345,9 +351,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data()); - if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) && - result) { + if (IsOuterChipping && result) { return result + m_inputOffset; } else { return NULL; @@ -370,13 +374,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { Index inputIndex; - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) { + if (IsInnerChipping) { // m_stride is equal to 1, so let's avoid the integer division. eigen_assert(m_stride == 1); inputIndex = index * m_inputStride + m_inputOffset; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { + } else if (IsOuterChipping) { // m_stride is always greater than index, so let's avoid the integer // division. eigen_assert(m_stride > index); @@ -425,7 +427,16 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false + RawAccess = false, + // Chipping of outer-most dimension is a trivial operation, because we can + // read and write directly from the underlying tensor using single offset. + IsOuterChipping = + (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) || + (static_cast<int>(Layout) == RowMajor && DimId == 0), + // Chipping inner-most dimension. 
+ IsInnerChipping = + (static_cast<int>(Layout) == ColMajor && DimId == 0) || + (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1), }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; @@ -449,8 +460,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> { EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { + if (IsInnerChipping) { // m_stride is equal to 1, so let's avoid the integer division. eigen_assert(this->m_stride == 1); EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; @@ -460,8 +470,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> this->m_impl.coeffRef(inputIndex) = values[i]; inputIndex += this->m_inputStride; } - } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) { + } else if (IsOuterChipping) { // m_stride is always greater than index, so let's avoid the integer division. eigen_assert(this->m_stride > index); this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x); -- cgit v1.2.3