about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2019-06-25 15:41:37 -0700
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-06-25 15:41:37 -0700
commit: 229db815721f5589dfc039b74d11c93237028dcf (patch)
tree: 808f8241679db0c7aa6050ce021d8793bd1404db /unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
parent: ba506d5bd2da5ef5b709ae5f4e2cebd45f343370 (diff)
Optimize evaluation strategy for TensorSlicingOp and TensorChippingOp
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h | 45
1 files changed, 27 insertions, 18 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 8c0644925..ebc611d7d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -145,10 +145,18 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
// Alignment can't be guaranteed at compile time since it depends on the
// slice offsets.
IsAligned = false,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
- PreferBlockAccess = true,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
+ // Chipping of outer-most dimension is a trivial operation, because we can
+ // read and write directly from the underlying tensor using single offset.
+ IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == 0),
+ // Chipping inner-most dimension.
+ IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
+ // Do not choose block access if chipping is trivial.
+ PreferBlockAccess = !IsOuterChipping,
CoordAccess = false, // to be implemented
RawAccess = false
};
@@ -230,8 +238,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+ if (IsInnerChipping) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(m_stride == 1);
Index inputIndex = index * m_inputStride + m_inputOffset;
@@ -242,8 +249,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
+ } else if (IsOuterChipping) {
// m_stride is always greater than index, so let's avoid the integer division.
eigen_assert(m_stride > index);
return m_impl.template packet<LoadMode>(index + m_inputOffset);
@@ -345,9 +351,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
- if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
- result) {
+ if (IsOuterChipping && result) {
return result + m_inputOffset;
} else {
return NULL;
@@ -370,13 +374,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{
Index inputIndex;
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
+ if (IsInnerChipping) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(m_stride == 1);
inputIndex = index * m_inputStride + m_inputOffset;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
+ } else if (IsOuterChipping) {
// m_stride is always greater than index, so let's avoid the integer
// division.
eigen_assert(m_stride > index);
@@ -425,7 +427,16 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = false
+ RawAccess = false,
+ // Chipping of outer-most dimension is a trivial operation, because we can
+ // read and write directly from the underlying tensor using single offset.
+ IsOuterChipping =
+ (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == 0),
+ // Chipping inner-most dimension.
+ IsInnerChipping =
+ (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
};
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
@@ -449,8 +460,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
+ if (IsInnerChipping) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(this->m_stride == 1);
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
@@ -460,8 +470,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
this->m_impl.coeffRef(inputIndex) = values[i];
inputIndex += this->m_inputStride;
}
- } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
+ } else if (IsOuterChipping) {
// m_stride is always greater than index, so let's avoid the integer division.
eigen_assert(this->m_stride > index);
this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);