about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-25 09:16:08 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-25 09:16:08 -0700
commit: fd9401f2601fe8ed16c1a386f217da02dc8bfd2f (patch)
tree: 66d74b165e8faf3564c2e2f888214a2482c458c3 /unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
parent: 5c372d19e38fc5d96a559b253cdaba0145d9b468 (diff)
Refined the cost of the striding operation.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h 16
1 file changed, 11 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
index 23248c626..52b7d216a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
@@ -209,11 +209,17 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
- 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>());
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
+ double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
+ TensorOpCost::MulCost<Index>() +
+ TensorOpCost::DivCost<Index>()) +
+ TensorOpCost::MulCost<Index>();
+ if (vectorized) {
+ compute_cost *= 2; // packet() computes two indices
+ }
+ const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
+ return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
+ // Computation is not vectorized per se, but it is done once per packet.
+ TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
}
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }