From fd9401f2601fe8ed16c1a386f217da02dc8bfd2f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 25 Apr 2016 09:16:08 -0700 Subject: Refined the cost of the striding operation. --- unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 23248c626..52b7d216a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -209,11 +209,17 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + - 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()); - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); + double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() + + TensorOpCost::MulCost<Index>() + + TensorOpCost::DivCost<Index>()) + + TensorOpCost::MulCost<Index>(); + if (vectorized) { + compute_cost *= 2; // packet() computes two indices + } + const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1); + return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) + + // Computation is not vectorized per se, but it is done once per packet. + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); } EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } -- cgit v1.2.3