diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2016-04-14 13:57:35 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2016-04-14 13:57:35 -0700 |
commit | 235e83aba608cf3d94b033bfbf551f8c136a3fab (patch) | |
tree | 7b011fee8fe18b605320c69e75995cf8521fbdf4 /unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h | |
parent | 3551dea887ce60756c28796e83bb7c080f2b2782 (diff) |
Eigen cost model part 1. This implements a basic recursive framework to estimate the cost of evaluating tensor expressions.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h | 47 |
1 file changed, 27 insertions, 20 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 085f8fd3d..23248c626 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -103,6 +103,10 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> typedef typename XprType::Index Index; static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; typedef DSizes<Index, NumDims> Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; enum { IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, @@ -142,10 +146,6 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> } } - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { @@ -164,12 +164,11 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; + Index 
indices[] = {index, index + PacketSize - 1}; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { const Index idx0 = indices[0] / m_outputStrides[i]; @@ -193,15 +192,15 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> inputIndices[0] += indices[0] * m_inputStrides[NumDims-1]; inputIndices[1] += indices[1] * m_inputStrides[NumDims-1]; } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); return rslt; } else { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; + EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < PacketSize-1; ++i) { values[i] = coeff(index+i); } PacketReturnType rslt = internal::pload<PacketReturnType>(values); @@ -209,6 +208,14 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> } } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + + 2 * TensorOpCost::MulCost<Index>() + + TensorOpCost::DivCost<Index>()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); + } + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } protected: @@ -266,6 +273,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; + static const int PacketSize = 
internal::unpacket_traits<PacketReturnType>::size; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { @@ -275,12 +283,11 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < this->dimensions().TotalSize()); + EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize()); Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; + Index indices[] = {index, index + PacketSize - 1}; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { const Index idx0 = indices[0] / this->m_outputStrides[i]; @@ -304,15 +311,15 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1]; inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1]; } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { this->m_impl.template writePacket<Unaligned>(inputIndices[0], x); } else { - EIGEN_ALIGN_MAX Scalar values[packetSize]; + EIGEN_ALIGN_MAX Scalar values[PacketSize]; internal::pstore<Scalar, PacketReturnType>(values, x); this->m_impl.coeffRef(inputIndices[0]) = values[0]; - this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; - for (int i = 1; i < packetSize-1; ++i) { + this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1]; + for (int i = 1; i < PacketSize-1; ++i) { this->coeffRef(index+i) = values[i]; } } |