From 235e83aba608cf3d94b033bfbf551f8c136a3fab Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 14 Apr 2016 13:57:35 -0700
Subject: Eigen cost model part 1.

This implements a basic recursive framework to estimate the cost of
evaluating tensor expressions.
---
 .../Eigen/CXX11/src/Tensor/TensorChipping.h | 63 ++++++++++++++--------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index c21a98fe0..2742dbb95 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -134,6 +134,10 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   typedef typename XprType::Index Index;
   typedef DSizes<Index, NumDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+
 
   enum {
     // Alignment can't be guaranteed at compile time since it depends on the
@@ -180,9 +184,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     m_inputOffset = m_stride * op.offset();
   }
 
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
@@ -202,17 +203,16 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
 
     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(m_stride == 1);
       Index inputIndex = index * m_inputStride + m_inputOffset;
-      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-      for (int i = 0; i < packetSize; ++i) {
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      for (int i = 0; i < PacketSize; ++i) {
         values[i] = m_impl.coeff(inputIndex);
         inputIndex += m_inputStride;
       }
@@ -226,13 +226,13 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     } else {
       const Index idx = index / m_stride;
       const Index rem = index - idx * m_stride;
-      if (rem + packetSize <= m_stride) {
+      if (rem + PacketSize <= m_stride) {
         Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
         return m_impl.template packet<LoadMode>(inputIndex);
       } else {
         // Cross the stride boundary. Fallback to slow path.
-        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
-        for (int i = 0; i < packetSize; ++i) {
+        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+        for (int i = 0; i < PacketSize; ++i) {
           values[i] = coeff(index);
           ++index;
         }
@@ -242,6 +242,28 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     }
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
+  costPerCoeff(bool vectorized) const {
+    double cost = 0;
+    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
+         m_dim.actualDim() == 0) ||
+        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
+         m_dim.actualDim() == NumInputDims - 1)) {
+      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
+    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
+                m_dim.actualDim() == NumInputDims - 1) ||
+               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
+                m_dim.actualDim() == 0)) {
+      cost += TensorOpCost::AddCost<Index>();
+    } else {
+      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
+              3 * TensorOpCost::AddCost<Index>();
+    }
+
+    return m_impl.costPerCoeff(vectorized) +
+           TensorOpCost(0, 0, cost, vectorized, PacketSize);
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
     CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
     if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
@@ -298,6 +320,9 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
   typedef typename XprType::Index Index;
   typedef DSizes<Index, NumDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
 
   enum {
     IsAligned = false,
@@ -309,9 +334,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     : Base(op, device)
     { }
 
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
   {
     return this->m_impl.coeffRef(this->srcCoeff(index));
@@ -320,17 +342,16 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketReturnType& x)
   {
-    static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
 
     if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
        (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(this->m_stride == 1);
-      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       internal::pstore(values, x);
       Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
-      for (int i = 0; i < packetSize; ++i) {
+      for (int i = 0; i < PacketSize; ++i) {
         this->m_impl.coeffRef(inputIndex) = values[i];
         inputIndex += this->m_inputStride;
       }
@@ -342,14 +363,14 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     } else {
       const Index idx = index / this->m_stride;
       const Index rem = index - idx * this->m_stride;
-      if (rem + packetSize <= this->m_stride) {
+      if (rem + PacketSize <= this->m_stride) {
         const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
         this->m_impl.template writePacket<StoreMode>(inputIndex, x);
       } else {
         // Cross stride boundary. Fallback to slow path.
-        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
         internal::pstore(values, x);
-        for (int i = 0; i < packetSize; ++i) {
+        for (int i = 0; i < PacketSize; ++i) {
           this->coeffRef(index) = values[i];
           ++index;
         }
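Usage note: the sketch below shows the cost estimate from the consumer side. It is a
minimal illustration, not part of the patch, and it assumes a tree in which the complete
cost-model series has been merged (this is only part 1; TensorOpCost and costPerCoeff()
on the leaf evaluators come from the companion patches). The tensor shape, chip offset,
and chipped dimension are arbitrary example values.

  // Query the estimated per-coefficient cost of a chipping expression.
  #include <iostream>
  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 3> input(8, 16, 32);
    input.setRandom();

    // Chip dimension 0 at offset 3. With the default ColMajor layout this is
    // the cheap branch of costPerCoeff(): one mul and one add of index
    // arithmetic per coefficient, on top of the child expression's cost.
    auto chipped = input.chip(/*offset=*/3, /*dim=*/0);

    Eigen::DefaultDevice device;
    Eigen::TensorEvaluator<decltype(chipped), Eigen::DefaultDevice>
        eval(chipped, device);

    // The estimate is built recursively: the chipping evaluator calls
    // m_impl.costPerCoeff() (here, the evaluator for `input`) and adds its
    // own compute cycles.
    const Eigen::TensorOpCost cost = eval.costPerCoeff(/*vectorized=*/true);
    std::cout << "bytes loaded per coeff:   " << cost.bytes_loaded() << "\n"
              << "bytes stored per coeff:   " << cost.bytes_stored() << "\n"
              << "compute cycles per coeff: " << cost.compute_cycles() << "\n";
  }

Because each evaluator adds only its own marginal cost and recurses into its child
through m_impl.costPerCoeff(), the estimate composes across arbitrarily deep
expression trees.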