From 235e83aba608cf3d94b033bfbf551f8c136a3fab Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 14 Apr 2016 13:57:35 -0700
Subject: Eigen cost model part 1. This implements a basic recursive framework
 to estimate the cost of evaluating tensor expressions.

---
 .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h   | 51 +++++++++++++++-------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b6e6db12a..c771496e2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -101,6 +101,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef DSizes<Index, NumDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
 
   enum {
     IsAligned = false,
@@ -140,9 +143,6 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     }
   }
 
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
@@ -247,9 +247,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
   {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
 
     const Index originalIndex = index;
 
@@ -284,12 +283,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
 
     // Todo: this could be extended to the second dimension if we're not
     // broadcasting alongside the first dimension, and so on.
-    if (innermostLoc + packetSize <= m_impl.dimensions()[0]) {
+    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
       return m_impl.template packet<Unaligned>(inputIndex);
     } else {
-      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
-      for (int i = 1; i < packetSize; ++i) {
+      for (int i = 1; i < PacketSize; ++i) {
         values[i] = coeffColMajor(originalIndex+i);
       }
       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
@@ -300,9 +299,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
   {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
-    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
 
     const Index originalIndex = index;
 
@@ -337,12 +335,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
 
     // Todo: this could be extended to the second dimension if we're not
     // broadcasting alongside the first dimension, and so on.
-    if (innermostLoc + packetSize <= m_impl.dimensions()[NumDims-1]) {
+    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
       return m_impl.template packet<Unaligned>(inputIndex);
     } else {
-      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
-      for (int i = 1; i < packetSize; ++i) {
+      for (int i = 1; i < PacketSize; ++i) {
         values[i] = coeffRowMajor(originalIndex+i);
       }
       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
@@ -350,6 +348,29 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     }
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
+  costPerCoeff(bool vectorized) const {
+    double compute_cost = TensorOpCost::AddCost<Index>();
+    if (NumDims > 0) {
+      for (int i = NumDims - 1; i > 0; --i) {
+        compute_cost += TensorOpCost::DivCost<Index>();
+        if (internal::index_statically_eq<Broadcast>()(i, 1)) {
+          compute_cost +=
+              TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
+        } else {
+          if (!internal::index_statically_eq<InputDimensions>()(i, 1)) {
+            compute_cost += TensorOpCost::MulCost<Index>() +
+                            TensorOpCost::ModCost<Index>() +
+                            TensorOpCost::AddCost<Index>();
+          }
+        }
+        compute_cost +=
+            TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
+      }
+    }
+    return m_impl.costPerCoeff(vectorized) +
+           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
+  }
   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
 
 protected: