diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-14 17:13:19 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-14 17:13:19 -0700 |
commit | 6fbedf5a4e7107a3a87f63f5769ba11e1c509b08 (patch) | |
tree | c849fab311621918fb81adc5e45192db663474f3 /unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | |
parent | bebb89acfaffe68a48585e4724de20dfab3a92d6 (diff) | |
parent | aeb5494a0b2edef3be447cec222e2d178e413389 (diff) |
Merged in rmlarsen/eigen (pull request PR-177)
Eigen Tensor cost model part 1.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 46 |
1 files changed, 41 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 4fe1fb943..680fc0a68 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -297,6 +297,11 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr typedef typename XprType::Index Index; typedef DSizes<Index, NumDims> Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; + enum { IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess, @@ -367,10 +372,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr } } - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { @@ -405,7 +406,6 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr template<int LoadMode> EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const { - const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; Index indices[2] = {index, index+PacketSize-1}; Index startInputs[2] = {0, 0}; if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { @@ -448,6 +448,23 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr } } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() + + TensorOpCost::DivCost<Index>()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } private: @@ -1044,6 +1061,25 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost + // model. + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() + + TensorOpCost::DivCost<Index>()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + private: // No assignment (copies are needed by the kernels) TensorEvaluator& operator = (const TensorEvaluator&); |