From 694084ecbd12c5183a8ff0604d04971d043abfff Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 4 Mar 2019 11:10:21 -0800 Subject: Use fast divisors in TensorGeneratorOp --- unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index ac66f9cf1..0fee18fb6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -98,6 +98,8 @@ struct TensorEvaluator, Device> RawAccess = false }; + typedef internal::TensorIntDivisor IndexDivisor; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_generator(op.generator()) #ifdef EIGEN_USE_SYCL @@ -118,6 +120,9 @@ struct TensorEvaluator, Device> m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; } } + for (int i = 0; i < NumDims; ++i) { + m_fast_strides[i] = IndexDivisor(m_strides[i]); + } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -150,6 +155,8 @@ struct TensorEvaluator, Device> return rslt; } + // TODO(ezhulenev): Add tiled evaluation support. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { // TODO(rmlarsen): This is just a placeholder. Define interface to make @@ -170,14 +177,14 @@ struct TensorEvaluator, Device> void extract_coordinates(Index index, array& coords) const { if (static_cast(Layout) == static_cast(ColMajor)) { for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_strides[i]; + const Index idx = index / m_fast_strides[i]; index -= idx * m_strides[i]; coords[i] = idx; } coords[0] = index; } else { for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_strides[i]; + const Index idx = index / m_fast_strides[i]; index -= idx * m_strides[i]; coords[i] = idx; } @@ -187,6 +194,7 @@ struct TensorEvaluator, Device> Dimensions m_dimensions; array m_strides; + array m_fast_strides; Generator m_generator; #ifdef EIGEN_USE_SYCL TensorEvaluator m_argImpl; -- cgit v1.2.3