From f2209d06e428e0691de71f30fc2db4cb29191cd2 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 10 Aug 2018 16:53:36 -0700 Subject: Add block evaluationto CwiseUnaryOp and add PreferBlockAccess enum to all evaluators --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 76 ++++++++++++++++------ 1 file changed, 57 insertions(+), 19 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 028902fea..e30f9ad8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -43,6 +43,7 @@ struct TensorEvaluator IsAligned = Derived::IsAligned, PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -195,6 +196,7 @@ struct TensorEvaluator IsAligned = Derived::IsAligned, PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -288,6 +290,7 @@ struct TensorEvaluator, Device> IsAligned = true, PacketAccess = internal::functor_traits::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -351,27 +354,34 @@ struct TensorEvaluator, Device> typedef TensorCwiseUnaryOp XprType; enum { - IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & - internal::functor_traits::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + BlockAccess = TensorEvaluator::PacketAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), + : m_device(device), + m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNoConst; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef internal::TensorBlock + TensorBlock; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { @@ -399,6 +409,29 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector* resources) const { + m_argImpl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( + TensorBlock* output_block) const { + if (NumDims <= 0) { + output_block->data()[0] = coeff(0); + return; + } + internal::TensorBlockView arg_block(m_device, m_argImpl, + *output_block); + internal::TensorBlockCwiseUnaryIO::Run(m_functor, + output_block->block_sizes(), + output_block + ->block_strides(), + output_block->data(), + arg_block.block_strides(), + arg_block.data()); + } + EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor @@ -408,6 +441,7 @@ struct TensorEvaluator, Device> private: + const Device& m_device; const UnaryOp m_functor; TensorEvaluator m_argImpl; }; @@ -421,16 +455,18 @@ struct TensorEvaluator XprType; enum { - IsAligned = TensorEvaluator::IsAligned & - TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & - TensorEvaluator::PacketAccess & - internal::functor_traits::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess & - TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess & + TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess | + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) @@ -501,7 +537,7 @@ struct TensorEvaluatordata()[0] = coeff(0); + output_block->data()[0] = coeff(Index(0)); return; } internal::TensorBlockView left_block( @@ -543,6 +579,7 @@ struct TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -648,6 +685,7 @@ struct TensorEvaluator PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & PacketType::HasBlend, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false -- cgit v1.2.3