From 6913221c43c6ad41b1fbfc0d263d2764abd11ad2 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 25 Jul 2018 13:51:10 -0700 Subject: Add tiled evaluation support to TensorExecutor --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 101 +++++++++++++++++++-- 1 file changed, 94 insertions(+), 7 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index fe62ff1ea..ba02802d2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -41,11 +41,24 @@ struct TensorEvaluator enum { IsAligned = Derived::IsAligned, PacketAccess = (internal::unpacket_traits::size > 1), + BlockAccess = internal::is_arithmetic::type>::value, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true }; + typedef typename internal::TensorBlock< + typename internal::remove_const::type, Index, NumCoords, Layout> + TensorBlock; + typedef typename internal::TensorBlockReader< + typename internal::remove_const::type, Index, NumCoords, Layout, + PacketAccess> + TensorBlockReader; + typedef typename internal::TensorBlockWriter< + typename internal::remove_const::type, Index, NumCoords, Layout, + PacketAccess> + TensorBlockWriter; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) { } @@ -113,6 +126,20 @@ struct TensorEvaluator internal::unpacket_traits::size); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector* resources) const {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { + assert(m_data != NULL); + TensorBlockReader::Run(block, m_data); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( + const TensorBlock& block) { + assert(m_data != NULL); + TensorBlockWriter::Run(block, m_data); + } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } /// required by sycl in order to construct sycl buffer from raw pointer @@ -167,11 +194,20 @@ struct TensorEvaluator enum { IsAligned = Derived::IsAligned, PacketAccess = (internal::unpacket_traits::size > 1), + BlockAccess = internal::is_arithmetic::type>::value, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true }; + typedef typename internal::TensorBlock< + typename internal::remove_const::type, Index, NumCoords, Layout> + TensorBlock; + typedef typename internal::TensorBlockReader< + typename internal::remove_const::type, Index, NumCoords, Layout, + PacketAccess> + TensorBlockReader; + // Used for accessor extraction in SYCL Managed TensorMap: const Derived& derived() const { return m_impl; } @@ -219,6 +255,14 @@ struct TensorEvaluator internal::unpacket_traits::size); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector* resources) const {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { + assert(m_data != NULL); + TensorBlockReader::Run(block, m_data); + } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } /// added for sycl in order to construct the buffer from the sycl device @@ -244,6 +288,7 @@ struct TensorEvaluator, Device> enum { IsAligned = true, PacketAccess = internal::functor_traits::PacketAccess, + BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -308,7 +353,9 @@ struct TensorEvaluator, Device> enum { IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, + PacketAccess = TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -375,16 +422,21 @@ struct TensorEvaluator XprType; enum { - IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + BlockAccess = TensorEvaluator::BlockAccess & + TensorEvaluator::BlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), + : m_device(device), + m_functor(op.functor()), m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device) { @@ -399,6 +451,14 @@ struct TensorEvaluator::size; typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size< + typename TensorEvaluator::Dimensions>::value; + + typedef internal::TensorBlock< + typename internal::remove_const::type, Index, NumDims, + TensorEvaluator::Layout> + TensorBlock; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use right impl instead if right impl dimensions are known at compile time. @@ -433,6 +493,30 @@ struct TensorEvaluator* resources) const { + m_leftImpl.getResourceRequirements(resources); + m_rightImpl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( + TensorBlock* output_block) const { + if (NumDims <= 0) { + output_block->data()[0] = coeff(0); + return; + } + internal::TensorBlockView left_block( + m_device, m_leftImpl, *output_block); + internal::TensorBlockView right_block( + m_device, m_rightImpl, *output_block); + internal::TensorBlockCwiseBinaryIO< + BinaryOp, Index, typename internal::remove_const::type, NumDims, + Layout>::Run(m_functor, output_block->block_sizes(), + output_block->block_strides(), output_block->data(), + left_block.block_strides(), left_block.data(), + right_block.block_strides(), right_block.data()); + } + EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator& left_impl() const { return m_leftImpl; } @@ -442,6 +526,7 @@ struct TensorEvaluator m_leftImpl; TensorEvaluator m_rightImpl; @@ -458,6 +543,7 @@ struct TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, + BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -562,6 +648,7 @@ struct TensorEvaluator IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::packet_traits::HasBlend, + BlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false -- cgit v1.2.3