From ef9dfee7bdc8e0d82c9b7ddf9414ef99d866d7ba Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 24 Sep 2019 12:52:45 -0700 Subject: Tensor block evaluation V2 support for unary/binary/broadcasting --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 190 ++++++++++++++++++--- 1 file changed, 171 insertions(+), 19 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index fec735868..c87075a72 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -43,13 +43,14 @@ struct TensorEvaluator internal::traits::NumDimensions : 0; enum { - IsAligned = Derived::IsAligned, - PacketAccess = (PacketType::size > 1), - BlockAccess = internal::is_arithmetic::type>::value, - PreferBlockAccess = false, - Layout = Derived::Layout, - CoordAccess = NumCoords > 0, - RawAccess = true + IsAligned = Derived::IsAligned, + PacketAccess = (PacketType::size > 1), + BlockAccess = internal::is_arithmetic::type>::value, + BlockAccessV2 = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true }; typedef typename internal::TensorBlock< @@ -62,9 +63,13 @@ struct TensorEvaluator typename internal::remove_const::type, Index, NumCoords, Layout> TensorBlockWriter; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(device.get((const_cast(m.data())))), - m_dims(m.dimensions()), + : m_data(device.get((const_cast(m.data())))), + m_dims(m.dimensions()), m_device(device) { } @@ -162,6 +167,22 @@ struct TensorEvaluator 
TensorBlockWriter::Run(block, m_data); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2( + const TensorBlockDesc& desc, const TensorBlockV2& block) { + assert(m_data != NULL); + + typedef typename TensorBlockV2::XprType TensorBlockExpr; + typedef internal::TensorBlockAssignment + TensorBlockAssign; + typename TensorBlockAssign::Dst dst(desc.dimensions(), + internal::strides(m_dims), + m_data, desc.offset()); + + TensorBlockAssign::Run(dst, block.expr()); + } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } #ifdef EIGEN_USE_SYCL @@ -220,28 +241,43 @@ struct TensorEvaluator typedef StorageMemory Storage; typedef typename Storage::Type EvaluatorPointerType; + typedef typename internal::remove_const::type ScalarNoConst; + // NumDimensions is -1 for variable dim tensors static const int NumCoords = internal::traits::NumDimensions > 0 ? internal::traits::NumDimensions : 0; static const int PacketSize = PacketType::size; enum { - IsAligned = Derived::IsAligned, - PacketAccess = (PacketType::size > 1), - BlockAccess = internal::is_arithmetic::type>::value, + IsAligned = Derived::IsAligned, + PacketAccess = (PacketType::size > 1), + BlockAccess = internal::is_arithmetic::value, + BlockAccessV2 = internal::is_arithmetic::value, PreferBlockAccess = false, - Layout = Derived::Layout, - CoordAccess = NumCoords > 0, - RawAccess = true + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true }; - typedef typename internal::TensorBlock< - typename internal::remove_const::type, Index, NumCoords, Layout> + typedef typename internal::TensorBlock TensorBlock; - typedef typename internal::TensorBlockReader< - typename internal::remove_const::type, Index, NumCoords, Layout> + typedef typename internal::TensorBlockReader TensorBlockReader; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef 
internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef internal::TensorBlockIOV2 + TensorBlockIO; + typedef typename TensorBlockIO::Dst TensorBlockIODst; + typedef typename TensorBlockIO::Src TensorBlockIOSrc; + + typedef typename internal::TensorMaterializedBlock + TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device) { } @@ -310,6 +346,67 @@ struct TensorEvaluator TensorBlockReader::Run(block, m_data); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 + blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { + assert(m_data != NULL); + + // TODO(ezhulenev): Move it to TensorBlockV2 and reuse in TensorForcedEval. + + // If a tensor block descriptor covers a contiguous block of the underlying + // memory, we can skip block buffer memory allocation, and construct a block + // from existing `m_data` memory buffer. + // + // Example: (RowMajor layout) + // m_dims: [11, 12, 13, 14] + // desc.dimensions(): [1, 1, 3, 14] + // + // In this case we can construct a TensorBlock starting at + // `m_data + desc.offset()`, with a `desc.dimensions()` block sizes. + + static const bool + is_col_major = static_cast(Layout) == static_cast(ColMajor); + + // Find out how many inner dimensions have a matching size. + int num_matching_inner_dims = 0; + for (int i = 0; i < NumCoords; ++i) { + int dim = is_col_major ? i : NumCoords - i - 1; + if (m_dims[dim] != desc.dimensions()[dim]) break; + ++num_matching_inner_dims; + } + + // All the outer dimensions must be of size `1`, except a single dimension + // before the matching inner dimension (`3` in the example above). + bool can_use_direct_access = true; + for (int i = num_matching_inner_dims + 1; i < NumCoords; ++i) { + int dim = is_col_major ? 
i : NumCoords - i - 1; + if (desc.dimension(dim) != 1) { + can_use_direct_access = false; + break; + } + } + + if (can_use_direct_access) { + EvaluatorPointerType block_start = m_data + desc.offset(); + return TensorBlockV2(internal::TensorBlockKind::kView, block_start, + desc.dimensions()); + + } else { + void* mem = scratch.allocate(desc.size() * sizeof(Scalar)); + ScalarNoConst* block_buffer = static_cast(mem); + + TensorBlockIOSrc src(internal::strides(m_dims), m_data, + desc.offset()); + TensorBlockIODst dst(desc.dimensions(), + internal::strides(desc.dimensions()), + block_buffer); + + TensorBlockIO::Copy(dst, src); + + return TensorBlockV2(internal::TensorBlockKind::kMaterializedInScratch, + block_buffer, desc.dimensions()); + } + } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } #ifdef EIGEN_USE_SYCL // binding placeholder accessors to a command group handler for SYCL @@ -355,12 +452,17 @@ struct TensorEvaluator, Device> #endif , BlockAccess = false, + BlockAccessV2 = false, PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false }; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; } @@ -421,6 +523,7 @@ struct TensorEvaluator, Device> PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, BlockAccess = TensorEvaluator::BlockAccess, + BlockAccessV2 = TensorEvaluator::BlockAccessV2, PreferBlockAccess = TensorEvaluator::PreferBlockAccess, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -446,6 +549,17 @@ struct TensorEvaluator, Device> typedef 
internal::TensorBlock TensorBlock; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlockV2 + ArgTensorBlock; + + typedef internal::TensorCwiseUnaryBlock + TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { @@ -505,6 +619,11 @@ struct TensorEvaluator, Device> arg_block.data()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 + blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { + return TensorBlockV2(m_argImpl.blockV2(desc, scratch), m_functor); + } + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } #ifdef EIGEN_USE_SYCL @@ -537,6 +656,8 @@ struct TensorEvaluator::PacketAccess, BlockAccess = TensorEvaluator::BlockAccess & TensorEvaluator::BlockAccess, + BlockAccessV2 = TensorEvaluator::BlockAccessV2 & + TensorEvaluator::BlockAccessV2, PreferBlockAccess = TensorEvaluator::PreferBlockAccess | TensorEvaluator::PreferBlockAccess, Layout = TensorEvaluator::Layout, @@ -571,6 +692,20 @@ struct TensorEvaluator::Layout> TensorBlock; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator TensorBlockScratch; + + typedef typename TensorEvaluator::TensorBlockV2 + LeftTensorBlock; + typedef typename TensorEvaluator::TensorBlockV2 + RightTensorBlock; + + typedef internal::TensorCwiseBinaryBlock + TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use right impl instead if 
right impl dimensions are known at compile time. @@ -642,6 +777,13 @@ struct TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, BlockAccess = false, + BlockAccessV2 = false, PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -709,6 +852,10 @@ struct TensorEvaluator Storage; typedef typename Storage::Type EvaluatorPointerType; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use arg2 or arg3 dimensions if they are known at compile time. @@ -780,6 +927,7 @@ struct TensorEvaluator PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & PacketType::HasBlend, BlockAccess = false, + BlockAccessV2 = false, PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented @@ -805,6 +953,10 @@ struct TensorEvaluator typedef StorageMemory Storage; typedef typename Storage::Type EvaluatorPointerType; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockNotImplemented TensorBlockV2; + //===--------------------------------------------------------------------===// + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { // TODO: use then or else impl instead if they happen to be known at compile time. -- cgit v1.2.3