From 60ae24ee1a6c16114de456d77fcfba6f5a1160ca Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 2 Oct 2019 12:44:06 -0700 Subject: Add block evaluation to TensorReshaping/TensorCasting/TensorPadding/TensorSelect --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 138 ++++++++++----------- 1 file changed, 68 insertions(+), 70 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index c87075a72..b1d668744 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -176,11 +176,12 @@ struct TensorEvaluator typedef internal::TensorBlockAssignment TensorBlockAssign; - typename TensorBlockAssign::Dst dst(desc.dimensions(), - internal::strides<Layout>(m_dims), - m_data, desc.offset()); - TensorBlockAssign::Run(dst, block.expr()); + TensorBlockAssign::Run( + TensorBlockAssign::target(desc.dimensions(), + internal::strides<Layout>(m_dims), m_data, + desc.offset()), + block.expr()); } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } @@ -349,62 +350,7 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { assert(m_data != NULL); - - // TODO(ezhulenev): Move it to TensorBlockV2 and reuse in TensorForcedEval. - - // If a tensor block descriptor covers a contiguous block of the underlying - // memory, we can skip block buffer memory allocation, and construct a block - // from existing `m_data` memory buffer. - // - // Example: (RowMajor layout) - // m_dims: [11, 12, 13, 14] - // desc.dimensions(): [1, 1, 3, 14] - // - // In this case we can construct a TensorBlock starting at - // `m_data + desc.offset()`, with a `desc.dimensions()` block sizes.
- - static const bool - is_col_major = static_cast<int>(Layout) == static_cast<int>(ColMajor); - - // Find out how many inner dimensions have a matching size. - int num_matching_inner_dims = 0; - for (int i = 0; i < NumCoords; ++i) { - int dim = is_col_major ? i : NumCoords - i - 1; - if (m_dims[dim] != desc.dimensions()[dim]) break; - ++num_matching_inner_dims; - } - - // All the outer dimensions must be of size `1`, except a single dimension - // before the matching inner dimension (`3` in the example above). - bool can_use_direct_access = true; - for (int i = num_matching_inner_dims + 1; i < NumCoords; ++i) { - int dim = is_col_major ? i : NumCoords - i - 1; - if (desc.dimension(dim) != 1) { - can_use_direct_access = false; - break; - } - } - - if (can_use_direct_access) { - EvaluatorPointerType block_start = m_data + desc.offset(); - return TensorBlockV2(internal::TensorBlockKind::kView, block_start, - desc.dimensions()); - - } else { - void* mem = scratch.allocate(desc.size() * sizeof(Scalar)); - ScalarNoConst* block_buffer = static_cast<ScalarNoConst*>(mem); - - TensorBlockIOSrc src(internal::strides<Layout>(m_dims), m_data, - desc.offset()); - TensorBlockIODst dst(desc.dimensions(), - internal::strides<Layout>(desc.dimensions()), - block_buffer); - - TensorBlockIO::Copy(dst, src); - - return TensorBlockV2(internal::TensorBlockKind::kMaterializedInScratch, - block_buffer, desc.dimensions()); - } + return TensorBlockV2::materialize(m_data, m_dims, desc, scratch); } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } @@ -923,15 +869,21 @@ struct TensorEvaluator typedef typename XprType::Scalar Scalar; enum { - IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess & - PacketType<Scalar, Device>::HasBlend, - BlockAccess = false, - BlockAccessV2 = false, - PreferBlockAccess = false, - Layout = TensorEvaluator<IfArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned =
TensorEvaluator<ThenArgType, Device>::IsAligned & + TensorEvaluator<ElseArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & + TensorEvaluator<ElseArgType, Device>::PacketAccess & + PacketType<Scalar, Device>::HasBlend, + BlockAccess = false, + BlockAccessV2 = TensorEvaluator<IfArgType, Device>::BlockAccessV2 && + TensorEvaluator<ThenArgType, Device>::BlockAccessV2 && + TensorEvaluator<ElseArgType, Device>::BlockAccessV2, + PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess || + TensorEvaluator<ThenArgType, Device>::PreferBlockAccess || + TensorEvaluator<ElseArgType, Device>::PreferBlockAccess, + Layout = TensorEvaluator<IfArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) @@ -953,8 +905,36 @@ struct TensorEvaluator typedef StorageMemory<CoeffReturnType, Device> Storage; typedef typename Storage::Type EvaluatorPointerType; + static const int NumDims = internal::array_size<Dimensions>::value; + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// - typedef internal::TensorBlockNotImplemented TensorBlockV2; + typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; + + typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlockV2 + IfArgTensorBlock; + typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlockV2 + ThenArgTensorBlock; + typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlockV2 + ElseArgTensorBlock; + + struct TensorSelectOpBlockFactory { + template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType> + struct XprType { + typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type; + }; + + template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType> + typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr( + const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const { + return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr); + } + }; + + typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory, IfArgTensorBlock, ThenArgTensorBlock, ElseArgTensorBlock> + TensorBlockV2; //===--------------------------------------------------------------------===// EIGEN_DEVICE_FUNC const Dimensions& dimensions() const @@ -1000,6 +980,24 @@ struct TensorEvaluator .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( +
std::vector<internal::TensorOpResourceRequirements>* resources) const { + m_condImpl.getResourceRequirements(resources); + m_thenImpl.getResourceRequirements(resources); + m_elseImpl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 + blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const { + // It's unsafe to pass destination buffer to underlying expressions, because + // output might be aliased with one of the inputs. + desc.DropDestinationBuffer(); + + return TensorBlockV2( + m_condImpl.blockV2(desc, scratch), m_thenImpl.blockV2(desc, scratch), + m_elseImpl.blockV2(desc, scratch), TensorSelectOpBlockFactory()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; } #ifdef EIGEN_USE_SYCL -- cgit v1.2.3