diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2019-10-07 15:34:26 -0700 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2019-10-07 15:34:26 -0700 |
commit | f74ab8cb8de5e425ddd25f4b06657926a2ad4599 (patch) | |
tree | 21686c69f54cd402fdf6508cedcfd25750f70898 /unsupported/Eigen/CXX11/src/Tensor | |
parent | 3afb640b5647654f272b1903b71877cb60ed3a78 (diff) |
Add block evaluation to TensorEvalTo and fix few small bugs
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 46 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 3 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 8 |
3 files changed, 47 insertions, 10 deletions
```diff
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index bf7522682..d1e4c82d2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -111,22 +111,28 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     BlockAccess = true,
-    BlockAccessV2 = false,
+    BlockAccessV2 = true,
     PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false, // to be implemented
     RawAccess = true
   };
 
-  typedef typename internal::TensorBlock<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
-      TensorBlock;
-  typedef typename internal::TensorBlockReader<
-      CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout>
-      TensorBlockReader;
+  static const int NumDims = internal::traits<ArgType>::NumDimensions;
+
+  typedef typename internal::TensorBlock<CoeffReturnType, Index, NumDims, Layout> TensorBlock;
+  typedef typename internal::TensorBlockReader<CoeffReturnType, Index, NumDims, Layout> TensorBlockReader;
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
-  typedef internal::TensorBlockNotImplemented TensorBlockV2;
+  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlockV2
+      ArgTensorBlock;
+
+  typedef internal::TensorBlockAssignment<
+      Scalar, NumDims, typename ArgTensorBlock::XprType, Index>
+      TensorBlockAssignment;
   //===--------------------------------------------------------------------===//
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -164,6 +170,30 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
     m_impl.block(&eval_to_block);
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlockV2(
+      TensorBlockDesc& desc, TensorBlockScratch& scratch) {
+    // Add `m_buffer` as destination buffer to the block descriptor.
+    desc.AddDestinationBuffer(
+        /*dst_base=*/m_buffer + desc.offset(),
+        /*dst_strides=*/internal::strides<Layout>(m_impl.dimensions()),
+        /*total_dst_bytes=*/
+        (internal::array_prod(m_impl.dimensions())
+         * sizeof(Scalar)));
+
+    ArgTensorBlock block = m_impl.blockV2(desc, scratch);
+
+    // If block was evaluated into a destination buffer, there is no need to do
+    // an assignment.
+    if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
+      TensorBlockAssignment::Run(
+          TensorBlockAssignment::target(
+              desc.dimensions(), internal::strides<Layout>(m_impl.dimensions()),
+              m_buffer, desc.offset()),
+          block.expr());
+    }
+    block.cleanup();
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
   }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index a7cb8dc97..97ac96db1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -238,7 +238,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
     typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
     typedef typename TensorBlock::Dimensions TensorBlockDimensions;
 
-    typedef internal::TensorBlockDescriptor<NumDims> TensorBlockDesc;
+    typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
+        TensorBlockDesc;
     typedef internal::TensorBlockScratchAllocator<DefaultDevice>
         TensorBlockScratch;
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 489b915ac..f3907be6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -231,7 +231,11 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
   blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch) const {
-    eigen_assert(m_impl.data() != NULL);
+    // If one of the dimensions is zero, return empty block view.
+    if (desc.size() == 0) {
+      return TensorBlockV2(internal::TensorBlockKind::kView, NULL,
+                           desc.dimensions());
+    }
 
     // Check if we can reuse `desc` destination, or allocate new scratch buffer.
     ScalarNoConst* materialized_output =
@@ -385,6 +389,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
           const Index out = output_offset + output_inner_pad_before_size;
           const Index in = input_offset + output_inner_pad_before_size;
 
+          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
+
           LinCopy::template Run<LinCopy::Kind::Linear>(
               typename LinCopy::Dst(out, 1, materialized_output),
               typename LinCopy::Src(in, 1, m_impl.data()),
```