diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 38 |
1 files changed, 32 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 554ee5f59..910472ad8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -77,6 +77,8 @@ class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind; typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index; + static const int NumDims = Eigen::internal::traits<TensorEvalToOp>::NumDimensions; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) : m_xpr(expr), m_buffer(buffer) {} @@ -105,15 +107,22 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> static const int PacketSize = PacketType<CoeffReturnType, Device>::size; enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, + IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = true, PreferBlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = true + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = true }; + typedef typename internal::TensorBlock< + CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout> + TensorBlock; + typedef typename internal::TensorBlockReader< + CoeffReturnType, Index, internal::traits<ArgType>::NumDimensions, Layout> + TensorBlockReader; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) @@ -143,6 +152,18 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector<internal::TensorOpResourceRequirements>* resources) const { + m_impl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) { + TensorBlock eval_to_block(block->first_coeff_index(), block->block_sizes(), + block->tensor_strides(), block->tensor_strides(), + m_buffer + block->first_coeff_index()); + m_impl.block(&eval_to_block); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); } @@ -158,6 +179,11 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { + assert(m_buffer != NULL); + TensorBlockReader::Run(block, m_buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { // We assume that evalPacket or evalScalar is called to perform the // assignment and account for the cost of the write here. |