diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
5 files changed, 88 insertions, 118 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 21a6b66e8..24a6343e8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -73,7 +73,7 @@ struct TensorOpResourceRequirements { // expression tree (like reductions) to communicate resources // requirements based on local state (like the total number of reductions // to be computed). - TensorOpResourceRequirements(internal::TensorBlockShapeType shape, + TensorOpResourceRequirements(TensorBlockShapeType shape, const Index size) : block_shape(shape), block_total_size(size) {} }; @@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements( *block_shape = resources[0].block_shape; *block_total_size = resources[0].block_total_size; for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) { - if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && - *block_shape != TensorBlockShapeType::kSkewedInnerDims) { - *block_shape = TensorBlockShapeType::kSkewedInnerDims; + if (resources[i].block_shape == kSkewedInnerDims && + *block_shape != kSkewedInnerDims) { + *block_shape = kSkewedInnerDims; } *block_total_size = numext::maxi(*block_total_size, resources[i].block_total_size); @@ -152,11 +152,11 @@ struct TensorBlockCopyOp { const Scalar* src_base = &src_data[src_index]; Scalar* dst_base = &dst_data[dst_index]; - typedef const Eigen::Array<Scalar, Dynamic, 1> Src; - typedef Eigen::Array<Scalar, Dynamic, 1> Dst; + typedef const Array<Scalar, Dynamic, 1> Src; + typedef Array<Scalar, Dynamic, 1> Dst; - typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap; - typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap; + typedef Map<Src, 0, InnerStride<> > SrcMap; + typedef Map<Dst, 0, InnerStride<> > DstMap; const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride)); DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride)); @@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout, bool BlockRead> class TensorBlockIO { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex> - TensorBlockCopyOp; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp; protected: struct BlockIteratorState { @@ -194,7 +192,7 @@ class TensorBlockIO { }; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data, Scalar* dst_data) { @@ -290,8 +288,8 @@ class TensorBlockIO { const StorageIndex block_total_size = NumDims == 0 ? 1 : block.block_sizes().TotalSize(); for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) { - TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, - dst_data, inputIndex, input_stride, src_data); + BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, + dst_data, inputIndex, input_stride, src_data); // Update index. for (int j = 0; j < num_squeezed_dims; ++j) { if (++block_iter_state[j].count < block_iter_state[j].size) { @@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, const Scalar* src_data) { + Block* block, const Scalar* src_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, StorageIndex first_coeff_index, + Block* block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) { Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map, @@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, Scalar* dst_data) { + const Block& block, Scalar* dst_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) { Base::Copy(block, first_coeff_index, tensor_to_block_dim_map, @@ -401,13 +395,13 @@ struct TensorBlockCwiseBinaryOp { const StorageIndex left_stride, const LeftScalar* left_data, const StorageIndex right_index, const StorageIndex right_stride, const RightScalar* right_data) { - typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs; - typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs; - typedef Eigen::Array<OutputScalar, Dynamic, 1> Out; + typedef const Array<LeftScalar, Dynamic, 1> Lhs; + typedef const Array<RightScalar, Dynamic, 1> Rhs; + typedef Array<OutputScalar, Dynamic, 1> Out; - typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap; - typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap; - typedef Eigen::Map<Out, 0, InnerStride<> > OutMap; + typedef Map<Lhs, 0, InnerStride<> > LhsMap; + typedef Map<Rhs, 0, InnerStride<> > RhsMap; + typedef Map<Out, 0, InnerStride<> > OutMap; const LeftScalar* lhs_base = &left_data[left_index]; const RightScalar* rhs_base = &right_data[right_index]; @@ -417,8 +411,7 @@ struct TensorBlockCwiseBinaryOp { const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride)); OutMap out(out_base, num_coeff, InnerStride<>(output_stride)); - out = - Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); + out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); } }; @@ -434,8 +427,7 @@ struct TensorBlockCwiseBinaryOp { template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar, int NumDims, int Layout> struct TensorBlockCwiseBinaryIO { - typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims, - Layout>::Dimensions Dimensions; + typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions; struct BlockIteratorState { StorageIndex output_stride, output_span; @@ -627,8 +619,7 @@ struct TensorBlockView { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorBlockMapper(const Dimensions& dims, @@ -663,7 +654,7 @@ class TensorBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -711,8 +702,7 @@ class TensorBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { @@ -742,7 +732,7 @@ class TensorBlockMapper { block_dim_sizes[i] = 1; } } else if (block_dim_sizes.TotalSize() > min_target_size) { - if (block_shape == TensorBlockShapeType::kUniformAllDims) { + if (block_shape == kUniformAllDims) { // Tensor will not fit within 'min_target_size' budget: calculate tensor // block dimension sizes based on "square" dimension size target. const size_t dim_size_target = static_cast<const size_t>( @@ -773,7 +763,7 @@ class TensorBlockMapper { total_size = total_size_other_dims * block_dim_sizes[dim]; } } - } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) { + } else if (block_shape == kSkewedInnerDims) { StorageIndex coeff_to_allocate = min_target_size; for (int i = 0; i < NumDims; ++i) { const int dim = cond<Layout>()(i, NumDims - i - 1); @@ -818,8 +808,7 @@ class TensorBlockMapper { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorSliceBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorSliceBlockMapper(const Dimensions& tensor_dims, @@ -860,7 +849,7 @@ class TensorSliceBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -917,8 +906,7 @@ class TensorSliceBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e604456e8..5d619efd8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -152,13 +152,7 @@ struct TensorContractionParams { // 1. Elementwise Relu transformation following Conv2D. // 2. AddBias to the Conv2D output channels dimension. // -// See expected implementation in NoOpOutputKernel. -struct OutputKernel { - template <typename Index, typename Scalar> - using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>; -}; - -// Output kernel that does absolutely nothing. +// The NoOpOutputKernel implements an output kernel that does absolutely nothing. struct NoOpOutputKernel { /** * Tensor contraction evaluator calls this kernel after finishing each block @@ -177,7 +171,7 @@ struct NoOpOutputKernel { */ template <typename Index, typename Scalar> EIGEN_ALWAYS_INLINE void operator()( - const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/, + const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/, const TensorContractionParams& /*params*/, Index /*i*/, Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {} }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 39410e63d..ab5990c14 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -20,8 +20,8 @@ namespace Eigen { * */ namespace internal { -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_> -struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> > +template<typename CustomUnaryFunc, typename XprType> +struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > { typedef typename XprType::Scalar Scalar; typedef typename XprType::StorageKind StorageKind; @@ -31,34 +31,26 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> > static const int NumDimensions = traits<XprType>::NumDimensions; static const int Layout = traits<XprType>::Layout; - template <class T> struct MakePointer { - // Intermediate typedef to workaround MSVC issue. - typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - typedef typename MakePointerT::RefType RefType; - typedef typename MakePointerT::ScalarType ScalarType; - }; - typedef typename MakePointer<typename internal::remove_const<typename XprType::CoeffReturnType>::type>::Type PointerType; }; -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_> -struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Eigen::Dense> +template<typename CustomUnaryFunc, typename XprType> +struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense> { - typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>& type; + typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type; }; -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_> -struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> > +template<typename CustomUnaryFunc, typename XprType> +struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > { - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> type; + typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type; }; } // end namespace internal -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_> -class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, ReadOnlyAccessors> +template<typename CustomUnaryFunc, typename XprType> +class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors> { public: typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar; @@ -85,10 +77,10 @@ class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFun // Eval as rvalue -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_, typename Device> -struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Device> +template<typename CustomUnaryFunc, typename XprType, typename Device> +struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device> { - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> ArgType; + typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType; typedef typename internal::traits<ArgType>::Index Index; static const int NumDims = internal::traits<ArgType>::NumDimensions; typedef DSizes<Index, NumDims> Dimensions; @@ -96,7 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; - typedef typename Eigen::internal::traits<ArgType>::PointerType PointerType; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -115,12 +107,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<PointerType>( + m_result = static_cast<PointerT>( m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; @@ -148,14 +140,14 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC PointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(PointerType data) { + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions); m_op.func().eval(m_op.expression(), result, m_device); } @@ -163,7 +155,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP Dimensions m_dimensions; const ArgType m_op; const Device& m_device; - PointerType m_result; + PointerT m_result; }; @@ -176,8 +168,8 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakeP * */ namespace internal { -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_> -struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> > +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> +struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > { typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, typename RhsXprType::Scalar>::ret Scalar; @@ -194,34 +186,26 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, Mak static const int NumDimensions = traits<LhsXprType>::NumDimensions; static const int Layout = traits<LhsXprType>::Layout; - template <class T> struct MakePointer { - // Intermediate typedef to workaround MSVC issue. - typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - typedef typename MakePointerT::RefType RefType; - typedef typename MakePointerT::ScalarType ScalarType; - }; - typedef typename MakePointer<CoeffReturnType>::Type PointerType; }; -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_> -struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Eigen::Dense> +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> +struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense> { typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type; }; -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_> -struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> > +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> +struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > { - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> type; + typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type; }; } // end namespace internal -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType,template <class> class MakePointer_> -class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, ReadOnlyAccessors> +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> +class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors> { public: typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar; @@ -254,10 +238,10 @@ class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinary // Eval as rvalue -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_, typename Device> -struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Device> +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device> +struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device> { - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> XprType; + typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType; typedef typename internal::traits<XprType>::Index Index; static const int NumDims = internal::traits<XprType>::NumDimensions; typedef DSizes<Index, NumDims> Dimensions; @@ -265,7 +249,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; - typedef typename Eigen::internal::traits<XprType>::PointerType PointerType; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -284,12 +268,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<PointerType>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))); + m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))); evalTo(m_result); return true; } @@ -316,14 +300,14 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC PointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(PointerType data) { + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions); m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); } @@ -331,7 +315,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, Dimensions m_dimensions; const XprType m_op; const Device& m_device; - PointerType m_result; + PointerT m_result; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 0cefe42dd..9b9587de5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -132,7 +132,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable, if (needs_assign) { // Size tensor blocks to fit in cache (or requested target block size). Index block_total_size = numext::mini(cache_size, total_size); - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; // Query expression tree for desired block size/shape. std::vector<TensorOpResourceRequirements> resources; evaluator.getResourceRequirements(&resources); @@ -229,10 +229,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> { Evaluator evaluator(expr, device); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const StorageIndex PacketSize = - Vectorizable - ? unpacket_traits<typename Evaluator::PacketReturnType>::size - : 1; const StorageIndex size = array_prod(evaluator.dimensions()); device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), EvalRange::alignBlockSize, @@ -272,7 +268,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; Index block_total_size = 0; // Query expression tree for desired block size/shape. std::vector<internal::TensorOpResourceRequirements> resources; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index da0751039..93a3b0e14 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -24,6 +24,14 @@ template<typename T> struct MakePointer { typedef T ScalarType; }; +// The PointerType class is a container of the device specefic pointer +// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression +// is a device-agnostic type and need MakePointer class for type conversion, +// the TensorEvaluator calss can be specialized for a device, hence it is possible +// to construct different types of temproray storage memory in TensorEvaluator +// for different devices by specializing the following PointerType class. +template<typename T, typename Device> struct PointerType : MakePointer<T>{}; + namespace internal{ template<typename A, typename B> struct Pointer_type_promotion { static const bool val=false; @@ -89,8 +97,8 @@ template<typename LeftXprType, typename RightXprType> class TensorAssignOp; template<typename Op, typename XprType> class TensorScanOp; template<typename Dims, typename XprType> class TensorTraceOp; -template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_ = MakePointer> class TensorCustomUnaryOp; -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_ = MakePointer> class TensorCustomBinaryOp; +template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; +template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp; template<typename XprType> class TensorForcedEvalOp; |