diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-10 16:53:36 -0700 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-10 16:53:36 -0700 |
commit | f2209d06e428e0691de71f30fc2db4cb29191cd2 (patch) | |
tree | 37d7294a61f80c87389e8e930700a549554afe51 /unsupported/Eigen | |
parent | cfaedb38cd662def3b5684a20965b3bc1b0d6a3f (diff) |
Add block evaluation to CwiseUnaryOp and add PreferBlockAccess enum to all evaluators
Diffstat (limited to 'unsupported/Eigen')
33 files changed, 312 insertions, 82 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index c0f33ba2d..ea3ab329d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -87,6 +87,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -220,6 +221,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h index 442639868..5110e99ee 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h @@ -109,6 +109,7 @@ struct TensorEvaluator<const TensorTupleReducerDeviceOp<StrideDims, ArgType>, Sy IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, SyclKernelDevice>::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index f1f877c16..9ec1ec726 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -102,14 +102,16 @@ struct TensorEvaluator<const 
TensorAssignOp<LeftArgType, RightArgType>, Device> static const int NumDims = XprType::NumDims; enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & - TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess & - TensorEvaluator<RightArgType, Device>::BlockAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess + IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & + TensorEvaluator<RightArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & + TensorEvaluator<RightArgType, Device>::PacketAccess, + BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess & + TensorEvaluator<RightArgType, Device>::BlockAccess, + PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess | + TensorEvaluator<RightArgType, Device>::PreferBlockAccess, + Layout = TensorEvaluator<LeftArgType, Device>::Layout, + RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess }; typedef typename internal::TensorBlock< diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 877603421..4a3e1ac17 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -382,6 +382,147 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, }; /** + * \class TensorBlockCwiseUnaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Carries out a cwise unary op on a number of coefficients. + * + * This class reads strided input from the argument, and writes the + * result of the cwise unary op to the strided output array. 
+ * + */ +struct TensorBlockCwiseUnaryOp { + template <typename StorageIndex, typename UnaryFunctor, + typename OutputScalar, typename InputScalar> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const UnaryFunctor& functor, const StorageIndex num_coeff, + const StorageIndex output_index, const StorageIndex output_stride, + OutputScalar* output_data, const StorageIndex input_index, + const StorageIndex input_stride, const InputScalar* input_data) { + typedef const Eigen::Array<InputScalar, Dynamic, 1> Input; + typedef Eigen::Array<OutputScalar, Dynamic, 1> Output; + + typedef Eigen::Map<Input, 0, InnerStride<>> InputMap; + typedef Eigen::Map<Output, 0, InnerStride<>> OutputMap; + + const InputScalar* input_base = &input_data[input_index]; + OutputScalar* output_base = &output_data[output_index]; + + const InputMap input(input_base, num_coeff, InnerStride<>(input_stride)); + OutputMap output(output_base, num_coeff, InnerStride<>(output_stride)); + + output = Eigen::CwiseUnaryOp<UnaryFunctor, InputMap>(input, functor); + } +}; + +/** + * \class TensorBlockCwiseUnaryIO + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor block IO class for carrying out cwise unary ops. + * + * This class carries out the unary op on given blocks. 
+ */ +template <typename UnaryFunctor, typename StorageIndex, typename OutputScalar, + int NumDims, int Layout> +struct TensorBlockCwiseUnaryIO { + typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims, + Layout>::Dimensions Dimensions; + + struct BlockIteratorState { + StorageIndex output_stride, output_span; + StorageIndex input_stride, input_span; + StorageIndex size, count; + }; + + template <typename InputScalar> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const UnaryFunctor& functor, const Dimensions& block_sizes, + const Dimensions& block_strides, OutputScalar* output_data, + const array<StorageIndex, NumDims>& input_strides, + const InputScalar* input_data) { + // Find the innermost dimension whose size is not 1. This is the effective + // inner dim. If all dimensions are of size 1, fallback to using the actual + // innermost dim to avoid out-of-bound access. + int num_size_one_inner_dims = 0; + for (int i = 0; i < NumDims; ++i) { + const int dim = cond<Layout>()(i, NumDims - i - 1); + if (block_sizes[dim] != 1) { + num_size_one_inner_dims = i; + break; + } + } + // Calculate strides and dimensions. + const int inner_dim = + NumDims == 0 ? 1 + : cond<Layout>()(num_size_one_inner_dims, + NumDims - num_size_one_inner_dims - 1); + StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim]; + for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) { + const int dim = cond<Layout>()(i, NumDims - i - 1); + // Merge multiple inner dims into one for larger inner dim size (i.e. + // fewer calls to TensorBlockCwiseUnaryOp::Run()). + if (inner_dim_size == block_strides[dim] && + block_strides[dim] == input_strides[dim]) { + inner_dim_size *= block_sizes[dim]; + ++num_size_one_inner_dims; + } else { + break; + } + } + + StorageIndex output_index = 0, input_index = 0; + + const StorageIndex output_stride = + NumDims == 0 ? 1 : block_strides[inner_dim]; + const StorageIndex input_stride = + NumDims == 0 ? 
1 : input_strides[inner_dim]; + + const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1; + array<BlockIteratorState, at_least_1_dim> block_iter_state; + + // Initialize block iterator state. Squeeze away any dimension of size 1. + int num_squeezed_dims = 0; + for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { + const int dim = cond<Layout>()(i + 1, NumDims - i - 2); + const StorageIndex size = block_sizes[dim]; + if (size == 1) { + continue; + } + BlockIteratorState& state = block_iter_state[num_squeezed_dims]; + state.output_stride = block_strides[dim]; + state.input_stride = input_strides[dim]; + state.size = size; + state.output_span = state.output_stride * (size - 1); + state.input_span = state.input_stride * (size - 1); + state.count = 0; + ++num_squeezed_dims; + } + + // Compute cwise unary op. + const StorageIndex block_total_size = + NumDims == 0 ? 1 : block_sizes.TotalSize(); + for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) { + TensorBlockCwiseUnaryOp::Run(functor, inner_dim_size, output_index, + output_stride, output_data, input_index, + input_stride, input_data); + // Update index. 
+ for (int j = 0; j < num_squeezed_dims; ++j) { + auto& state = block_iter_state[j]; + if (++state.count < state.size) { + output_index += state.output_stride; + input_index += state.input_stride; + break; + } + state.count = 0; + output_index -= state.output_span; + input_index -= state.input_span; + } + } + } +}; + +/** * \class TensorBlockCwiseBinaryOp * \ingroup CXX11_Tensor_Module * diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index a4d750885..5e812b04d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -108,11 +108,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> bool isCopy= false, nByOne = false, oneByN = false; enum { - IsAligned = true, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false + IsAligned = true, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index b4c4162ef..76fab39e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -144,12 +144,13 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> enum { // Alignment can't be guaranteed at compile time since it depends on the // slice offsets. 
- IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 27c92d8f6..3863ee8c3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -123,6 +123,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy IsAligned = false, PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<LeftArgType, Device>::Layout, RawAccess = false }; @@ -308,6 +309,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De IsAligned = false, PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<LeftArgType, Device>::Layout, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e604456e8..c459fc649 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -241,6 +241,7 @@ struct TensorContractionEvaluatorBase 
IsAligned = true, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<LeftArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index a7751eee1..1f613d3c7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -196,6 +196,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> IsAligned = false, PacketAccess = true, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index a07e32db0..2d0e6599f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -308,6 +308,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<InputArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -780,6 +781,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h index d301d0c01..e79958fc9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h @@ -243,6 +243,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr IsAligned = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::IsAligned & TensorEvaluator<KernelArgType, const Eigen::SyclDevice>::IsAligned, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<InputArgType, const Eigen::SyclDevice>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 47b5a5a5e..d71b2e34b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -93,6 +93,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi IsAligned = false, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<XprType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -255,6 +256,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, IsAligned = false, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<LhsXprType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 256d499f2..554ee5f59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -108,6 +108,7 @@ struct 
TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 028902fea..e30f9ad8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -43,6 +43,7 @@ struct TensorEvaluator IsAligned = Derived::IsAligned, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -195,6 +196,7 @@ struct TensorEvaluator<const Derived, Device> IsAligned = Derived::IsAligned, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -288,6 +290,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> IsAligned = true, PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -351,27 +354,34 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType; enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & - internal::functor_traits<UnaryOp>::PacketAccess, - 
BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & + internal::functor_traits<UnaryOp>::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), + : m_device(device), + m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const<Scalar>::type ScalarNoConst; typedef typename internal::traits<XprType>::Scalar CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; + static const int NumDims = internal::array_size<Dimensions>::value; + typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> + TensorBlock; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { @@ -399,6 +409,29 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector<internal::TensorOpResourceRequirements>* resources) const { + m_argImpl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( + TensorBlock* output_block) const { + if (NumDims <= 0) 
{ + output_block->data()[0] = coeff(0); + return; + } + internal::TensorBlockView<ArgType, Device> arg_block(m_device, m_argImpl, + *output_block); + internal::TensorBlockCwiseUnaryIO<UnaryOp, Index, ScalarNoConst, NumDims, + Layout>::Run(m_functor, + output_block->block_sizes(), + output_block + ->block_strides(), + output_block->data(), + arg_block.block_strides(), + arg_block.data()); + } + EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor @@ -408,6 +441,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> private: + const Device& m_device; const UnaryOp m_functor; TensorEvaluator<ArgType, Device> m_argImpl; }; @@ -421,16 +455,18 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType; enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & - TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & - TensorEvaluator<RightArgType, Device>::PacketAccess & - internal::functor_traits<BinaryOp>::PacketAccess, - BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess & - TensorEvaluator<RightArgType, Device>::BlockAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & + TensorEvaluator<RightArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & + TensorEvaluator<RightArgType, Device>::PacketAccess & + internal::functor_traits<BinaryOp>::PacketAccess, + BlockAccess = TensorEvaluator<LeftArgType, Device>::BlockAccess & + TensorEvaluator<RightArgType, Device>::BlockAccess, + PreferBlockAccess = TensorEvaluator<LeftArgType, Device>::PreferBlockAccess | + 
TensorEvaluator<RightArgType, Device>::PreferBlockAccess, + Layout = TensorEvaluator<LeftArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) @@ -501,7 +537,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( TensorBlock* output_block) const { if (NumDims <= 0) { - output_block->data()[0] = coeff(0); + output_block->data()[0] = coeff(Index(0)); return; } internal::TensorBlockView<LeftArgType, Device> left_block( @@ -543,6 +579,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess & internal::functor_traits<TernaryOp>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<Arg1Type, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -648,6 +685,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess & PacketType<Scalar, Device>::HasBlend, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<IfArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index d6ab4d997..480cf1f39 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -136,6 +136,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D IsAligned = false, PacketAccess = true, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, 
Device>::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 1342e47a6..71ba56773 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -42,6 +42,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), PacketAccess = (internal::packet_traits<Scalar>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = Options_ & RowMajor ? RowMajor : ColMajor, CoordAccess = true, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 2778bf5ec..edf9f85d8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -99,6 +99,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> IsAligned = true, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, RawAccess = true }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 0dd524a30..b8a57ab99 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -132,7 +132,12 @@ struct IsVectorizable<GpuDevice, Expression> { template <typename Device, typename Expression> struct IsTileable { - static const bool value = TensorEvaluator<Expression, Device>::BlockAccess; + // Check that block evaluation is supported and it's a preferred option (at + // least one sub-expression has much faster block evaluation, e.g. + // broadcasting). 
+ static const bool value = + TensorEvaluator<Expression, Device>::BlockAccess & + TensorEvaluator<Expression, Device>::PreferBlockAccess; }; template <typename Expression, typename Device, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 97c8d4a02..95c9e6aee 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -92,6 +92,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> IsAligned = false, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 2b2f4a650..1826d7022 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -244,12 +244,13 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device> static const int PacketSize = PacketType<CoeffReturnType, Device>::size; enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = true, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = true, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, + RawAccess = false }; typedef internal::TensorBlock<Scalar, Index, NumDims, Layout> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h index 64f2ad81f..e28565009 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -91,6 +91,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device> IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index e3165fa10..998757d14 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -120,6 +120,7 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor, CoordAccess = false, // to be implemented RawAccess = TensorEvaluator<ArgType, Device>::RawAccess @@ -183,6 +184,7 @@ template<typename ArgType, typename Device> IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? 
RowMajor : ColMajor, CoordAccess = false // to be implemented }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index b25c1eabc..6f9294ccf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -111,16 +111,17 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, // TODO(andydavis, wuke) Enable BlockAccess for the general case when the // performance issue with block-based reshape is resolved. - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess && - TensorEvaluator<ArgType, Device>::RawAccess && - NumInputDims > 0 && NumOutputDims > 0, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = TensorEvaluator<ArgType, Device>::RawAccess + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess && + TensorEvaluator<ArgType, Device>::RawAccess && + NumInputDims > 0 && NumOutputDims > 0, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator<ArgType, Device>::RawAccess }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; @@ -349,6 +350,7 @@ template<typename NewDimensions, typename ArgType, typename Device> IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be 
implemented RawAccess = TensorEvaluator<ArgType, Device>::RawAccess @@ -508,12 +510,13 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi enum { // Alignment can't be guaranteed at compile time since it depends on the // slice offsets and sizes. - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; @@ -785,12 +788,13 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> typedef Sizes Dimensions; enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, + RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; @@ -972,6 +976,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = 
TensorEvaluator<ArgType, Device>::Layout, RawAccess = false }; @@ -1148,6 +1153,7 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index aa1db3c73..59c1704ed 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device IsAligned = true, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = true, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index a0a1ad8f4..4292fe0c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -95,6 +95,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 80f179ba4..3d534eaa2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -475,12 +475,13 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, static const int PacketSize = PacketType<CoeffReturnType, Device>::size; enum { - 
IsAligned = false, - PacketAccess = Self::InputPacketAccess && Op::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = Self::InputPacketAccess && Op::PacketAccess, + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h index a6cade50f..6e15e75f9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h @@ -137,6 +137,7 @@ template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = PlainObjectType::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -366,6 +367,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device> IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorRef<Derived>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -414,6 +416,7 @@ struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<cons IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index 9193bdd8e..b7fb969f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -114,6 +114,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, 
Device IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -255,6 +256,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device> IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index b1135f297..641366d9d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -97,6 +97,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { IsAligned = false, PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 98f125408..a5b541a68 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -111,12 +111,13 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> static const int PacketSize = PacketType<CoeffReturnType, Device>::size; enum { - IsAligned = false, - PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), + BlockAccess = TensorEvaluator<ArgType, 
Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; @@ -412,11 +413,12 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device> static const int PacketSize = PacketType<CoeffReturnType, Device>::size; enum { - IsAligned = false, - PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false + IsAligned = false, + PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator<ArgType, Device>::Layout, + RawAccess = false }; typedef typename internal::remove_const<Scalar>::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 4b69072f2..221dc96c9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -113,6 +113,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -275,6 +276,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h index ea53bb04b..9fc54a4c0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -96,6 +96,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device> IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index 3c7d8bbc0..c1b7a58ca 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -200,6 +200,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, CoordAccess = false, RawAccess = false |