From f2209d06e428e0691de71f30fc2db4cb29191cd2 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 10 Aug 2018 16:53:36 -0700 Subject: Add block evaluationto CwiseUnaryOp and add PreferBlockAccess enum to all evaluators --- unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h | 2 + .../Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h | 1 + unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 18 +-- unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 141 +++++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 11 +- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 13 +- .../Eigen/CXX11/src/Tensor/TensorConcatenation.h | 2 + .../Eigen/CXX11/src/Tensor/TensorContraction.h | 1 + .../Eigen/CXX11/src/Tensor/TensorConversion.h | 1 + .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 + .../Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h | 1 + .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 2 + unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 1 + .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 76 ++++++++--- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 1 + .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 1 + .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 1 + .../CXX11/src/Tensor/TensorForwardDeclarations.h | 7 +- .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 1 + .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 13 +- .../Eigen/CXX11/src/Tensor/TensorInflation.h | 1 + .../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 2 + .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 46 ++++--- unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h | 1 + unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h | 1 + .../Eigen/CXX11/src/Tensor/TensorReduction.h | 13 +- unsupported/Eigen/CXX11/src/Tensor/TensorRef.h | 3 + unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h | 2 + unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 1 + .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 24 ++-- .../Eigen/CXX11/src/Tensor/TensorStriding.h | 2 + unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h | 1 + .../Eigen/CXX11/src/Tensor/TensorVolumePatch.h | 1 + 33 files changed, 312 insertions(+), 82 deletions(-) (limited to 'unsupported/Eigen') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h index c0f33ba2d..ea3ab329d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -87,6 +87,7 @@ struct TensorEvaluator, Device> IsAligned = /*TensorEvaluator::IsAligned*/ false, PacketAccess = /*TensorEvaluator::PacketAccess*/ false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -220,6 +221,7 @@ struct TensorEvaluator, Devi IsAligned = /*TensorEvaluator::IsAligned*/ false, PacketAccess = /*TensorEvaluator::PacketAccess*/ false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator >, Device>::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h index 442639868..5110e99ee 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorArgMaxSycl.h @@ -109,6 +109,7 @@ struct TensorEvaluator, Sy IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index f1f877c16..9ec1ec726 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -102,14 +102,16 @@ struct TensorEvaluator, Device> static const int NumDims = XprType::NumDims; enum { - IsAligned = TensorEvaluator::IsAligned & - TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & - TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess & - TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - RawAccess = TensorEvaluator::RawAccess + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess & + TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess | + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + RawAccess = TensorEvaluator::RawAccess }; typedef typename internal::TensorBlock< diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 877603421..4a3e1ac17 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -381,6 +381,147 @@ class TensorBlockWriter : public TensorBlockIO + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const UnaryFunctor& functor, const StorageIndex num_coeff, + const StorageIndex output_index, const StorageIndex output_stride, + OutputScalar* output_data, const StorageIndex input_index, + const StorageIndex input_stride, const InputScalar* input_data) { + typedef const Eigen::Array Input; + typedef Eigen::Array Output; + + typedef Eigen::Map> InputMap; + typedef Eigen::Map> OutputMap; + + const InputScalar* input_base = &input_data[input_index]; + OutputScalar* output_base = &output_data[output_index]; + + const InputMap input(input_base, num_coeff, InnerStride<>(input_stride)); + OutputMap output(output_base, num_coeff, InnerStride<>(output_stride)); + + output = Eigen::CwiseUnaryOp(input, functor); + } +}; + +/** + * \class TensorBlockCwiseUnaryIO + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor block IO class for carrying out cwise unary ops. + * + * This class carries out the unary op on given blocks. + */ +template +struct TensorBlockCwiseUnaryIO { + typedef typename internal::TensorBlock::Dimensions Dimensions; + + struct BlockIteratorState { + StorageIndex output_stride, output_span; + StorageIndex input_stride, input_span; + StorageIndex size, count; + }; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( + const UnaryFunctor& functor, const Dimensions& block_sizes, + const Dimensions& block_strides, OutputScalar* output_data, + const array& input_strides, + const InputScalar* input_data) { + // Find the innermost dimension whose size is not 1. This is the effective + // inner dim. If all dimensions are of size 1, fallback to using the actual + // innermost dim to avoid out-of-bound access. + int num_size_one_inner_dims = 0; + for (int i = 0; i < NumDims; ++i) { + const int dim = cond()(i, NumDims - i - 1); + if (block_sizes[dim] != 1) { + num_size_one_inner_dims = i; + break; + } + } + // Calculate strides and dimensions. + const int inner_dim = + NumDims == 0 ? 1 + : cond()(num_size_one_inner_dims, + NumDims - num_size_one_inner_dims - 1); + StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim]; + for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) { + const int dim = cond()(i, NumDims - i - 1); + // Merge multiple inner dims into one for larger inner dim size (i.e. + // fewer calls to TensorBlockCwiseUnaryOp::Run()). + if (inner_dim_size == block_strides[dim] && + block_strides[dim] == input_strides[dim]) { + inner_dim_size *= block_sizes[dim]; + ++num_size_one_inner_dims; + } else { + break; + } + } + + StorageIndex output_index = 0, input_index = 0; + + const StorageIndex output_stride = + NumDims == 0 ? 1 : block_strides[inner_dim]; + const StorageIndex input_stride = + NumDims == 0 ? 1 : input_strides[inner_dim]; + + const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1; + array block_iter_state; + + // Initialize block iterator state. Squeeze away any dimension of size 1. + int num_squeezed_dims = 0; + for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { + const int dim = cond()(i + 1, NumDims - i - 2); + const StorageIndex size = block_sizes[dim]; + if (size == 1) { + continue; + } + BlockIteratorState& state = block_iter_state[num_squeezed_dims]; + state.output_stride = block_strides[dim]; + state.input_stride = input_strides[dim]; + state.size = size; + state.output_span = state.output_stride * (size - 1); + state.input_span = state.input_stride * (size - 1); + state.count = 0; + ++num_squeezed_dims; + } + + // Compute cwise unary op. + const StorageIndex block_total_size = + NumDims == 0 ? 1 : block_sizes.TotalSize(); + for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) { + TensorBlockCwiseUnaryOp::Run(functor, inner_dim_size, output_index, + output_stride, output_data, input_index, + input_stride, input_data); + // Update index. + for (int j = 0; j < num_squeezed_dims; ++j) { + auto& state = block_iter_state[j]; + if (++state.count < state.size) { + output_index += state.output_stride; + input_index += state.input_stride; + break; + } + state.count = 0; + output_index -= state.output_span; + input_index -= state.input_span; + } + } + } +}; + /** * \class TensorBlockCwiseBinaryOp * \ingroup CXX11_Tensor_Module diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index a4d750885..5e812b04d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -108,11 +108,12 @@ struct TensorEvaluator, Device> bool isCopy= false, nByOne = false, oneByN = false; enum { - IsAligned = true, - PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - RawAccess = false + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index b4c4162ef..76fab39e2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -144,12 +144,13 @@ struct TensorEvaluator, Device> enum { // Alignment can't be guaranteed at compile time since it depends on the // slice offsets. - IsAligned = false, - PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 27c92d8f6..3863ee8c3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -123,6 +123,7 @@ struct TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, RawAccess = false }; @@ -308,6 +309,7 @@ template::PacketAccess & TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e604456e8..c459fc649 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -241,6 +241,7 @@ struct TensorContractionEvaluatorBase IsAligned = true, PacketAccess = (PacketType::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index a7751eee1..1f613d3c7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -196,6 +196,7 @@ struct TensorEvaluator, Device> IsAligned = false, PacketAccess = true, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, RawAccess = false }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index a07e32db0..2d0e6599f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -308,6 +308,7 @@ struct TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -780,6 +781,7 @@ struct TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h index d301d0c01..e79958fc9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h @@ -243,6 +243,7 @@ struct TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 47b5a5a5e..d71b2e34b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -93,6 +93,7 @@ struct TensorEvaluator, Devi IsAligned = false, PacketAccess = (PacketType::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -255,6 +256,7 @@ struct TensorEvaluator::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 256d499f2..554ee5f59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -108,6 +108,7 @@ struct TensorEvaluator, Device> IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 028902fea..e30f9ad8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -43,6 +43,7 @@ struct TensorEvaluator IsAligned = Derived::IsAligned, PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -195,6 +196,7 @@ struct TensorEvaluator IsAligned = Derived::IsAligned, PacketAccess = (PacketType::size > 1), BlockAccess = internal::is_arithmetic::type>::value, + PreferBlockAccess = false, Layout = Derived::Layout, CoordAccess = NumCoords > 0, RawAccess = true @@ -288,6 +290,7 @@ struct TensorEvaluator, Device> IsAligned = true, PacketAccess = internal::functor_traits::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -351,27 +354,34 @@ struct TensorEvaluator, Device> typedef TensorCwiseUnaryOp XprType; enum { - IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & - internal::functor_traits::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + BlockAccess = TensorEvaluator::PacketAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), + : m_device(device), + m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) { } typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNoConst; typedef typename internal::traits::Scalar CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = PacketType::size; typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef internal::TensorBlock + TensorBlock; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { @@ -399,6 +409,29 @@ struct TensorEvaluator, Device> TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( + std::vector* resources) const { + m_argImpl.getResourceRequirements(resources); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( + TensorBlock* output_block) const { + if (NumDims <= 0) { + output_block->data()[0] = coeff(0); + return; + } + internal::TensorBlockView arg_block(m_device, m_argImpl, + *output_block); + internal::TensorBlockCwiseUnaryIO::Run(m_functor, + output_block->block_sizes(), + output_block + ->block_strides(), + output_block->data(), + arg_block.block_strides(), + arg_block.data()); + } + EIGEN_DEVICE_FUNC typename Eigen::internal::traits::PointerType data() const { return NULL; } /// required by sycl in order to extract the accessor @@ -408,6 +441,7 @@ struct TensorEvaluator, Device> private: + const Device& m_device; const UnaryOp m_functor; TensorEvaluator m_argImpl; }; @@ -421,16 +455,18 @@ struct TensorEvaluator XprType; enum { - IsAligned = TensorEvaluator::IsAligned & - TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess & - TensorEvaluator::PacketAccess & - internal::functor_traits::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess & - TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = TensorEvaluator::IsAligned & + TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & + TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess & + TensorEvaluator::BlockAccess, + PreferBlockAccess = TensorEvaluator::PreferBlockAccess | + TensorEvaluator::PreferBlockAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) @@ -501,7 +537,7 @@ struct TensorEvaluatordata()[0] = coeff(0); + output_block->data()[0] = coeff(Index(0)); return; } internal::TensorBlockView left_block( @@ -543,6 +579,7 @@ struct TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -648,6 +685,7 @@ struct TensorEvaluator PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & PacketType::HasBlend, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index d6ab4d997..480cf1f39 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -136,6 +136,7 @@ struct TensorEvaluator, D IsAligned = false, PacketAccess = true, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index 1342e47a6..71ba56773 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -42,6 +42,7 @@ class TensorFixedSize : public TensorBase0), PacketAccess = (internal::packet_traits::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = Options_ & RowMajor ? RowMajor : ColMajor, CoordAccess = true, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 2778bf5ec..edf9f85d8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -99,6 +99,7 @@ struct TensorEvaluator, Device> IsAligned = true, PacketAccess = (PacketType::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, RawAccess = true }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 0dd524a30..b8a57ab99 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -132,7 +132,12 @@ struct IsVectorizable { template struct IsTileable { - static const bool value = TensorEvaluator::BlockAccess; + // Check that block evaluation is supported and it's a preferred option (at + // least one sub-expression has much faster block evaluation, e.g. + // broadcasting). + static const bool value = + TensorEvaluator::BlockAccess & + TensorEvaluator::PreferBlockAccess; }; template , Device> IsAligned = false, PacketAccess = (PacketType::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 2b2f4a650..1826d7022 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -244,12 +244,13 @@ struct TensorEvaluator, Device> static const int PacketSize = PacketType::size; enum { - IsAligned = false, - PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = true, - Layout = TensorEvaluator::Layout, - CoordAccess = false, - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = true, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false }; typedef internal::TensorBlock diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h index 64f2ad81f..e28565009 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -91,6 +91,7 @@ struct TensorEvaluator, Device> IsAligned = /*TensorEvaluator::IsAligned*/ false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index e3165fa10..998757d14 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -120,6 +120,7 @@ struct TensorEvaluator, Device> IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, CoordAccess = false, // to be implemented RawAccess = TensorEvaluator::RawAccess @@ -183,6 +184,7 @@ template IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, CoordAccess = false // to be implemented }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index b25c1eabc..6f9294ccf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -111,16 +111,17 @@ struct TensorEvaluator, Device> static const int NumInputDims = internal::array_size::Dimensions>::value; enum { - IsAligned = TensorEvaluator::IsAligned, - PacketAccess = TensorEvaluator::PacketAccess, + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, // TODO(andydavis, wuke) Enable BlockAccess for the general case when the // performance issue with block-based reshape is resolved. - BlockAccess = TensorEvaluator::BlockAccess && - TensorEvaluator::RawAccess && - NumInputDims > 0 && NumOutputDims > 0, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = TensorEvaluator::RawAccess + BlockAccess = TensorEvaluator::BlockAccess && + TensorEvaluator::RawAccess && + NumInputDims > 0 && NumOutputDims > 0, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess }; typedef typename internal::remove_const::type ScalarNoConst; @@ -349,6 +350,7 @@ template IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = TensorEvaluator::RawAccess @@ -508,12 +510,13 @@ struct TensorEvaluator, Devi enum { // Alignment can't be guaranteed at compile time since it depends on the // slice offsets and sizes. - IsAligned = /*TensorEvaluator::IsAligned*/false, - PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, - RawAccess = false + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; @@ -785,12 +788,13 @@ struct TensorEvaluator, Device> typedef Sizes Dimensions; enum { - IsAligned = /*TensorEvaluator::IsAligned*/false, - PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, - RawAccess = (NumDims == 1) & TensorEvaluator::RawAccess + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = (NumDims == 1) & TensorEvaluator::RawAccess }; typedef typename internal::remove_const::type ScalarNoConst; @@ -972,6 +976,7 @@ struct TensorEvaluator::Layout, RawAccess = false }; @@ -1148,6 +1153,7 @@ struct TensorEvaluator::Layout, CoordAccess = TensorEvaluator::CoordAccess, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index aa1db3c73..59c1704ed 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -97,6 +97,7 @@ struct TensorEvaluator, Device IsAligned = true, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = true, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index a0a1ad8f4..4292fe0c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -95,6 +95,7 @@ struct TensorEvaluator, Device> IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 80f179ba4..3d534eaa2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -475,12 +475,13 @@ struct TensorEvaluator, static const int PacketSize = PacketType::size; enum { - IsAligned = false, - PacketAccess = Self::InputPacketAccess && Op::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = Self::InputPacketAccess && Op::PacketAccess, + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h index a6cade50f..6e15e75f9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h @@ -137,6 +137,7 @@ template class TensorRef : public TensorBase, Device> IsAligned = false, PacketAccess = false, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorRef::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -414,6 +416,7 @@ struct TensorEvaluator, Device> : public TensorEvaluator, Device IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -255,6 +256,7 @@ struct TensorEvaluator, Device> IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index b1135f297..641366d9d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -97,6 +97,7 @@ struct TensorEvaluator, Device> { IsAligned = false, PacketAccess = (PacketType::size > 1), BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = true diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 98f125408..a5b541a68 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -111,12 +111,13 @@ struct TensorEvaluator, Device> static const int PacketSize = PacketType::size; enum { - IsAligned = false, - PacketAccess = (PacketType::size > 1), - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - RawAccess = false + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; @@ -412,11 +413,12 @@ struct TensorEvaluator, Device> static const int PacketSize = PacketType::size; enum { - IsAligned = false, - PacketAccess = (PacketType::size > 1), - BlockAccess = TensorEvaluator::BlockAccess, - Layout = TensorEvaluator::Layout, - RawAccess = false + IsAligned = false, + PacketAccess = (PacketType::size > 1), + BlockAccess = TensorEvaluator::BlockAccess, + PreferBlockAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false }; typedef typename internal::remove_const::type ScalarNoConst; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 4b69072f2..221dc96c9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -113,6 +113,7 @@ struct TensorEvaluator, Device> IsAligned = /*TensorEvaluator::IsAligned*/false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false @@ -275,6 +276,7 @@ struct TensorEvaluator, Device> IsAligned = /*TensorEvaluator::IsAligned*/false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, // to be implemented RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h index ea53bb04b..9fc54a4c0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -96,6 +96,7 @@ struct TensorEvaluator, Device> IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = false diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index 3c7d8bbc0..c1b7a58ca 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -200,6 +200,7 @@ struct TensorEvaluator, D IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = false, RawAccess = false -- cgit v1.2.3