diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2019-11-12 10:12:28 -0800 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2019-11-12 10:12:28 -0800 |
commit | 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch) | |
tree | 20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | |
parent | 71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff) |
Remove legacy block evaluation support
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 178 |
1 files changed, 1 insertions, 177 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 781f1d75b..700337539 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -135,11 +135,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> enum { IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - // TODO(andydavis, wuke) Enable BlockAccess for the general case when the - // performance issue with block-based reshape is resolved. - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess && - TensorEvaluator<ArgType, Device>::RawAccess && - NumInputDims > 0 && NumOutputDims > 0, // For trivial reshapes with raw access to underlying data we will provide // zero overhead block access. // TODO(ezhulenev): Consider adding block access without raw access? @@ -153,14 +148,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> typedef typename internal::remove_const<Scalar>::type ScalarNoConst; - typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout> - InputTensorBlock; - typedef internal::TensorBlock<ScalarNoConst, Index, NumOutputDims, Layout> - OutputTensorBlock; - typedef internal::TensorBlockReader<ScalarNoConst, Index, NumOutputDims, - Layout> - OutputTensorBlockReader; - //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc; typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; @@ -177,30 +164,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> // The total size of the reshaped tensor must be equal to the total size // of the input tensor. eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); - - if (BlockAccess) { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = - m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - m_inputStrides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1]; - } - } else { - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - m_inputStrides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1]; - } - } - } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -249,128 +212,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Index size; Index count; }; - // TODO(andydavis) Reduce the overhead of this function. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - if (m_impl.data() != NULL) { - OutputTensorBlockReader::Run(output_block, m_impl.data()); - return; - } - - // Calculate output block unit-stride inner dimension length. - const DSizes<Index, NumOutputDims>& output_block_sizes = - output_block->block_sizes(); - Index output_inner_dim_size = 1; - Index output_outer_dim_start = NumOutputDims; - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumOutputDims - i - 1; - output_inner_dim_size *= output_block_sizes[dim]; - if (output_block_sizes[dim] < m_dimensions[dim]) { - output_outer_dim_start = i + 1; - break; - } - } - - // Initialize output block iterator state. - array<BlockIteratorState, NumOutputDims> block_iter_state; - - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumOutputDims - i - 1; - block_iter_state[i].size = output_block_sizes[dim]; - block_iter_state[i].stride = m_outputStrides[dim]; - block_iter_state[i].span = - block_iter_state[i].stride * (block_iter_state[i].size - 1); - block_iter_state[i].count = 0; - } - - const Index output_outer_dim_size = output_block_sizes.TotalSize() / - output_inner_dim_size; - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = - m_impl.dimensions(); - - Index index = output_block->first_coeff_index(); - for (Index outer_idx = 0; outer_idx < output_outer_dim_size; ++outer_idx) { - Index inner_idx = 0; - while (inner_idx < output_inner_dim_size) { - // Calculate input coords based on 'index'. - array<Index, NumInputDims> input_coords; - Index idx = index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumInputDims - 1; i > 0; --i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[0] = idx; - } else { - for (int i = 0; i < NumInputDims - 1; ++i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[NumInputDims - 1] = idx; - } - - // Calculate target input block shape, using at most - // 'output_inner_dim_size' coefficients along the input block's inner - // dimensions. - DSizes<Index, NumInputDims> input_block_sizes; - Index num_to_allocate = output_inner_dim_size - inner_idx; - for (Index i = 0; i < NumInputDims; ++i) { - const Index dim = - static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? i : NumInputDims - i - 1; - input_block_sizes[dim] = numext::mini( - num_to_allocate, (static_cast<Index>(input_dims[dim]) - - input_coords[dim])); - if (input_coords[dim] == 0) { - num_to_allocate /= input_block_sizes[dim]; - } else { - num_to_allocate = 1; - } - } - - // Calculate input block strides. - DSizes<Index, NumInputDims> input_block_strides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - input_block_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_block_strides[i] = input_block_strides[i - 1] * - input_block_sizes[i - 1]; - } - } else { - input_block_strides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_block_strides[i] = input_block_strides[i + 1] * - input_block_sizes[i + 1]; - } - } - - // Instantiate and read input block from input tensor. - InputTensorBlock input_block(index, input_block_sizes, - input_block_strides, m_inputStrides, - output_block->data() + outer_idx * - output_inner_dim_size + inner_idx); - - m_impl.block(&input_block); - - const Index input_block_total_size = input_block_sizes.TotalSize(); - index += input_block_total_size; - inner_idx += input_block_total_size; - } - eigen_assert(inner_idx == output_inner_dim_size); - index -= output_inner_dim_size; - // Update index. - for (Index i = output_outer_dim_start; i < NumOutputDims; ++i) { - if (++block_iter_state[i].count < block_iter_state[i].size) { - index += block_iter_state[i].stride; - break; - } - block_iter_state[i].count = 0; - index -= block_iter_state[i].span; - } - } - } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, @@ -408,8 +249,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> protected: TensorEvaluator<ArgType, Device> m_impl; NewDimensions m_dimensions; - DSizes<Index, NumOutputDims> m_outputStrides; - DSizes<Index, NumInputDims> m_inputStrides; }; @@ -426,7 +265,6 @@ template<typename NewDimensions, typename ArgType, typename Device> enum { IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess, PreferBlockAccess = false, Layout = TensorEvaluator<ArgType, Device>::Layout, @@ -619,7 +457,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi // slice offsets and sizes. IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2, PreferBlockAccess = true, Layout = TensorEvaluator<ArgType, Device>::Layout, @@ -714,7 +551,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi } } // Use memcpy if it's going to be faster than using the regular evaluation. - const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device); + const MemcpyTriggerForSlicing<Index, Device, BlockAccessV2> trigger(m_device); if (trigger(internal::array_prod(dimensions()), contiguous_values)) { EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data(); for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { @@ -808,16 +645,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi m_impl.getResourceRequirements(resources); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - TensorBlock* output_block) const { - TensorBlock input_block(srcCoeff(output_block->first_coeff_index()), - output_block->block_sizes(), - output_block->block_strides(), - TensorBlockDimensions(m_inputStrides), - output_block->data()); - m_impl.block(&input_block); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool /*root_of_expr_ast*/ = false) const { @@ -922,7 +749,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> enum { IsAligned = false, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2, PreferBlockAccess = true, Layout = TensorEvaluator<ArgType, Device>::Layout, @@ -1124,7 +950,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, // slice offsets and sizes. IsAligned = false, PacketAccess = false, - BlockAccess = false, BlockAccessV2 = false, PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, Layout = TensorEvaluator<ArgType, Device>::Layout, @@ -1306,7 +1131,6 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride enum { IsAligned = false, PacketAccess = false, - BlockAccess = false, BlockAccessV2 = false, PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, Layout = TensorEvaluator<ArgType, Device>::Layout, |