From 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 12 Nov 2019 10:12:28 -0800 Subject: Remove legacy block evaluation support --- .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 178 +-------------------- 1 file changed, 1 insertion(+), 177 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 781f1d75b..700337539 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -135,11 +135,6 @@ struct TensorEvaluator, Device> enum { IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, - // TODO(andydavis, wuke) Enable BlockAccess for the general case when the - // performance issue with block-based reshape is resolved. - BlockAccess = TensorEvaluator::BlockAccess && - TensorEvaluator::RawAccess && - NumInputDims > 0 && NumOutputDims > 0, // For trivial reshapes with raw access to underlying data we will provide // zero overhead block access. // TODO(ezhulenev): Consider adding block access without raw access? @@ -153,14 +148,6 @@ struct TensorEvaluator, Device> typedef typename internal::remove_const::type ScalarNoConst; - typedef internal::TensorBlock - InputTensorBlock; - typedef internal::TensorBlock - OutputTensorBlock; - typedef internal::TensorBlockReader - OutputTensorBlockReader; - //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// typedef internal::TensorBlockDescriptor TensorBlockDesc; typedef internal::TensorBlockScratchAllocator TensorBlockScratch; @@ -177,30 +164,6 @@ struct TensorEvaluator, Device> // The total size of the reshaped tensor must be equal to the total size // of the input tensor. 
eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); - - if (BlockAccess) { - const typename TensorEvaluator::Dimensions& input_dims = - m_impl.dimensions(); - if (static_cast(Layout) == static_cast(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - m_inputStrides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1]; - } - } else { - m_outputStrides[NumOutputDims - 1] = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - m_inputStrides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1]; - } - } - } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } @@ -249,128 +212,6 @@ struct TensorEvaluator, Device> Index size; Index count; }; - // TODO(andydavis) Reduce the overhead of this function. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - OutputTensorBlock* output_block) const { - if (m_impl.data() != NULL) { - OutputTensorBlockReader::Run(output_block, m_impl.data()); - return; - } - - // Calculate output block unit-stride inner dimension length. - const DSizes& output_block_sizes = - output_block->block_sizes(); - Index output_inner_dim_size = 1; - Index output_outer_dim_start = NumOutputDims; - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast(Layout) == static_cast(ColMajor) - ? i : NumOutputDims - i - 1; - output_inner_dim_size *= output_block_sizes[dim]; - if (output_block_sizes[dim] < m_dimensions[dim]) { - output_outer_dim_start = i + 1; - break; - } - } - - // Initialize output block iterator state. 
- array block_iter_state; - - for (Index i = 0; i < NumOutputDims; ++i) { - const Index dim = static_cast(Layout) == static_cast(ColMajor) - ? i : NumOutputDims - i - 1; - block_iter_state[i].size = output_block_sizes[dim]; - block_iter_state[i].stride = m_outputStrides[dim]; - block_iter_state[i].span = - block_iter_state[i].stride * (block_iter_state[i].size - 1); - block_iter_state[i].count = 0; - } - - const Index output_outer_dim_size = output_block_sizes.TotalSize() / - output_inner_dim_size; - const typename TensorEvaluator::Dimensions& input_dims = - m_impl.dimensions(); - - Index index = output_block->first_coeff_index(); - for (Index outer_idx = 0; outer_idx < output_outer_dim_size; ++outer_idx) { - Index inner_idx = 0; - while (inner_idx < output_inner_dim_size) { - // Calculate input coords based on 'index'. - array input_coords; - Index idx = index; - if (static_cast(Layout) == static_cast(ColMajor)) { - for (int i = NumInputDims - 1; i > 0; --i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[0] = idx; - } else { - for (int i = 0; i < NumInputDims - 1; ++i) { - input_coords[i] = idx / m_inputStrides[i]; - idx -= input_coords[i] * m_inputStrides[i]; - } - input_coords[NumInputDims - 1] = idx; - } - - // Calculate target input block shape, using at most - // 'output_inner_dim_size' coefficients along the input block's inner - // dimensions. - DSizes input_block_sizes; - Index num_to_allocate = output_inner_dim_size - inner_idx; - for (Index i = 0; i < NumInputDims; ++i) { - const Index dim = - static_cast(Layout) == static_cast(ColMajor) - ? i : NumInputDims - i - 1; - input_block_sizes[dim] = numext::mini( - num_to_allocate, (static_cast(input_dims[dim]) - - input_coords[dim])); - if (input_coords[dim] == 0) { - num_to_allocate /= input_block_sizes[dim]; - } else { - num_to_allocate = 1; - } - } - - // Calculate input block strides. 
- DSizes input_block_strides; - if (static_cast(Layout) == static_cast(ColMajor)) { - input_block_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_block_strides[i] = input_block_strides[i - 1] * - input_block_sizes[i - 1]; - } - } else { - input_block_strides[NumInputDims - 1] = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_block_strides[i] = input_block_strides[i + 1] * - input_block_sizes[i + 1]; - } - } - - // Instantiate and read input block from input tensor. - InputTensorBlock input_block(index, input_block_sizes, - input_block_strides, m_inputStrides, - output_block->data() + outer_idx * - output_inner_dim_size + inner_idx); - - m_impl.block(&input_block); - - const Index input_block_total_size = input_block_sizes.TotalSize(); - index += input_block_total_size; - inner_idx += input_block_total_size; - } - eigen_assert(inner_idx == output_inner_dim_size); - index -= output_inner_dim_size; - // Update index. - for (Index i = output_outer_dim_start; i < NumOutputDims; ++i) { - if (++block_iter_state[i].count < block_iter_state[i].size) { - index += block_iter_state[i].stride; - break; - } - block_iter_state[i].count = 0; - index -= block_iter_state[i].span; - } - } - } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, @@ -408,8 +249,6 @@ struct TensorEvaluator, Device> protected: TensorEvaluator m_impl; NewDimensions m_dimensions; - DSizes m_outputStrides; - DSizes m_inputStrides; }; @@ -426,7 +265,6 @@ template enum { IsAligned = TensorEvaluator::IsAligned, PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = false, BlockAccessV2 = TensorEvaluator::RawAccess, PreferBlockAccess = false, Layout = TensorEvaluator::Layout, @@ -619,7 +457,6 @@ struct TensorEvaluator, Devi // slice offsets and sizes. 
IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, BlockAccessV2 = TensorEvaluator::BlockAccessV2, PreferBlockAccess = true, Layout = TensorEvaluator::Layout, @@ -714,7 +551,7 @@ struct TensorEvaluator, Devi } } // Use memcpy if it's going to be faster than using the regular evaluation. - const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device); + const MemcpyTriggerForSlicing<Index, Device, BlockAccessV2> trigger(m_device); if (trigger(internal::array_prod(dimensions()), contiguous_values)) { EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data(); for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { @@ -808,16 +645,6 @@ struct TensorEvaluator, Devi m_impl.getResourceRequirements(resources); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block( - TensorBlock* output_block) const { - TensorBlock input_block(srcCoeff(output_block->first_coeff_index()), - output_block->block_sizes(), - output_block->block_strides(), - TensorBlockDimensions(m_inputStrides), - output_block->data()); - m_impl.block(&input_block); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool /*root_of_expr_ast*/ = false) const { @@ -922,7 +749,6 @@ struct TensorEvaluator, Device> enum { IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, - BlockAccess = TensorEvaluator::BlockAccess, BlockAccessV2 = TensorEvaluator::BlockAccessV2, PreferBlockAccess = true, Layout = TensorEvaluator::Layout, @@ -1124,7 +950,6 @@ struct TensorEvaluator::PreferBlockAccess, Layout = TensorEvaluator::Layout, @@ -1306,7 +1131,6 @@ struct TensorEvaluator::PreferBlockAccess, Layout = TensorEvaluator::Layout, -- cgit v1.2.3