From 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 12 Nov 2019 10:12:28 -0800 Subject: Remove legacy block evaluation support --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 258 --------------------- 1 file changed, 258 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index d826cfb7e..84604cf41 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -584,7 +584,6 @@ struct TensorReductionEvaluatorBase::Layout, @@ -594,11 +593,6 @@ struct TensorReductionEvaluatorBase::type ScalarNoConst; - typedef internal::TensorBlock - OutputTensorBlock; - typedef internal::TensorBlock - InputTensorBlock; - //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// typedef internal::TensorBlockNotImplemented TensorBlockV2; //===--------------------------------------------------------------------===// @@ -920,258 +914,6 @@ struct TensorReductionEvaluatorBasefirst_coeff_index() == 0); - eigen_assert(output_block->block_sizes().TotalSize() == 1); - Op reducer(m_reducer); - output_block->data()[0] = internal::InnerMostDimReducer::reduce( - *this, 0, m_numValuesToReduce, reducer); - return; - } - - // Calculate input tensor 'slice' required to reduce output block coeffs. - DSizes input_slice_sizes(m_impl.dimensions()); - for (int i = 0; i < NumOutputDims; ++i) { - // Clip preserved input dimensions by output block size. - input_slice_sizes[m_output_to_input_dim_map[i]] = - output_block->block_sizes()[i]; - } - - // Shard input tensor slice into blocks (because it could be large if we - // need to reduce along several dimensions to calculate required output - // coefficients). - const Index max_coeff_count = - numext::mini(((m_device.firstLevelCacheSize()) / sizeof(Scalar)), - input_slice_sizes.TotalSize()); - - // Calculate max output shard size needed to keep working set of reducers - // in L1, while leaving enough space for reducer overhead and 'PacketSize' - // reductions. - DSizes target_input_block_sizes; - CalculateTargetInputBlockShape(max_coeff_count, input_slice_sizes, - &target_input_block_sizes); - // Calculate indices for first preserved dimension. - const Index first_preserved_dim_output_index = - static_cast(Layout) == static_cast(ColMajor) - ? 0 - : NumOutputDims - 1; - const Index first_preserved_dim_input_index = - m_output_to_input_dim_map[first_preserved_dim_output_index]; - const bool inner_most_dim_preserved = - PreservingInnerMostDims || - (first_preserved_dim_input_index == - (static_cast(Layout) == static_cast(ColMajor) - ? 0 - : NumInputDims - 1)); - - // Calculate output block inner/outer dimension sizes. - const Index output_block_inner_dim_size = - output_block->block_sizes()[first_preserved_dim_output_index]; - const Index output_block_outer_dim_size = - output_block->block_sizes().TotalSize() / output_block_inner_dim_size; - // Calculate shard size for first preserved dimension. - const Index output_shard_size = - target_input_block_sizes[first_preserved_dim_input_index]; - const Index num_output_shards = - (output_block_inner_dim_size + output_shard_size - 1) / - output_shard_size; - - // Initialize 'tensor_slice_offsets' from input coords of output index. - DSizes tensor_slice_offsets; - GetInputCoordsForOutputIndex(output_block->first_coeff_index(), - &tensor_slice_offsets); - - // Store tensor slice offset in first preserved dimension to be used - // to update tensor slice extents in loop below. - const Index first_preserved_dim_offset_start = - tensor_slice_offsets[first_preserved_dim_input_index]; - - array block_iter_state; - - // Initialize state used to iterate through output coefficients - // and update 'tensor_slice_offsets' in outer preserved dims. - for (int i = 0; i < NumOutputDims - 1; ++i) { - const int dim = static_cast(Layout) == static_cast(ColMajor) - ? i + 1 - : NumOutputDims - i - 2; - block_iter_state[i].input_dim = m_output_to_input_dim_map[dim]; - block_iter_state[i].output_size = output_block->block_sizes()[dim]; - block_iter_state[i].output_count = 0; - } - - // Allocate input block memory. - ScalarNoConst* input_block_data = static_cast( - m_device.allocate(max_coeff_count * sizeof(Scalar))); - // Allocate reducer memory. - const bool packet_reductions_enabled = - (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess); - const Index num_reducers = - (inner_most_dim_preserved && packet_reductions_enabled) - ? (output_shard_size / PacketSize + output_shard_size % PacketSize + - PacketSize) - : output_shard_size; - typedef internal::BlockReducer BlockReducer; - BlockReducer* reducers = static_cast( - m_device.allocate(num_reducers * sizeof(BlockReducer))); - - InputDimensions input_tensor_dims(m_impl.dimensions()); - for (Index output_outer_index = 0; - output_outer_index < output_block_outer_dim_size; - ++output_outer_index) { - for (Index output_shard_index = 0; output_shard_index < num_output_shards; - ++output_shard_index) { - // Initialize 'tensor_slice_extents' for this output shard. - DSizes tensor_slice_extents(input_slice_sizes); - for (int i = 0; i < NumInputDims; ++i) { - if (i == first_preserved_dim_input_index) { - // Clip first preserved dim size to output shard size. - tensor_slice_extents[i] = numext::mini( - output_shard_size, - input_slice_sizes[i] - (tensor_slice_offsets[i] - - first_preserved_dim_offset_start)); - - } else if (!m_reduced[i]) { - // Clip outer preserved dims to size 1, so that we reduce a - // contiguous set of output coefficients. - tensor_slice_extents[i] = 1; - } - } - - // Initialize output coefficient reducers. - for (int i = 0; i < num_reducers; ++i) { - new (&reducers[i]) BlockReducer(m_reducer); - } - - typedef internal::TensorSliceBlockMapper - TensorSliceBlockMapper; - - // TODO(andydavis) Consider removing 'input_block_stride_order' if we - // find that scattered reads are not worth supporting in - // TensorSliceBlockMapper. - TensorSliceBlockMapper block_mapper( - typename TensorSliceBlockMapper::Dimensions(input_tensor_dims), - tensor_slice_offsets, tensor_slice_extents, - target_input_block_sizes, DimensionList()); - - const Index num_outputs_to_update = - tensor_slice_extents[first_preserved_dim_input_index]; - const Index preserved_dim_vector_reducer_count = - (inner_most_dim_preserved && packet_reductions_enabled) - ? num_outputs_to_update / PacketSize - : 0; - const Index preserved_dim_vector_coeff_count = - inner_most_dim_preserved - ? preserved_dim_vector_reducer_count * PacketSize - : 0; - const Index preserved_dim_reducer_limit = - (inner_most_dim_preserved && packet_reductions_enabled) - ? (preserved_dim_vector_reducer_count + - num_outputs_to_update % PacketSize) - : num_outputs_to_update; - - const Index total_block_count = block_mapper.total_block_count(); - for (Index b = 0; b < total_block_count; ++b) { - InputTensorBlock input_block = - block_mapper.GetBlockForIndex(b, input_block_data); - // Read. - m_impl.block(&input_block); - - Index num_values_to_reduce = 1; - for (Index i = 0; i < NumInputDims; ++i) { - if (m_reduced[i]) { - num_values_to_reduce *= input_block.block_sizes()[i]; - } - } - // Reduce. - if (inner_most_dim_preserved) { - const Index input_outer_dim_size = - input_block.block_sizes().TotalSize() / num_outputs_to_update; - for (Index input_outer_dim_index = 0; - input_outer_dim_index < input_outer_dim_size; - ++input_outer_dim_index) { - const Index input_outer_dim_base = - input_outer_dim_index * num_outputs_to_update; - for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) { - reducers[i].Reduce(input_outer_dim_base + i * PacketSize, - PacketSize, input_block.data()); - } - const Index scalar_reducer_base = - input_outer_dim_base + preserved_dim_vector_coeff_count; - for (Index i = preserved_dim_vector_reducer_count; - i < preserved_dim_reducer_limit; ++i) { - reducers[i].Reduce(scalar_reducer_base + i - - preserved_dim_vector_reducer_count, - 1, input_block.data()); - } - } - } else { - for (Index i = 0; i < num_outputs_to_update; ++i) { - reducers[i].Reduce(i * num_values_to_reduce, num_values_to_reduce, - input_block.data()); - } - } - } - - // Finalize all reducers for this output shard. - const Index output_base_index = - output_outer_index * output_block_inner_dim_size + - output_shard_index * output_shard_size; - if (inner_most_dim_preserved) { - EIGEN_ALIGN_MAX - typename internal::remove_const::type - values[PacketSize]; - for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) { - const Index reducer_base = output_base_index + i * PacketSize; - internal::pstore( - values, reducers[i].FinalizePacket()); - for (Index j = 0; j < PacketSize; ++j) { - output_block->data()[reducer_base + j] = values[j]; - } - } - const Index scalar_reducer_base = - output_base_index + preserved_dim_vector_coeff_count; - - for (Index i = preserved_dim_vector_reducer_count; - i < preserved_dim_reducer_limit; ++i) { - output_block->data()[scalar_reducer_base + i - - preserved_dim_vector_reducer_count] = - reducers[i].Finalize(); - } - } else { - for (int i = 0; i < num_outputs_to_update; ++i) { - output_block->data()[output_base_index + i] = - reducers[i].Finalize(); - } - } - - // Update 'tensor_slice_offsets' by num outputs for this output shard. - tensor_slice_offsets[first_preserved_dim_input_index] += - num_outputs_to_update; - } - // Update slice offset for inner preserved dim. - tensor_slice_offsets[first_preserved_dim_input_index] -= - output_block_inner_dim_size; - // Update slice offsets for remaining output dims. - for (int i = 0; i < NumOutputDims - 1; ++i) { - BlockIteratorState& b = block_iter_state[i]; - if (++b.output_count < b.output_size) { - ++tensor_slice_offsets[b.input_dim]; - break; - } - b.output_count = 0; - tensor_slice_offsets[b.input_dim] -= b.output_size - 1; - } - } - - // Free memory. - m_device.deallocate(input_block_data); - m_device.deallocate(reducers); - } - EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; } EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } EIGEN_DEVICE_FUNC const Device& device() const { return m_device; } -- cgit v1.2.3