path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
author	Eugene Zhulenev <ezhulenev@google.com>	2019-11-12 10:12:28 -0800
committer	Eugene Zhulenev <ezhulenev@google.com>	2019-11-12 10:12:28 -0800
commit	13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch)
tree	20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
parent	71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff)
Remove legacy block evaluation support
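This drops the original TensorBlock-based code path from the reduction evaluator: the BlockAccess trait, the OutputTensorBlock/InputTensorBlock typedefs, and the block() method that sharded the input slice and ran per-output-coefficient BlockReducers. Only the V2 block-evaluation strategy remains, which this evaluator still declares as internal::TensorBlockNotImplemented (see the TensorBlockV2 typedef in the hunks below).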
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r--	unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h	258
1 file changed, 0 insertions, 258 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index d826cfb7e..84604cf41 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -584,7 +584,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
enum {
IsAligned = false,
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
- BlockAccess = false,
BlockAccessV2 = false,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -594,11 +593,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumOutputDims, Layout>
- OutputTensorBlock;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
- InputTensorBlock;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockNotImplemented TensorBlockV2;
//===--------------------------------------------------------------------===//
@@ -920,258 +914,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
m_impl.getResourceRequirements(resources);
}
- EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void block(
- OutputTensorBlock* output_block) const {
- // Special case full reductions to avoid input block copy below.
- if (NumInputDims == NumReducedDims) {
- eigen_assert(output_block->first_coeff_index() == 0);
- eigen_assert(output_block->block_sizes().TotalSize() == 1);
- Op reducer(m_reducer);
- output_block->data()[0] = internal::InnerMostDimReducer<Self, Op>::reduce(
- *this, 0, m_numValuesToReduce, reducer);
- return;
- }
-
- // Calculate input tensor 'slice' required to reduce output block coeffs.
- DSizes<Index, NumInputDims> input_slice_sizes(m_impl.dimensions());
- for (int i = 0; i < NumOutputDims; ++i) {
- // Clip preserved input dimensions by output block size.
- input_slice_sizes[m_output_to_input_dim_map[i]] =
- output_block->block_sizes()[i];
- }
-
- // Shard input tensor slice into blocks (because it could be large if we
- // need to reduce along several dimensions to calculate required output
- // coefficients).
- const Index max_coeff_count =
- numext::mini<Index>(((m_device.firstLevelCacheSize()) / sizeof(Scalar)),
- input_slice_sizes.TotalSize());
-
- // Calculate max output shard size needed to keep working set of reducers
- // in L1, while leaving enough space for reducer overhead and 'PacketSize'
- // reductions.
- DSizes<Index, NumInputDims> target_input_block_sizes;
- CalculateTargetInputBlockShape(max_coeff_count, input_slice_sizes,
- &target_input_block_sizes);
- // Calculate indices for first preserved dimension.
- const Index first_preserved_dim_output_index =
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumOutputDims - 1;
- const Index first_preserved_dim_input_index =
- m_output_to_input_dim_map[first_preserved_dim_output_index];
- const bool inner_most_dim_preserved =
- PreservingInnerMostDims ||
- (first_preserved_dim_input_index ==
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumInputDims - 1));
-
- // Calculate output block inner/outer dimension sizes.
- const Index output_block_inner_dim_size =
- output_block->block_sizes()[first_preserved_dim_output_index];
- const Index output_block_outer_dim_size =
- output_block->block_sizes().TotalSize() / output_block_inner_dim_size;
- // Calculate shard size for first preserved dimension.
- const Index output_shard_size =
- target_input_block_sizes[first_preserved_dim_input_index];
- const Index num_output_shards =
- (output_block_inner_dim_size + output_shard_size - 1) /
- output_shard_size;
-
- // Initialize 'tensor_slice_offsets' from input coords of output index.
- DSizes<Index, NumInputDims> tensor_slice_offsets;
- GetInputCoordsForOutputIndex(output_block->first_coeff_index(),
- &tensor_slice_offsets);
-
- // Store tensor slice offset in first preserved dimension to be used
- // to update tensor slice extents in loop below.
- const Index first_preserved_dim_offset_start =
- tensor_slice_offsets[first_preserved_dim_input_index];
-
- array<BlockIteratorState, NumOutputDims> block_iter_state;
-
- // Initialize state used to iterate through output coefficients
- // and update 'tensor_slice_offsets' in outer preserved dims.
- for (int i = 0; i < NumOutputDims - 1; ++i) {
- const int dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? i + 1
- : NumOutputDims - i - 2;
- block_iter_state[i].input_dim = m_output_to_input_dim_map[dim];
- block_iter_state[i].output_size = output_block->block_sizes()[dim];
- block_iter_state[i].output_count = 0;
- }
-
- // Allocate input block memory.
- ScalarNoConst* input_block_data = static_cast<ScalarNoConst*>(
- m_device.allocate(max_coeff_count * sizeof(Scalar)));
- // Allocate reducer memory.
- const bool packet_reductions_enabled =
- (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess);
- const Index num_reducers =
- (inner_most_dim_preserved && packet_reductions_enabled)
- ? (output_shard_size / PacketSize + output_shard_size % PacketSize +
- PacketSize)
- : output_shard_size;
- typedef internal::BlockReducer<Self, Op> BlockReducer;
- BlockReducer* reducers = static_cast<BlockReducer*>(
- m_device.allocate(num_reducers * sizeof(BlockReducer)));
-
- InputDimensions input_tensor_dims(m_impl.dimensions());
- for (Index output_outer_index = 0;
- output_outer_index < output_block_outer_dim_size;
- ++output_outer_index) {
- for (Index output_shard_index = 0; output_shard_index < num_output_shards;
- ++output_shard_index) {
- // Initialize 'tensor_slice_extents' for this output shard.
- DSizes<Index, NumInputDims> tensor_slice_extents(input_slice_sizes);
- for (int i = 0; i < NumInputDims; ++i) {
- if (i == first_preserved_dim_input_index) {
- // Clip first preserved dim size to output shard size.
- tensor_slice_extents[i] = numext::mini(
- output_shard_size,
- input_slice_sizes[i] - (tensor_slice_offsets[i] -
- first_preserved_dim_offset_start));
-
- } else if (!m_reduced[i]) {
- // Clip outer preserved dims to size 1, so that we reduce a
- // contiguous set of output coefficients.
- tensor_slice_extents[i] = 1;
- }
- }
-
- // Initialize output coefficient reducers.
- for (int i = 0; i < num_reducers; ++i) {
- new (&reducers[i]) BlockReducer(m_reducer);
- }
-
- typedef internal::TensorSliceBlockMapper<ScalarNoConst, Index,
- NumInputDims, Layout>
- TensorSliceBlockMapper;
-
- // TODO(andydavis) Consider removing 'input_block_stride_order' if we
- // find that scattered reads are not worth supporting in
- // TensorSliceBlockMapper.
- TensorSliceBlockMapper block_mapper(
- typename TensorSliceBlockMapper::Dimensions(input_tensor_dims),
- tensor_slice_offsets, tensor_slice_extents,
- target_input_block_sizes, DimensionList<Index, NumInputDims>());
-
- const Index num_outputs_to_update =
- tensor_slice_extents[first_preserved_dim_input_index];
- const Index preserved_dim_vector_reducer_count =
- (inner_most_dim_preserved && packet_reductions_enabled)
- ? num_outputs_to_update / PacketSize
- : 0;
- const Index preserved_dim_vector_coeff_count =
- inner_most_dim_preserved
- ? preserved_dim_vector_reducer_count * PacketSize
- : 0;
- const Index preserved_dim_reducer_limit =
- (inner_most_dim_preserved && packet_reductions_enabled)
- ? (preserved_dim_vector_reducer_count +
- num_outputs_to_update % PacketSize)
- : num_outputs_to_update;
-
- const Index total_block_count = block_mapper.total_block_count();
- for (Index b = 0; b < total_block_count; ++b) {
- InputTensorBlock input_block =
- block_mapper.GetBlockForIndex(b, input_block_data);
- // Read.
- m_impl.block(&input_block);
-
- Index num_values_to_reduce = 1;
- for (Index i = 0; i < NumInputDims; ++i) {
- if (m_reduced[i]) {
- num_values_to_reduce *= input_block.block_sizes()[i];
- }
- }
- // Reduce.
- if (inner_most_dim_preserved) {
- const Index input_outer_dim_size =
- input_block.block_sizes().TotalSize() / num_outputs_to_update;
- for (Index input_outer_dim_index = 0;
- input_outer_dim_index < input_outer_dim_size;
- ++input_outer_dim_index) {
- const Index input_outer_dim_base =
- input_outer_dim_index * num_outputs_to_update;
- for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) {
- reducers[i].Reduce(input_outer_dim_base + i * PacketSize,
- PacketSize, input_block.data());
- }
- const Index scalar_reducer_base =
- input_outer_dim_base + preserved_dim_vector_coeff_count;
- for (Index i = preserved_dim_vector_reducer_count;
- i < preserved_dim_reducer_limit; ++i) {
- reducers[i].Reduce(scalar_reducer_base + i -
- preserved_dim_vector_reducer_count,
- 1, input_block.data());
- }
- }
- } else {
- for (Index i = 0; i < num_outputs_to_update; ++i) {
- reducers[i].Reduce(i * num_values_to_reduce, num_values_to_reduce,
- input_block.data());
- }
- }
- }
-
- // Finalize all reducers for this output shard.
- const Index output_base_index =
- output_outer_index * output_block_inner_dim_size +
- output_shard_index * output_shard_size;
- if (inner_most_dim_preserved) {
- EIGEN_ALIGN_MAX
- typename internal::remove_const<CoeffReturnType>::type
- values[PacketSize];
- for (Index i = 0; i < preserved_dim_vector_reducer_count; ++i) {
- const Index reducer_base = output_base_index + i * PacketSize;
- internal::pstore<CoeffReturnType, PacketReturnType>(
- values, reducers[i].FinalizePacket());
- for (Index j = 0; j < PacketSize; ++j) {
- output_block->data()[reducer_base + j] = values[j];
- }
- }
- const Index scalar_reducer_base =
- output_base_index + preserved_dim_vector_coeff_count;
-
- for (Index i = preserved_dim_vector_reducer_count;
- i < preserved_dim_reducer_limit; ++i) {
- output_block->data()[scalar_reducer_base + i -
- preserved_dim_vector_reducer_count] =
- reducers[i].Finalize();
- }
- } else {
- for (int i = 0; i < num_outputs_to_update; ++i) {
- output_block->data()[output_base_index + i] =
- reducers[i].Finalize();
- }
- }
-
- // Update 'tensor_slice_offsets' by num outputs for this output shard.
- tensor_slice_offsets[first_preserved_dim_input_index] +=
- num_outputs_to_update;
- }
- // Update slice offset for inner preserved dim.
- tensor_slice_offsets[first_preserved_dim_input_index] -=
- output_block_inner_dim_size;
- // Update slice offsets for remaining output dims.
- for (int i = 0; i < NumOutputDims - 1; ++i) {
- BlockIteratorState& b = block_iter_state[i];
- if (++b.output_count < b.output_size) {
- ++tensor_slice_offsets[b.input_dim];
- break;
- }
- b.output_count = 0;
- tensor_slice_offsets[b.input_dim] -= b.output_size - 1;
- }
- }
-
- // Free memory.
- m_device.deallocate(input_block_data);
- m_device.deallocate(reducers);
- }
-
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
EIGEN_DEVICE_FUNC const Device& device() const { return m_device; }
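
For reference, two of the heuristics in the removed block() path, shown in isolation: it clipped the number of materialized input coefficients to what fits in the first-level cache (the numext::mini(firstLevelCacheSize()/sizeof(Scalar), input_slice_sizes.TotalSize()) expression above), and it split the first preserved output dimension into shards with a ceiling division. The following is a minimal standalone sketch; the helper names MaxCoeffCount and NumShards are illustrative only and not part of the Eigen API.

#include <algorithm>
#include <cstddef>

// Sketch of the cache-aware clipping used when sizing an input block: the
// number of coefficients materialized at once is bounded both by how many
// Scalars fit in the first-level cache and by the total size of the slice.
template <typename Scalar>
std::ptrdiff_t MaxCoeffCount(std::ptrdiff_t first_level_cache_bytes,
                             std::ptrdiff_t slice_total_size) {
  const std::ptrdiff_t cache_coeffs =
      first_level_cache_bytes / static_cast<std::ptrdiff_t>(sizeof(Scalar));
  return std::min(cache_coeffs, slice_total_size);
}

// Sketch of the shard count for the first preserved dimension: a plain
// ceiling division, as in the removed num_output_shards computation.
inline std::ptrdiff_t NumShards(std::ptrdiff_t dim_size,
                                std::ptrdiff_t shard_size) {
  return (dim_size + shard_size - 1) / shard_size;  // e.g. (100 + 31) / 32 == 4
}

For example, with a 32 KB first-level cache and float inputs, MaxCoeffCount<float>(32 * 1024, slice_size) caps a block at 8192 coefficients whenever the slice is larger than that, which matches the max_coeff_count computation in the removed code.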