path: root/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
author     Eugene Zhulenev <ezhulenev@google.com>  2019-11-12 10:12:28 -0800
committer  Eugene Zhulenev <ezhulenev@google.com>  2019-11-12 10:12:28 -0800
commit     13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch)
tree       20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
parent     71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff)
Remove legacy block evaluation support
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 178
1 file changed, 1 insertion(+), 177 deletions(-)
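This patch deletes the index-based TensorBlock reader/writer path; block evaluation now goes exclusively through the descriptor-based blockV2 API visible in the surviving context below. As a minimal sketch of the zero-overhead case the remaining comment in the first hunk describes ("trivial reshapes with raw access"), assuming desc.offset() and desc.dimensions() accessors and a view-constructing TensorBlockV2 (assumptions, not the verbatim implementation):

// Sketch only: a trivial reshape with raw access can satisfy a block request
// by handing out a view into the underlying buffer instead of copying
// coefficients into scratch memory.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& /*scratch*/,
        bool /*root_of_expr_ast*/ = false) const {
  eigen_assert(m_impl.data() != NULL);  // RawAccess guarantees a buffer.
  // The block is the raw buffer offset by the block's first coefficient;
  // its shape comes straight from the descriptor.
  return TensorBlockV2(m_impl.data() + desc.offset(), desc.dimensions());
}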
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 781f1d75b..700337539 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -135,11 +135,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- // TODO(andydavis, wuke) Enable BlockAccess for the general case when the
- // performance issue with block-based reshape is resolved.
- BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
- TensorEvaluator<ArgType, Device>::RawAccess &&
- NumInputDims > 0 && NumOutputDims > 0,
// For trivial reshapes with raw access to underlying data we will provide
// zero overhead block access.
// TODO(ezhulenev): Consider adding block access without raw access?
@@ -153,14 +148,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
- InputTensorBlock;
- typedef internal::TensorBlock<ScalarNoConst, Index, NumOutputDims, Layout>
- OutputTensorBlock;
- typedef internal::TensorBlockReader<ScalarNoConst, Index, NumOutputDims,
- Layout>
- OutputTensorBlockReader;
-
//===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
@@ -177,30 +164,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
// The total size of the reshaped tensor must be equal to the total size
// of the input tensor.
eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
-
- if (BlockAccess) {
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims =
- m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumOutputDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
- }
- m_inputStrides[0] = 1;
- for (int i = 1; i < NumInputDims; ++i) {
- m_inputStrides[i] = m_inputStrides[i - 1] * input_dims[i - 1];
- }
- } else {
- m_outputStrides[NumOutputDims - 1] = 1;
- for (int i = NumOutputDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
- }
- m_inputStrides[NumInputDims - 1] = 1;
- for (int i = NumInputDims - 2; i >= 0; --i) {
- m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
- }
- }
- }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
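The deleted constructor logic above was plain stride precomputation for the legacy block path. The same loop, isolated as a standalone illustration (hypothetical dimensions, not from this patch):

#include <unsupported/Eigen/CXX11/Tensor>

// Column-major strides for dimensions {2, 3, 4}: each stride is the product
// of all faster-varying dimensions, so the result is {1, 2, 6}.
Eigen::array<Eigen::Index, 3> colMajorStrides(
    const Eigen::array<Eigen::Index, 3>& dims) {
  Eigen::array<Eigen::Index, 3> strides;
  strides[0] = 1;
  for (int i = 1; i < 3; ++i) {
    strides[i] = strides[i - 1] * dims[i - 1];
  }
  return strides;  // the row-major branch mirrors this and yields {12, 4, 1}
}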
@@ -249,128 +212,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
Index size;
Index count;
};
- // TODO(andydavis) Reduce the overhead of this function.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
- OutputTensorBlock* output_block) const {
- if (m_impl.data() != NULL) {
- OutputTensorBlockReader::Run(output_block, m_impl.data());
- return;
- }
-
- // Calculate output block unit-stride inner dimension length.
- const DSizes<Index, NumOutputDims>& output_block_sizes =
- output_block->block_sizes();
- Index output_inner_dim_size = 1;
- Index output_outer_dim_start = NumOutputDims;
- for (Index i = 0; i < NumOutputDims; ++i) {
- const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? i : NumOutputDims - i - 1;
- output_inner_dim_size *= output_block_sizes[dim];
- if (output_block_sizes[dim] < m_dimensions[dim]) {
- output_outer_dim_start = i + 1;
- break;
- }
- }
-
- // Initialize output block iterator state.
- array<BlockIteratorState, NumOutputDims> block_iter_state;
-
- for (Index i = 0; i < NumOutputDims; ++i) {
- const Index dim = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? i : NumOutputDims - i - 1;
- block_iter_state[i].size = output_block_sizes[dim];
- block_iter_state[i].stride = m_outputStrides[dim];
- block_iter_state[i].span =
- block_iter_state[i].stride * (block_iter_state[i].size - 1);
- block_iter_state[i].count = 0;
- }
-
- const Index output_outer_dim_size = output_block_sizes.TotalSize() /
- output_inner_dim_size;
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims =
- m_impl.dimensions();
-
- Index index = output_block->first_coeff_index();
- for (Index outer_idx = 0; outer_idx < output_outer_dim_size; ++outer_idx) {
- Index inner_idx = 0;
- while (inner_idx < output_inner_dim_size) {
- // Calculate input coords based on 'index'.
- array<Index, NumInputDims> input_coords;
- Index idx = index;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumInputDims - 1; i > 0; --i) {
- input_coords[i] = idx / m_inputStrides[i];
- idx -= input_coords[i] * m_inputStrides[i];
- }
- input_coords[0] = idx;
- } else {
- for (int i = 0; i < NumInputDims - 1; ++i) {
- input_coords[i] = idx / m_inputStrides[i];
- idx -= input_coords[i] * m_inputStrides[i];
- }
- input_coords[NumInputDims - 1] = idx;
- }
-
- // Calculate target input block shape, using at most
- // 'output_inner_dim_size' coefficients along the input block's inner
- // dimensions.
- DSizes<Index, NumInputDims> input_block_sizes;
- Index num_to_allocate = output_inner_dim_size - inner_idx;
- for (Index i = 0; i < NumInputDims; ++i) {
- const Index dim =
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? i : NumInputDims - i - 1;
- input_block_sizes[dim] = numext::mini(
- num_to_allocate, (static_cast<Index>(input_dims[dim]) -
- input_coords[dim]));
- if (input_coords[dim] == 0) {
- num_to_allocate /= input_block_sizes[dim];
- } else {
- num_to_allocate = 1;
- }
- }
-
- // Calculate input block strides.
- DSizes<Index, NumInputDims> input_block_strides;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- input_block_strides[0] = 1;
- for (int i = 1; i < NumInputDims; ++i) {
- input_block_strides[i] = input_block_strides[i - 1] *
- input_block_sizes[i - 1];
- }
- } else {
- input_block_strides[NumInputDims - 1] = 1;
- for (int i = NumInputDims - 2; i >= 0; --i) {
- input_block_strides[i] = input_block_strides[i + 1] *
- input_block_sizes[i + 1];
- }
- }
-
- // Instantiate and read input block from input tensor.
- InputTensorBlock input_block(index, input_block_sizes,
- input_block_strides, m_inputStrides,
- output_block->data() + outer_idx *
- output_inner_dim_size + inner_idx);
-
- m_impl.block(&input_block);
-
- const Index input_block_total_size = input_block_sizes.TotalSize();
- index += input_block_total_size;
- inner_idx += input_block_total_size;
- }
- eigen_assert(inner_idx == output_inner_dim_size);
- index -= output_inner_dim_size;
- // Update index.
- for (Index i = output_outer_dim_start; i < NumOutputDims; ++i) {
- if (++block_iter_state[i].count < block_iter_state[i].size) {
- index += block_iter_state[i].stride;
- break;
- }
- block_iter_state[i].count = 0;
- index -= block_iter_state[i].span;
- }
- }
- }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
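The core of the deleted block() method is the decomposition of a linear index into input coordinates by repeated division by m_inputStrides. Isolated as a worked example (column-major order, hypothetical values):

#include <unsupported/Eigen/CXX11/Tensor>

// With input dims {2, 3, 4} the column-major strides are {1, 2, 6}; linear
// index 17 decomposes as 17 = 1*1 + 2*2 + 2*6, i.e. coordinates {1, 2, 2},
// recovered from the highest stride down exactly as the removed loop did.
Eigen::array<Eigen::Index, 3> indexToCoords(
    Eigen::Index idx, const Eigen::array<Eigen::Index, 3>& strides) {
  Eigen::array<Eigen::Index, 3> coords;
  for (int i = 2; i > 0; --i) {
    coords[i] = idx / strides[i];   // whole multiples of this stride
    idx -= coords[i] * strides[i];  // remainder for the faster dimensions
  }
  coords[0] = idx;  // innermost coordinate is whatever is left
  return coords;
}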
@@ -408,8 +249,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
protected:
TensorEvaluator<ArgType, Device> m_impl;
NewDimensions m_dimensions;
- DSizes<Index, NumOutputDims> m_outputStrides;
- DSizes<Index, NumInputDims> m_inputStrides;
};
@@ -426,7 +265,6 @@ template<typename NewDimensions, typename ArgType, typename Device>
enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = false,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::RawAccess,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -619,7 +457,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// slice offsets and sizes.
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -714,7 +551,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
}
// Use memcpy if it's going to be faster than using the regular evaluation.
- const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
+ const MemcpyTriggerForSlicing<Index, Device, BlockAccessV2> trigger(m_device);
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
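The trigger predicate above decides between a bulk memcpy and regular coefficient-wise evaluation; the only change in this hunk is that it now keys off BlockAccessV2 instead of the removed BlockAccess flag. A hedged sketch of such a predicate, consistent with the two-argument call site trigger(total, contiguous) shown above but not copied from Eigen:

// Sketch, not the actual Eigen trait: prefer memcpy only when the contiguous
// runs are long enough to amortize the call, and back off for large slices
// when block evaluation is available.
template <typename Index, typename Device, bool BlockAccess>
struct MemcpyTrigger {
  explicit MemcpyTrigger(const Device& device)
      : threshold_(2 * device.numThreads()) {}
  bool operator()(Index total_size, Index contiguous_size) const {
    const bool prefer_blocks = BlockAccess && total_size > Index(32 * 1024);
    return !prefer_blocks && contiguous_size > threshold_;
  }
 private:
  Index threshold_;
};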
@@ -808,16 +645,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
m_impl.getResourceRequirements(resources);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
- TensorBlock* output_block) const {
- TensorBlock input_block(srcCoeff(output_block->first_coeff_index()),
- output_block->block_sizes(),
- output_block->block_strides(),
- TensorBlockDimensions(m_inputStrides),
- output_block->data());
- m_impl.block(&input_block);
- }
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2
blockV2(TensorBlockDesc& desc, TensorBlockScratch& scratch,
bool /*root_of_expr_ast*/ = false) const {
@@ -922,7 +749,6 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -1124,7 +950,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
// slice offsets and sizes.
IsAligned = false,
PacketAccess = false,
- BlockAccess = false,
BlockAccessV2 = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -1306,7 +1131,6 @@ struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Stride
enum {
IsAligned = false,
PacketAccess = false,
- BlockAccess = false,
BlockAccessV2 = false,
PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
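After this cleanup, the evaluators in this file advertise block support through the single BlockAccessV2 flag. For instance, the slicing evaluator's flag set reduces to the following (members past Layout elided, exactly as truncated in the hunk above):

enum {
  IsAligned = false,
  PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
  BlockAccessV2 = TensorEvaluator<ArgType, Device>::BlockAccessV2,
  PreferBlockAccess = true,
  Layout = TensorEvaluator<ArgType, Device>::Layout,
  // ...
};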