path: root/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
author     Eugene Zhulenev <ezhulenev@google.com>    2019-11-12 10:12:28 -0800
committer  Eugene Zhulenev <ezhulenev@google.com>    2019-11-12 10:12:28 -0800
commit     13c3327f5cf829fd9d04a2ab46861e722cd74ca0 (patch)
tree       20bd1a5f361023db822298696efbcff7378ab4a7 /unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
parent     71aa53dd6dfdc497324d9e87f59c4ba820191856 (diff)
Remove legacy block evaluation support
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h  907
1 file changed, 0 insertions(+), 907 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index a8e7a8d7b..447da9121 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -142,782 +142,6 @@ class TensorBlock {
Scalar* m_data; // Not owned.
};
-template <typename Scalar, typename StorageIndex>
-struct TensorBlockCopyOp {
-
- typedef typename packet_traits<Scalar>::type Packet;
- enum {
- Vectorizable = packet_traits<Scalar>::Vectorizable,
- PacketSize = packet_traits<Scalar>::size
- };
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const StorageIndex num_coeff_to_copy, const StorageIndex dst_index,
- const StorageIndex dst_stride, Scalar* EIGEN_RESTRICT dst_data,
- const StorageIndex src_index, const StorageIndex src_stride,
- const Scalar* EIGEN_RESTRICT src_data) {
- const Scalar* src = &src_data[src_index];
- Scalar* dst = &dst_data[dst_index];
-
- if (!Vectorizable) {
- for (Index i = 0; i < num_coeff_to_copy; ++i) {
- dst[i * dst_stride] = src[i * src_stride];
- }
- return;
- }
-
- if (src_stride == 1) {
- const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
- if (dst_stride == 1) {
- // LINEAR
- for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
- Packet p = ploadu<Packet>(src + i);
- pstoreu<Scalar, Packet>(dst + i, p);
- }
- for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
- dst[i] = src[i];
- }
- } else {
- // SCATTER
- for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
- Packet p = ploadu<Packet>(src + i);
- pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
- }
- for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
- dst[i * dst_stride] = src[i];
- }
- }
- } else if (src_stride == 0) {
- const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
- if (dst_stride == 1) {
- // LINEAR
- for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
- Packet p = pload1<Packet>(src);
- pstoreu<Scalar, Packet>(dst + i, p);
- }
- for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
- dst[i] = *src;
- }
- } else {
- // SCATTER
- for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
- Packet p = pload1<Packet>(src);
- pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
- }
- for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
- dst[i * dst_stride] = *src;
- }
- }
- } else {
- if (dst_stride == 1) {
- // GATHER
- const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
- for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
- Packet p = pgather<Scalar, Packet>(src + i * src_stride, src_stride);
- pstoreu<Scalar, Packet>(dst + i, p);
- }
- for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
- dst[i] = src[i * src_stride];
- }
- } else {
- // RANDOM
- for (StorageIndex i = 0; i < num_coeff_to_copy; ++i) {
- dst[i * dst_stride] = src[i * src_stride];
- }
- }
- }
- }
-};
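
For illustration, a minimal standalone sketch of the scalar fallback path of the removed TensorBlockCopyOp; the vectorized LINEAR/SCATTER/GATHER branches above specialize this same loop with packet loads and stores. Plain C++, not part of the patch; all names and values are hypothetical.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical helper mirroring the !Vectorizable branch above: copy n
// coefficients from a strided source to a strided destination.
template <typename Scalar, typename Index>
void strided_copy(Index n, Scalar* dst, Index dst_stride,
                  const Scalar* src, Index src_stride) {
  for (Index i = 0; i < n; ++i) {
    // RANDOM case; degenerates to LINEAR when both strides are 1.
    dst[i * dst_stride] = src[i * src_stride];
  }
}

int main() {
  std::vector<float> src = {1, 2, 3, 4, 5, 6};
  std::vector<float> dst(3, 0.f);
  // Gather every second element of src into a dense dst (src_stride == 2).
  strided_copy<float, std::ptrdiff_t>(3, dst.data(), 1, src.data(), 2);
  assert(dst[0] == 1.f && dst[1] == 3.f && dst[2] == 5.f);
  return 0;
}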
-
-/**
- * \class TensorBlockIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class.
- *
- * This class is responsible for copying data between a tensor and a tensor
- * block.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
- bool BlockRead>
-class TensorBlockIO {
- public:
- typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
- typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
-
- protected:
- typedef array<StorageIndex, NumDims> Dimensions;
-
- struct BlockIteratorState {
- StorageIndex input_stride;
- StorageIndex output_stride;
- StorageIndex input_span;
- StorageIndex output_span;
- StorageIndex size;
- StorageIndex count;
- BlockIteratorState()
- : input_stride(0),
- output_stride(0),
- input_span(0),
- output_span(0),
- size(0),
- count(0) {}
- };
-
- // Compute how many inner dimensions may be squeezed when doing IO
- // between a tensor and a block. It's safe to squeeze inner dimensions only
- // if they are not reordered.
- static int NumSqueezableInnerDims(const Dimensions& tensor_to_block_dim_map) {
- int num_squeezable_dims = 0;
- if (Layout == ColMajor) {
- for (int i = 0; i < NumDims; ++i) {
- if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++;
- else break;
- }
- } else {
- for (int i = NumDims - 1; i >= 0; --i) {
- if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++;
- else break;
- }
- }
- return num_squeezable_dims;
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
- const Block& block, StorageIndex first_coeff_index,
- const Dimensions& tensor_to_block_dim_map,
- const Dimensions& tensor_strides,
- const Scalar* src_data,
- Scalar* dst_data) {
- // Do not squeeze reordered inner dimensions.
- int num_squeezable_dims = NumSqueezableInnerDims(tensor_to_block_dim_map);
-
- // Find the innermost tensor dimension whose size is not 1. This is the
- // effective inner dim. If all dimensions are of size 1, then fall back to
- // using the actual innermost dim to avoid out-of-bound access.
- StorageIndex num_size_one_inner_dims = 0;
- for (int i = 0; i < num_squeezable_dims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- if (block.block_sizes()[tensor_to_block_dim_map[dim]] != 1) {
- num_size_one_inner_dims = i;
- break;
- }
- }
-
- // Calculate strides and dimensions.
- const StorageIndex tensor_stride1_dim = cond<Layout>()(
- num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
- const StorageIndex block_dim_for_tensor_stride1_dim =
- NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
- StorageIndex block_inner_dim_size =
- NumDims == 0 ? 1
- : block.block_sizes()[block_dim_for_tensor_stride1_dim];
-
- // Squeeze multiple inner dims into one for larger inner dim size.
- for (Index i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) {
- const Index dim = cond<Layout>()(i, NumDims - i - 1);
- const StorageIndex block_stride =
- block.block_strides()[tensor_to_block_dim_map[dim]];
- if (block_inner_dim_size == block_stride &&
- block_stride == tensor_strides[dim]) {
- block_inner_dim_size *=
- block.block_sizes()[tensor_to_block_dim_map[dim]];
- ++num_size_one_inner_dims;
- } else {
- break;
- }
- }
-
- StorageIndex inputIndex;
- StorageIndex outputIndex;
- StorageIndex input_stride;
- StorageIndex output_stride;
-
- // Setup strides to read/write along the tensor's stride1 dimension.
- if (BlockRead) {
- inputIndex = first_coeff_index;
- outputIndex = 0;
- input_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
- output_stride =
- NumDims == 0
- ? 1
- : block.block_strides()[block_dim_for_tensor_stride1_dim];
- } else {
- inputIndex = 0;
- outputIndex = first_coeff_index;
- input_stride =
- NumDims == 0
- ? 1
- : block.block_strides()[block_dim_for_tensor_stride1_dim];
- output_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
- }
-
- const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
- array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
- // Initialize block iterator state. Squeeze away any dimension of size 1.
- Index num_squeezed_dims = 0;
- for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
- const Index dim = cond<Layout>()(i + 1, NumDims - i - 2);
- const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]];
- if (size == 1) {
- continue;
- }
- block_iter_state[num_squeezed_dims].size = size;
- if (BlockRead) {
- block_iter_state[num_squeezed_dims].input_stride = tensor_strides[dim];
- block_iter_state[num_squeezed_dims].output_stride =
- block.block_strides()[tensor_to_block_dim_map[dim]];
- } else {
- block_iter_state[num_squeezed_dims].input_stride =
- block.block_strides()[tensor_to_block_dim_map[dim]];
- block_iter_state[num_squeezed_dims].output_stride = tensor_strides[dim];
- }
- block_iter_state[num_squeezed_dims].input_span =
- block_iter_state[num_squeezed_dims].input_stride *
- (block_iter_state[num_squeezed_dims].size - 1);
- block_iter_state[num_squeezed_dims].output_span =
- block_iter_state[num_squeezed_dims].output_stride *
- (block_iter_state[num_squeezed_dims].size - 1);
- ++num_squeezed_dims;
- }
-
- // Iterate copying data from src to dst.
- const StorageIndex block_total_size =
- NumDims == 0 ? 1 : block.block_sizes().TotalSize();
- for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
- BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
- dst_data, inputIndex, input_stride, src_data);
- // Update index.
- for (int j = 0; j < num_squeezed_dims; ++j) {
- if (++block_iter_state[j].count < block_iter_state[j].size) {
- inputIndex += block_iter_state[j].input_stride;
- outputIndex += block_iter_state[j].output_stride;
- break;
- }
- block_iter_state[j].count = 0;
- inputIndex -= block_iter_state[j].input_span;
- outputIndex -= block_iter_state[j].output_span;
- }
- }
- }
-};
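
A standalone sketch of the NumSqueezableInnerDims check above for a column-major layout, assuming tensor_to_block_dim_map is a permutation of 0..NumDims-1; the mapping values in main() are made up for illustration.

#include <array>
#include <cassert>

// Count how many leading (inner, for ColMajor) dimensions keep their position
// in the tensor-to-block mapping and may therefore be merged into one copy.
template <int NumDims>
int num_squeezable_inner_dims(
    const std::array<int, NumDims>& tensor_to_block_dim_map) {
  int n = 0;
  for (int i = 0; i < NumDims; ++i) {
    if (tensor_to_block_dim_map[i] == i) ++n;
    else break;
  }
  return n;
}

int main() {
  // Dims 0 and 1 are identity-mapped; dims 2 and 3 are swapped, so only the
  // two innermost dimensions are safe to squeeze.
  std::array<int, 4> map = {0, 1, 3, 2};
  assert(num_squeezable_inner_dims<4>(map) == 2);
  return 0;
}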
-
-/**
- * \class TensorBlockReader
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block reader class.
- *
- * This class is responsible for reading a tensor block.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
- Layout, /*BlockRead=*/true> {
- public:
- typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
- typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- Block* block, const Scalar* src_data) {
- array<StorageIndex, NumDims> tensor_to_block_dim_map;
- for (int i = 0; i < NumDims; ++i) {
- tensor_to_block_dim_map[i] = i;
- }
- Base::Copy(*block, block->first_coeff_index(), tensor_to_block_dim_map,
- block->tensor_strides(), src_data, block->data());
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- Block* block, StorageIndex first_coeff_index,
- const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
- const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
- Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
- tensor_strides, src_data, block->data());
- }
-};
-
-/**
- * \class TensorBlockWriter
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block writer class.
- *
- * This class is responsible for writing a tensor block.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
- Layout, /*BlockRead=*/false> {
- public:
- typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
- typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const Block& block, Scalar* dst_data) {
- array<StorageIndex, NumDims> tensor_to_block_dim_map;
- for (int i = 0; i < NumDims; ++i) {
- tensor_to_block_dim_map[i] = i;
- }
- Base::Copy(block, block.first_coeff_index(), tensor_to_block_dim_map,
- block.tensor_strides(), block.data(), dst_data);
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const Block& block, StorageIndex first_coeff_index,
- const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
- const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
- Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
- tensor_strides, block.data(), dst_data);
- }
-};
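
A hypothetical usage sketch of the removed reader API (the writer is symmetric), written against a pre-removal Eigen revision. It assumes the five-argument TensorBlock constructor (first_coeff_index, sizes, strides, tensor_strides, data) used later in this file; the 4x4 tensor and 2x2 block are illustrative.

#include <unsupported/Eigen/CXX11/Tensor>

// Copy the 2x2 block starting at (1, 1) out of a 4x4 column-major tensor.
void read_block_example(const float* tensor_data, float* block_storage) {
  using namespace Eigen;
  using namespace Eigen::internal;
  typedef TensorBlock<float, Eigen::Index, 2, ColMajor> Block;

  DSizes<Eigen::Index, 2> block_sizes(2, 2);
  DSizes<Eigen::Index, 2> block_strides(1, 2);    // dense column-major block
  DSizes<Eigen::Index, 2> tensor_strides(1, 4);   // strides of the 4x4 tensor
  const Eigen::Index first_coeff = 1 * 1 + 1 * 4; // linear offset of (1, 1)

  Block block(first_coeff, block_sizes, block_strides, tensor_strides,
              block_storage);
  // Gathers the block's coefficients from tensor_data into block_storage.
  TensorBlockReader<float, Eigen::Index, 2, ColMajor>::Run(&block, tensor_data);
}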
-
-/**
- * \class TensorBlockCwiseUnaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Carries out a cwise unary op on a number of coefficients.
- *
- * This class reads strided input from the argument, and writes the
- * result of the cwise unary op to the strided output array.
- *
- */
-template <bool Vectorizable>
-struct TensorBlockCwiseUnaryOp {
- template <typename StorageIndex, typename UnaryFunctor,
- typename OutputScalar, typename InputScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const UnaryFunctor& functor, const StorageIndex num_coeff,
- const StorageIndex output_index, const StorageIndex output_stride,
- OutputScalar* output_data, const StorageIndex input_index,
- const StorageIndex input_stride, const InputScalar* input_data) {
- typedef const Array<InputScalar, Dynamic, 1> Input;
- typedef Array<OutputScalar, Dynamic, 1> Output;
-
- typedef Map<Input, 0, InnerStride<> > InputMap;
- typedef Map<Output, 0, InnerStride<> > OutputMap;
-
- const InputScalar* input_base = &input_data[input_index];
- OutputScalar* output_base = &output_data[output_index];
-
- const InputMap input(input_base, num_coeff, InnerStride<>(input_stride));
- OutputMap output(output_base, num_coeff, InnerStride<>(output_stride));
-
- output = CwiseUnaryOp<UnaryFunctor, InputMap>(input, functor);
- }
-};
-
-template<>
-struct TensorBlockCwiseUnaryOp<true> {
- template <typename StorageIndex, typename UnaryFunctor,
- typename OutputScalar, typename InputScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const UnaryFunctor& functor, const StorageIndex num_coeff,
- const StorageIndex output_index, const StorageIndex output_stride,
- OutputScalar* output_data, const StorageIndex input_index,
- const StorageIndex input_stride, const InputScalar* input_data) {
- if (input_stride == 1 && output_stride == 1) {
- typedef const Array<InputScalar, Dynamic, 1> Input;
- typedef Array<OutputScalar, Dynamic, 1> Output;
-
- const Map<Input> input(&input_data[input_index], num_coeff);
- Map<Output> output(&output_data[output_index], num_coeff);
-
- output = CwiseUnaryOp<UnaryFunctor, Map<Input> >(input, functor);
- } else {
- TensorBlockCwiseUnaryOp<false>::Run(
- functor, num_coeff, output_index, output_stride, output_data,
- input_index, input_stride, input_data);
- }
- }
-};
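
Both branches above are instances of Eigen's public Map pattern; below is a minimal self-contained sketch of the strided variant used by the generic path (the example data and squaring lambda are made up). Not part of the patch.

#include <Eigen/Core>
#include <cassert>

int main() {
  using namespace Eigen;
  float in[6] = {1, 2, 3, 4, 5, 6};
  float out[3] = {0, 0, 0};

  typedef Map<const Array<float, Dynamic, 1>, 0, InnerStride<> > InputMap;
  typedef Map<Array<float, Dynamic, 1> > OutputMap;

  // Read every 2nd input coefficient (1, 3, 5) and write densely to out,
  // mirroring the strided input / output maps built in the generic Run().
  InputMap input(in, 3, InnerStride<>(2));
  OutputMap output(out, 3);

  output = input.unaryExpr([](float x) { return x * x; });
  assert(out[0] == 1.f && out[1] == 9.f && out[2] == 25.f);
  return 0;
}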
-
-/**
- * \class TensorBlockCwiseUnaryIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class for carrying out cwise unary ops.
- *
- * This class carries out the unary op on given blocks.
- */
-template <typename UnaryFunctor, typename StorageIndex, typename OutputScalar,
- int NumDims, int Layout>
-struct TensorBlockCwiseUnaryIO {
- typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
- Layout>::Dimensions Dimensions;
-
- typedef TensorBlockCwiseUnaryOp<
- packet_traits<OutputScalar>::Vectorizable &&
- functor_traits<UnaryFunctor>::PacketAccess>
- TensorBlockCwiseUnaryOpImpl;
-
- struct BlockIteratorState {
- StorageIndex output_stride, output_span;
- StorageIndex input_stride, input_span;
- StorageIndex size, count;
- };
-
- template <typename InputScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const UnaryFunctor& functor, const Dimensions& block_sizes,
- const Dimensions& block_strides, OutputScalar* output_data,
- const array<StorageIndex, NumDims>& input_strides,
- const InputScalar* input_data) {
- // Find the innermost dimension whose size is not 1. This is the effective
- // inner dim. If all dimensions are of size 1, fall back to using the actual
- // innermost dim to avoid out-of-bound access.
- int num_size_one_inner_dims = 0;
- for (int i = 0; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- if (block_sizes[dim] != 1) {
- num_size_one_inner_dims = i;
- break;
- }
- }
- // Calculate strides and dimensions.
- const int inner_dim =
- NumDims == 0 ? 1
- : cond<Layout>()(num_size_one_inner_dims,
- NumDims - num_size_one_inner_dims - 1);
- StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
- for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- // Merge multiple inner dims into one for larger inner dim size (i.e.
- // fewer calls to TensorBlockCwiseUnaryOp::Run()).
- if (inner_dim_size == block_strides[dim] &&
- block_strides[dim] == input_strides[dim]) {
- inner_dim_size *= block_sizes[dim];
- ++num_size_one_inner_dims;
- } else {
- break;
- }
- }
-
- StorageIndex output_index = 0, input_index = 0;
-
- const StorageIndex output_stride =
- NumDims == 0 ? 1 : block_strides[inner_dim];
- const StorageIndex input_stride =
- NumDims == 0 ? 1 : input_strides[inner_dim];
-
- const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
- array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
- // Initialize block iterator state. Squeeze away any dimension of size 1.
- int num_squeezed_dims = 0;
- for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
- const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
- const StorageIndex size = block_sizes[dim];
- if (size == 1) {
- continue;
- }
- BlockIteratorState& state = block_iter_state[num_squeezed_dims];
- state.output_stride = block_strides[dim];
- state.input_stride = input_strides[dim];
- state.size = size;
- state.output_span = state.output_stride * (size - 1);
- state.input_span = state.input_stride * (size - 1);
- state.count = 0;
- ++num_squeezed_dims;
- }
-
- // Compute cwise unary op.
- const StorageIndex block_total_size =
- NumDims == 0 ? 1 : block_sizes.TotalSize();
- for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) {
- TensorBlockCwiseUnaryOpImpl::Run(functor, inner_dim_size, output_index,
- output_stride, output_data, input_index,
- input_stride, input_data);
- // Update index.
- for (int j = 0; j < num_squeezed_dims; ++j) {
- BlockIteratorState& state = block_iter_state[j];
- if (++state.count < state.size) {
- output_index += state.output_stride;
- input_index += state.input_stride;
- break;
- }
- state.count = 0;
- output_index -= state.output_span;
- input_index -= state.input_span;
- }
- }
- }
-};
-
-/**
- * \class TensorBlockCwiseBinaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Carries out a cwise binary op on a number of coefficients.
- *
- * This class reads strided inputs from left and right operands, and writes the
- * result of the cwise binary op to the strided output array.
- *
- */
-template<bool Vectorizable>
-struct TensorBlockCwiseBinaryOp {
- template <typename StorageIndex, typename BinaryFunctor, typename OutputScalar,
- typename LeftScalar, typename RightScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const BinaryFunctor& functor, const StorageIndex num_coeff,
- const StorageIndex output_index, const StorageIndex output_stride,
- OutputScalar* output_data, const StorageIndex left_index,
- const StorageIndex left_stride, const LeftScalar* left_data,
- const StorageIndex right_index, const StorageIndex right_stride,
- const RightScalar* right_data) {
- typedef const Array<LeftScalar, Dynamic, 1> Lhs;
- typedef const Array<RightScalar, Dynamic, 1> Rhs;
- typedef Array<OutputScalar, Dynamic, 1> Out;
-
- typedef Map<Lhs, 0, InnerStride<> > LhsMap;
- typedef Map<Rhs, 0, InnerStride<> > RhsMap;
- typedef Map<Out, 0, InnerStride<> > OutMap;
-
- const LeftScalar* lhs_base = &left_data[left_index];
- const RightScalar* rhs_base = &right_data[right_index];
- OutputScalar* out_base = &output_data[output_index];
-
- const LhsMap lhs(lhs_base, num_coeff, InnerStride<>(left_stride));
- const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
- OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
-
- out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
- }
-};
-
-template<>
-struct TensorBlockCwiseBinaryOp<true> {
- template <typename StorageIndex, typename BinaryFunctor, typename OutputScalar,
- typename LeftScalar, typename RightScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const BinaryFunctor& functor, const StorageIndex num_coeff,
- const StorageIndex output_index, const StorageIndex output_stride,
- OutputScalar* output_data, const StorageIndex left_index,
- const StorageIndex left_stride, const LeftScalar* left_data,
- const StorageIndex right_index, const StorageIndex right_stride,
- const RightScalar* right_data) {
- if (left_stride == 1 && right_stride == 1 && output_stride == 1) {
- typedef const Array<LeftScalar, Dynamic, 1> Lhs;
- typedef const Array<RightScalar, Dynamic, 1> Rhs;
- typedef Array<OutputScalar, Dynamic, 1> Out;
-
- const LeftScalar* lhs_base = &left_data[left_index];
- const RightScalar* rhs_base = &right_data[right_index];
- OutputScalar* out_base = &output_data[output_index];
-
- const Map<Lhs> lhs(lhs_base, num_coeff);
- const Map<Rhs> rhs(rhs_base, num_coeff);
- Map<Out> out(out_base, num_coeff);
-
- out = CwiseBinaryOp<BinaryFunctor, Map<Lhs>, Map<Rhs> >(lhs, rhs, functor);
- } else {
- TensorBlockCwiseBinaryOp<false>::Run(
- functor, num_coeff, output_index, output_stride, output_data,
- left_index, left_stride, left_data, right_index, right_stride,
- right_data);
- }
- }
-};
-
-/**
- * \class TensorBlockCwiseBinaryIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class for carrying out cwise binary ops.
- *
- * This class carries out the binary op on given blocks.
- *
- */
-template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
- int NumDims, int Layout>
-struct TensorBlockCwiseBinaryIO {
- typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
-
- typedef TensorBlockCwiseBinaryOp<
- packet_traits<OutputScalar>::Vectorizable &&
- functor_traits<BinaryFunctor>::PacketAccess>
- TensorBlockCwiseBinaryOpImpl;
-
- struct BlockIteratorState {
- StorageIndex output_stride, output_span;
- StorageIndex left_stride, left_span;
- StorageIndex right_stride, right_span;
- StorageIndex size, count;
- };
-
- template <typename LeftScalar, typename RightScalar>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
- const BinaryFunctor& functor, const Dimensions& block_sizes,
- const Dimensions& block_strides, OutputScalar* output_data,
- const array<StorageIndex, NumDims>& left_strides,
- const LeftScalar* left_data,
- const array<StorageIndex, NumDims>& right_strides,
- const RightScalar* right_data) {
- // Find the innermost dimension whose size is not 1. This is the effective
- // inner dim. If all dimensions are of size 1, fall back to using the actual
- // innermost dim to avoid out-of-bound access.
- int num_size_one_inner_dims = 0;
- for (int i = 0; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- if (block_sizes[dim] != 1) {
- num_size_one_inner_dims = i;
- break;
- }
- }
- // Calculate strides and dimensions.
- const int inner_dim =
- NumDims == 0 ? 1
- : cond<Layout>()(num_size_one_inner_dims,
- NumDims - num_size_one_inner_dims - 1);
- StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
- for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
- const int dim = cond<Layout>()(i, NumDims - i - 1);
- // Merge multiple inner dims into one for larger inner dim size (i.e.
- // fewer calls to TensorBlockCwiseBinaryOp::Run()).
- if (inner_dim_size == block_strides[dim] &&
- block_strides[dim] == left_strides[dim] &&
- block_strides[dim] == right_strides[dim]) {
- inner_dim_size *= block_sizes[dim];
- ++num_size_one_inner_dims;
- } else {
- break;
- }
- }
-
- StorageIndex output_index = 0, left_index = 0, right_index = 0;
- const StorageIndex output_stride =
- NumDims == 0 ? 1 : block_strides[inner_dim];
- const StorageIndex left_stride = NumDims == 0 ? 1 : left_strides[inner_dim];
- const StorageIndex right_stride =
- NumDims == 0 ? 1 : right_strides[inner_dim];
-
- const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
- array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
- // Initialize block iterator state. Squeeze away any dimension of size 1.
- int num_squeezed_dims = 0;
- for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
- const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
- const StorageIndex size = block_sizes[dim];
- if (size == 1) {
- continue;
- }
- BlockIteratorState& state = block_iter_state[num_squeezed_dims];
- state.output_stride = block_strides[dim];
- state.left_stride = left_strides[dim];
- state.right_stride = right_strides[dim];
- state.size = size;
- state.output_span = state.output_stride * (size - 1);
- state.left_span = state.left_stride * (size - 1);
- state.right_span = state.right_stride * (size - 1);
- state.count = 0;
- ++num_squeezed_dims;
- }
-
- // Compute cwise binary op.
- const StorageIndex block_total_size =
- NumDims == 0 ? 1 : block_sizes.TotalSize();
- for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) {
- TensorBlockCwiseBinaryOpImpl::Run(functor, inner_dim_size, output_index,
- output_stride, output_data, left_index,
- left_stride, left_data, right_index,
- right_stride, right_data);
- // Update index.
- for (int j = 0; j < num_squeezed_dims; ++j) {
- BlockIteratorState& state = block_iter_state[j];
- if (++state.count < state.size) {
- output_index += state.output_stride;
- left_index += state.left_stride;
- right_index += state.right_stride;
- break;
- }
- state.count = 0;
- output_index -= state.output_span;
- left_index -= state.left_span;
- right_index -= state.right_span;
- }
- }
- }
-};
-
-/**
- * \class TensorBlockView
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Read-only view into a block of data.
- *
- * This class provides read-only access to a block of data in impl. It may need
- * to allocate space for holding the intermediate result.
- *
- */
-template <class ArgType, class Device>
-struct TensorBlockView {
- typedef TensorEvaluator<ArgType, Device> Impl;
- typedef typename Impl::Index StorageIndex;
- typedef typename remove_const<typename Impl::Scalar>::type Scalar;
- static const int NumDims = array_size<typename Impl::Dimensions>::value;
- typedef DSizes<StorageIndex, NumDims> Dimensions;
-
- // Constructs a TensorBlockView for `impl`. `block` is only used for
- // specifying the start offset, shape, and strides of the block.
- template <typename OtherTensorBlock>
- TensorBlockView(const Device& device,
- const TensorEvaluator<ArgType, Device>& impl,
- const OtherTensorBlock& block)
- : m_device(device),
- m_block_sizes(block.block_sizes()),
- m_data(NULL),
- m_allocated_data(NULL) {
- if (Impl::RawAccess && impl.data() != NULL) {
- m_data = impl.data() + block.first_coeff_index();
- m_block_strides = block.tensor_strides();
- } else {
- // Actually make a copy.
-
- // TODO(wuke): This sometimes puts a lot of pressure on the heap allocator.
- // Consider allowing ops to request additional temporary block memory in
- // TensorOpResourceRequirements.
- m_allocated_data = static_cast<Scalar*>(
- m_device.allocate(m_block_sizes.TotalSize() * sizeof(Scalar)));
- m_data = m_allocated_data;
- if (NumDims > 0) {
- if (static_cast<int>(Impl::Layout) == static_cast<int>(ColMajor)) {
- m_block_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_block_strides[i] = m_block_strides[i - 1] * m_block_sizes[i - 1];
- }
- } else {
- m_block_strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_block_strides[i] = m_block_strides[i + 1] * m_block_sizes[i + 1];
- }
- }
- }
- TensorBlock<Scalar, StorageIndex, NumDims, Impl::Layout> input_block(
- block.first_coeff_index(), m_block_sizes, m_block_strides,
- block.tensor_strides(), m_allocated_data);
- impl.block(&input_block);
- }
- }
-
- ~TensorBlockView() {
- if (m_allocated_data != NULL) {
- m_device.deallocate(m_allocated_data);
- }
- }
-
- const Dimensions& block_sizes() const { return m_block_sizes; }
- const Dimensions& block_strides() const { return m_block_strides; }
- const Scalar* data() const { return m_data; }
-
- private:
- const Device EIGEN_DEVICE_REF m_device;
- Dimensions m_block_sizes, m_block_strides;
- const Scalar* m_data; // Not owned.
- Scalar* m_allocated_data; // Owned.
-};
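
A standalone sketch of the dense-stride computation the constructor above performs when the block has to be materialized; the row-major case mirrors it from the other end. Plain C++, illustrative values.

#include <array>
#include <cassert>

// Column-major: stride of dim i is the product of the sizes of dims 0..i-1.
template <int NumDims>
std::array<long, NumDims> col_major_strides(
    const std::array<long, NumDims>& sizes) {
  std::array<long, NumDims> strides = {};
  strides[0] = 1;
  for (int i = 1; i < NumDims; ++i) strides[i] = strides[i - 1] * sizes[i - 1];
  return strides;
}

int main() {
  std::array<long, 3> sizes = {2, 3, 4};
  std::array<long, 3> strides = col_major_strides<3>(sizes);
  assert(strides[0] == 1 && strides[1] == 2 && strides[2] == 6);
  return 0;
}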
-
/**
* \class TensorBlockMapper
* \ingroup CXX11_Tensor_Module
@@ -1108,137 +332,6 @@ class TensorBlockMapper {
StorageIndex m_total_block_count;
};
-/**
- * \class TensorSliceBlockMapper
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor slice block mapper class.
- *
- * This class is responsible for iterating over the blocks of
- * a slice of a tensor. Supports shuffling of the block strides
- * for callers that want to reduce strides for dimensions to be
- * processed together.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorSliceBlockMapper {
- public:
- typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
- typedef DSizes<StorageIndex, NumDims> Dimensions;
-
- TensorSliceBlockMapper(const Dimensions& tensor_dims,
- const Dimensions& tensor_slice_offsets,
- const Dimensions& tensor_slice_extents,
- const Dimensions& block_dim_sizes,
- const Dimensions& block_stride_order)
- : m_tensor_dimensions(tensor_dims),
- m_tensor_slice_offsets(tensor_slice_offsets),
- m_tensor_slice_extents(tensor_slice_extents),
- m_block_dim_sizes(block_dim_sizes),
- m_block_stride_order(block_stride_order),
- m_total_block_count(1) {
- // Calculate block counts by dimension and total block count.
- DSizes<StorageIndex, NumDims> block_count;
- for (Index i = 0; i < block_count.rank(); ++i) {
- block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]);
- }
- m_total_block_count = array_prod(block_count);
-
- // Calculate block strides (used for enumerating blocks).
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_block_strides[0] = 1;
- m_tensor_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
- m_tensor_strides[i] =
- m_tensor_strides[i - 1] * m_tensor_dimensions[i - 1];
- }
- } else {
- m_block_strides[NumDims - 1] = 1;
- m_tensor_strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
- m_tensor_strides[i] =
- m_tensor_strides[i + 1] * m_tensor_dimensions[i + 1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
- GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
- StorageIndex first_coeff_index = 0;
- DSizes<StorageIndex, NumDims> coords;
- DSizes<StorageIndex, NumDims> sizes;
- DSizes<StorageIndex, NumDims> strides;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = block_index / m_block_strides[i];
- coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
- sizes[i] = numext::mini(
- m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
- m_block_dim_sizes[i]);
- block_index -= idx * m_block_strides[i];
- first_coeff_index += coords[i] * m_tensor_strides[i];
- }
- coords[0] =
- m_tensor_slice_offsets[0] + block_index * m_block_dim_sizes[0];
- sizes[0] = numext::mini(
- m_tensor_slice_offsets[0] + m_tensor_slice_extents[0] - coords[0],
- m_block_dim_sizes[0]);
- first_coeff_index += coords[0] * m_tensor_strides[0];
-
- StorageIndex prev_dim = m_block_stride_order[0];
- strides[prev_dim] = 1;
- for (int i = 1; i < NumDims; ++i) {
- const StorageIndex curr_dim = m_block_stride_order[i];
- strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
- prev_dim = curr_dim;
- }
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const StorageIndex idx = block_index / m_block_strides[i];
- coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
- sizes[i] = numext::mini(
- m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
- m_block_dim_sizes[i]);
- block_index -= idx * m_block_strides[i];
- first_coeff_index += coords[i] * m_tensor_strides[i];
- }
- coords[NumDims - 1] = m_tensor_slice_offsets[NumDims - 1] +
- block_index * m_block_dim_sizes[NumDims - 1];
- sizes[NumDims - 1] = numext::mini(
- m_tensor_slice_offsets[NumDims - 1] +
- m_tensor_slice_extents[NumDims - 1] - coords[NumDims - 1],
- m_block_dim_sizes[NumDims - 1]);
- first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
- StorageIndex prev_dim = m_block_stride_order[NumDims - 1];
- strides[prev_dim] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- const StorageIndex curr_dim = m_block_stride_order[i];
- strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
- prev_dim = curr_dim;
- }
- }
-
- return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
- return m_total_block_count;
- }
-
- private:
- Dimensions m_tensor_dimensions;
- Dimensions m_tensor_slice_offsets;
- Dimensions m_tensor_slice_extents;
- Dimensions m_tensor_strides;
- Dimensions m_block_dim_sizes;
- Dimensions m_block_stride_order;
- Dimensions m_block_strides;
- StorageIndex m_total_block_count;
-};
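
The block-count arithmetic in the constructor above reduces to a product of divup(extent, block_size) terms; a minimal sketch with made-up extents, not part of the patch.

#include <cassert>

// Same rounding-up division as Eigen's divup helper.
inline long divup(long x, long y) { return (x + y - 1) / y; }

int main() {
  const long slice_extents[2] = {10, 7};
  const long block_sizes[2]   = {4, 4};
  long total_blocks = 1;
  for (int i = 0; i < 2; ++i)
    total_blocks *= divup(slice_extents[i], block_sizes[i]);
  // 3 blocks along dim 0 (4 + 4 + 2) and 2 along dim 1 (4 + 3).
  assert(total_blocks == 6);
  return 0;
}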
-
} // namespace internal
} // namespace Eigen