From 13c3327f5cf829fd9d04a2ab46861e722cd74ca0 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Tue, 12 Nov 2019 10:12:28 -0800
Subject: Remove legacy block evaluation support

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 907 -----------------------
 1 file changed, 907 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h')
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index a8e7a8d7b..447da9121 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -142,782 +142,6 @@ class TensorBlock {
   Scalar* m_data;  // Not owned.
 };
 
-template <typename Scalar, typename StorageIndex>
-struct TensorBlockCopyOp {
-
-  typedef typename packet_traits<Scalar>::type Packet;
-  enum {
-    Vectorizable = packet_traits<Scalar>::Vectorizable,
-    PacketSize   = packet_traits<Scalar>::size
-  };
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const StorageIndex num_coeff_to_copy, const StorageIndex dst_index,
-      const StorageIndex dst_stride, Scalar* EIGEN_RESTRICT dst_data,
-      const StorageIndex src_index, const StorageIndex src_stride,
-      const Scalar* EIGEN_RESTRICT src_data) {
-    const Scalar* src = &src_data[src_index];
-    Scalar* dst = &dst_data[dst_index];
-
-    if (!Vectorizable) {
-      for (Index i = 0; i < num_coeff_to_copy; ++i) {
-        dst[i * dst_stride] = src[i * src_stride];
-      }
-      return;
-    }
-
-    if (src_stride == 1) {
-      const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
-      if (dst_stride == 1) {
-        // LINEAR
-        for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
-          Packet p = ploadu<Packet>(src + i);
-          pstoreu<Scalar, Packet>(dst + i, p);
-        }
-        for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst[i] = src[i];
-        }
-      } else {
-        // SCATTER
-        for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
-          Packet p = ploadu<Packet>(src + i);
-          pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
-        }
-        for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst[i * dst_stride] = src[i];
-        }
-      }
-    } else if (src_stride == 0) {
-      const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
-      if (dst_stride == 1) {
-        // LINEAR
-        for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
-          Packet p = pload1<Packet>(src);
-          pstoreu<Scalar, Packet>(dst + i, p);
-        }
-        for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst[i] = *src;
-        }
-      } else {
-        // SCATTER
-        for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
-          Packet p = pload1<Packet>(src);
-          pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
-        }
-        for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst[i * dst_stride] = *src;
-        }
-      }
-    } else {
-      if (dst_stride == 1) {
-        // GATHER
-        const StorageIndex vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
-        for (StorageIndex i = 0; i < vectorized_size; i += PacketSize) {
-          Packet p = pgather<Scalar, Packet>(src + i * src_stride, src_stride);
-          pstoreu<Scalar, Packet>(dst + i, p);
-        }
-        for (StorageIndex i = vectorized_size; i < num_coeff_to_copy; ++i) {
-          dst[i] = src[i * src_stride];
-        }
-      } else {
-        // RANDOM
-        for (StorageIndex i = 0; i < num_coeff_to_copy; ++i) {
-          dst[i * dst_stride] = src[i * src_stride];
-        }
-      }
-    }
-  }
-};
-
-/**
- * \class TensorBlockIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class.
- *
- * This class is responsible for copying data between a tensor and a tensor
- * block.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
-          bool BlockRead>
-class TensorBlockIO {
- public:
-  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
-  typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
-
- protected:
-  typedef array<StorageIndex, NumDims> Dimensions;
-
-  struct BlockIteratorState {
-    StorageIndex input_stride;
-    StorageIndex output_stride;
-    StorageIndex input_span;
-    StorageIndex output_span;
-    StorageIndex size;
-    StorageIndex count;
-    BlockIteratorState()
-        : input_stride(0),
-          output_stride(0),
-          input_span(0),
-          output_span(0),
-          size(0),
-          count(0) {}
-  };
-
-  // Compute how many inner dimensions it's allowed to squeeze when doing IO
-  // between a tensor and a block. It's safe to squeeze inner dimensions, only
-  // if they are not reordered.
-  static int NumSqueezableInnerDims(const Dimensions& tensor_to_block_dim_map) {
-    int num_squeezable_dims = 0;
-    if (Layout == ColMajor) {
-      for (int i = 0; i < NumDims; ++i) {
-        if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++;
-        else break;
-      }
-    } else {
-      for (int i = NumDims - 1; i >= 0; --i) {
-        if (tensor_to_block_dim_map[i] == i) num_squeezable_dims++;
-        else break;
-      }
-    }
-    return num_squeezable_dims;
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const Block& block, StorageIndex first_coeff_index,
-      const Dimensions& tensor_to_block_dim_map,
-      const Dimensions& tensor_strides,
-      const Scalar* src_data,
-      Scalar* dst_data) {
-    // Do not squeeze reordered inner dimensions.
-    int num_squeezable_dims = NumSqueezableInnerDims(tensor_to_block_dim_map);
-
-    // Find the innermost tensor dimension whose size is not 1. This is the
-    // effective inner dim. If all dimensions are of size 1, then fallback to
-    // using the actual innermost dim to avoid out-of-bound access.
-    StorageIndex num_size_one_inner_dims = 0;
-    for (int i = 0; i < num_squeezable_dims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
-      if (block.block_sizes()[tensor_to_block_dim_map[dim]] != 1) {
-        num_size_one_inner_dims = i;
-        break;
-      }
-    }
-
-    // Calculate strides and dimensions.
-    const StorageIndex tensor_stride1_dim = cond<Layout>()(
-        num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
-    const StorageIndex block_dim_for_tensor_stride1_dim =
-        NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
-    StorageIndex block_inner_dim_size =
-        NumDims == 0 ? 1
-                     : block.block_sizes()[block_dim_for_tensor_stride1_dim];
-
-    // Squeeze multiple inner dims into one for larger inner dim size.
-    for (Index i = num_size_one_inner_dims + 1; i < num_squeezable_dims; ++i) {
-      const Index dim = cond<Layout>()(i, NumDims - i - 1);
-      const StorageIndex block_stride =
-          block.block_strides()[tensor_to_block_dim_map[dim]];
-      if (block_inner_dim_size == block_stride &&
-          block_stride == tensor_strides[dim]) {
-        block_inner_dim_size *=
-            block.block_sizes()[tensor_to_block_dim_map[dim]];
-        ++num_size_one_inner_dims;
-      } else {
-        break;
-      }
-    }
-
-    StorageIndex inputIndex;
-    StorageIndex outputIndex;
-    StorageIndex input_stride;
-    StorageIndex output_stride;
-
-    // Setup strides to read/write along the tensor's stride1 dimension.
-    if (BlockRead) {
-      inputIndex = first_coeff_index;
-      outputIndex = 0;
-      input_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
-      output_stride =
-          NumDims == 0
-              ? 1
-              : block.block_strides()[block_dim_for_tensor_stride1_dim];
-    } else {
-      inputIndex = 0;
-      outputIndex = first_coeff_index;
-      input_stride =
-          NumDims == 0
-              ? 1
-              : block.block_strides()[block_dim_for_tensor_stride1_dim];
-      output_stride = NumDims == 0 ? 1 : tensor_strides[tensor_stride1_dim];
-    }
-
-    const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
-    array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
-    // Initialize block iterator state. Squeeze away any dimension of size 1.
-    Index num_squeezed_dims = 0;
-    for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
-      const Index dim = cond<Layout>()(i + 1, NumDims - i - 2);
-      const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]];
-      if (size == 1) {
-        continue;
-      }
-      block_iter_state[num_squeezed_dims].size = size;
-      if (BlockRead) {
-        block_iter_state[num_squeezed_dims].input_stride = tensor_strides[dim];
-        block_iter_state[num_squeezed_dims].output_stride =
-            block.block_strides()[tensor_to_block_dim_map[dim]];
-      } else {
-        block_iter_state[num_squeezed_dims].input_stride =
-            block.block_strides()[tensor_to_block_dim_map[dim]];
-        block_iter_state[num_squeezed_dims].output_stride = tensor_strides[dim];
-      }
-      block_iter_state[num_squeezed_dims].input_span =
-          block_iter_state[num_squeezed_dims].input_stride *
-          (block_iter_state[num_squeezed_dims].size - 1);
-      block_iter_state[num_squeezed_dims].output_span =
-          block_iter_state[num_squeezed_dims].output_stride *
-          (block_iter_state[num_squeezed_dims].size - 1);
-      ++num_squeezed_dims;
-    }
-
-    // Iterate copying data from src to dst.
-    const StorageIndex block_total_size =
-        NumDims == 0 ? 1 : block.block_sizes().TotalSize();
-    for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
-      BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
-                       dst_data, inputIndex, input_stride, src_data);
-      // Update index.
-      for (int j = 0; j < num_squeezed_dims; ++j) {
-        if (++block_iter_state[j].count < block_iter_state[j].size) {
-          inputIndex += block_iter_state[j].input_stride;
-          outputIndex += block_iter_state[j].output_stride;
-          break;
-        }
-        block_iter_state[j].count = 0;
-        inputIndex -= block_iter_state[j].input_span;
-        outputIndex -= block_iter_state[j].output_span;
-      }
-    }
-  }
-};
-
-/**
- * \class TensorBlockReader
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block reader class.
- *
- * This class is responsible for reading a tensor block.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
-                                               Layout, /*BlockRead=*/true> {
- public:
-  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      Block* block, const Scalar* src_data) {
-    array<StorageIndex, NumDims> tensor_to_block_dim_map;
-    for (int i = 0; i < NumDims; ++i) {
-      tensor_to_block_dim_map[i] = i;
-    }
-    Base::Copy(*block, block->first_coeff_index(), tensor_to_block_dim_map,
-               block->tensor_strides(), src_data, block->data());
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      Block* block, StorageIndex first_coeff_index,
-      const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
-      const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
-    Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
-               tensor_strides, src_data, block->data());
-  }
-};
-
-/**
- * \class TensorBlockWriter
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block writer class.
- *
- * This class is responsible for writing a tensor block.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
-                                               Layout, /*BlockRead=*/false> {
- public:
-  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Block& block, Scalar* dst_data) {
-    array<StorageIndex, NumDims> tensor_to_block_dim_map;
-    for (int i = 0; i < NumDims; ++i) {
-      tensor_to_block_dim_map[i] = i;
-    }
-    Base::Copy(block, block.first_coeff_index(), tensor_to_block_dim_map,
-               block.tensor_strides(), block.data(), dst_data);
-  }
-
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const Block& block, StorageIndex first_coeff_index,
-      const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
-      const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
-    Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
-               tensor_strides, block.data(), dst_data);
-  }
-};
-
-/**
- * \class TensorBlockCwiseUnaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Carries out a cwise binary op on a number of coefficients.
- *
- * This class reads strided input from the argument, and writes the
- * result of the cwise unary op to the strided output array.
- *
- */
-template <bool Vectorizable>
-struct TensorBlockCwiseUnaryOp {
-  template <typename StorageIndex, typename UnaryFunctor,
-            typename OutputScalar, typename InputScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const UnaryFunctor& functor, const StorageIndex num_coeff,
-      const StorageIndex output_index, const StorageIndex output_stride,
-      OutputScalar* output_data, const StorageIndex input_index,
-      const StorageIndex input_stride, const InputScalar* input_data) {
-    typedef const Array<InputScalar, Dynamic, 1> Input;
-    typedef Array<OutputScalar, Dynamic, 1> Output;
-
-    typedef Map<Input, 0, InnerStride<> > InputMap;
-    typedef Map<Output, 0, InnerStride<> > OutputMap;
-
-    const InputScalar* input_base = &input_data[input_index];
-    OutputScalar* output_base = &output_data[output_index];
-
-    const InputMap input(input_base, num_coeff, InnerStride<>(input_stride));
-    OutputMap output(output_base, num_coeff, InnerStride<>(output_stride));
-
-    output = CwiseUnaryOp<UnaryFunctor, InputMap>(input, functor);
-  }
-};
-
-template<>
-struct TensorBlockCwiseUnaryOp<true> {
-  template <typename StorageIndex, typename UnaryFunctor,
-            typename OutputScalar, typename InputScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const UnaryFunctor& functor, const StorageIndex num_coeff,
-      const StorageIndex output_index, const StorageIndex output_stride,
-      OutputScalar* output_data, const StorageIndex input_index,
-      const StorageIndex input_stride, const InputScalar* input_data) {
-    if (input_stride == 1 && output_stride == 1) {
-      typedef const Array<InputScalar, Dynamic, 1> Input;
-      typedef Array<OutputScalar, Dynamic, 1> Output;
-
-      const Map<Input> input(&input_data[input_index], num_coeff);
-      Map<Output> output(&output_data[output_index], num_coeff);
-
-      output = CwiseUnaryOp<UnaryFunctor, Map<Input> >(input, functor);
-    } else {
-      TensorBlockCwiseUnaryOp<false>::Run(
-          functor, num_coeff, output_index, output_stride, output_data,
-          input_index, input_stride, input_data);
-    }
-  }
-};
-
-/**
- * \class TensorBlockCwiseUnaryIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class for carrying out cwise unary ops.
- *
- * This class carries out the unary op on given blocks.
- */
-template <typename UnaryFunctor, typename StorageIndex, typename OutputScalar,
-          int NumDims, int Layout>
-struct TensorBlockCwiseUnaryIO {
-  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
-                                         Layout>::Dimensions Dimensions;
-
-  typedef TensorBlockCwiseUnaryOp<
-      packet_traits<OutputScalar>::Vectorizable &&
-      functor_traits<UnaryFunctor>::PacketAccess>
-      TensorBlockCwiseUnaryOpImpl;
-
-  struct BlockIteratorState {
-    StorageIndex output_stride, output_span;
-    StorageIndex input_stride, input_span;
-    StorageIndex size, count;
-  };
-
-  template <typename InputScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const UnaryFunctor& functor, const Dimensions& block_sizes,
-      const Dimensions& block_strides, OutputScalar* output_data,
-      const array<StorageIndex, NumDims>& input_strides,
-      const InputScalar* input_data) {
-    // Find the innermost dimension whose size is not 1. This is the effective
-    // inner dim. If all dimensions are of size 1, fallback to using the actual
-    // innermost dim to avoid out-of-bound access.
-    int num_size_one_inner_dims = 0;
-    for (int i = 0; i < NumDims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
-      if (block_sizes[dim] != 1) {
-        num_size_one_inner_dims = i;
-        break;
-      }
-    }
-    // Calculate strides and dimensions.
-    const int inner_dim =
-        NumDims == 0 ? 1
-                     : cond<Layout>()(num_size_one_inner_dims,
-                                      NumDims - num_size_one_inner_dims - 1);
-    StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
-    for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
-      // Merge multiple inner dims into one for larger inner dim size (i.e.
-      // fewer calls to TensorBlockCwiseUnaryOp::Run()).
-      if (inner_dim_size == block_strides[dim] &&
-          block_strides[dim] == input_strides[dim]) {
-        inner_dim_size *= block_sizes[dim];
-        ++num_size_one_inner_dims;
-      } else {
-        break;
-      }
-    }
-
-    StorageIndex output_index = 0, input_index = 0;
-
-    const StorageIndex output_stride =
-        NumDims == 0 ? 1 : block_strides[inner_dim];
-    const StorageIndex input_stride =
-        NumDims == 0 ? 1 : input_strides[inner_dim];
-
-    const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
-    array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
-    // Initialize block iterator state. Squeeze away any dimension of size 1.
-    int num_squeezed_dims = 0;
-    for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
-      const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
-      const StorageIndex size = block_sizes[dim];
-      if (size == 1) {
-        continue;
-      }
-      BlockIteratorState& state = block_iter_state[num_squeezed_dims];
-      state.output_stride = block_strides[dim];
-      state.input_stride = input_strides[dim];
-      state.size = size;
-      state.output_span = state.output_stride * (size - 1);
-      state.input_span = state.input_stride * (size - 1);
-      state.count = 0;
-      ++num_squeezed_dims;
-    }
-
-    // Compute cwise unary op.
-    const StorageIndex block_total_size =
-        NumDims == 0 ? 1 : block_sizes.TotalSize();
-    for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) {
-      TensorBlockCwiseUnaryOpImpl::Run(functor, inner_dim_size, output_index,
-                                       output_stride, output_data, input_index,
-                                       input_stride, input_data);
-      // Update index.
-      for (int j = 0; j < num_squeezed_dims; ++j) {
-        BlockIteratorState& state = block_iter_state[j];
-        if (++state.count < state.size) {
-          output_index += state.output_stride;
-          input_index += state.input_stride;
-          break;
-        }
-        state.count = 0;
-        output_index -= state.output_span;
-        input_index -= state.input_span;
-      }
-    }
-  }
-};
-
-/**
- * \class TensorBlockCwiseBinaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Carries out a cwise binary op on a number of coefficients.
- *
- * This class reads strided inputs from left and right operands, and writes the
- * result of the cwise binary op to the strided output array.
- *
- */
-template<bool Vectorizable>
-struct TensorBlockCwiseBinaryOp {
-  template <typename StorageIndex, typename BinaryFunctor, typename OutputScalar,
-            typename LeftScalar, typename RightScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const BinaryFunctor& functor, const StorageIndex num_coeff,
-      const StorageIndex output_index, const StorageIndex output_stride,
-      OutputScalar* output_data, const StorageIndex left_index,
-      const StorageIndex left_stride, const LeftScalar* left_data,
-      const StorageIndex right_index, const StorageIndex right_stride,
-      const RightScalar* right_data) {
-    typedef const Array<LeftScalar, Dynamic, 1> Lhs;
-    typedef const Array<RightScalar, Dynamic, 1> Rhs;
-    typedef Array<OutputScalar, Dynamic, 1> Out;
-
-    typedef Map<Lhs, 0, InnerStride<> > LhsMap;
-    typedef Map<Rhs, 0, InnerStride<> > RhsMap;
-    typedef Map<Out, 0, InnerStride<> > OutMap;
-
-    const LeftScalar* lhs_base = &left_data[left_index];
-    const RightScalar* rhs_base = &right_data[right_index];
-    OutputScalar* out_base = &output_data[output_index];
-
-    const LhsMap lhs(lhs_base, num_coeff, InnerStride<>(left_stride));
-    const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
-    OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
-
-    out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
-  }
-};
-
-template<>
-struct TensorBlockCwiseBinaryOp<true> {
-  template <typename StorageIndex, typename BinaryFunctor, typename OutputScalar,
-            typename LeftScalar, typename RightScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const BinaryFunctor& functor, const StorageIndex num_coeff,
-      const StorageIndex output_index, const StorageIndex output_stride,
-      OutputScalar* output_data, const StorageIndex left_index,
-      const StorageIndex left_stride, const LeftScalar* left_data,
-      const StorageIndex right_index, const StorageIndex right_stride,
-      const RightScalar* right_data) {
-    if (left_stride == 1 && right_stride == 1 && output_stride == 1) {
-      typedef const Array<LeftScalar, Dynamic, 1> Lhs;
-      typedef const Array<RightScalar, Dynamic, 1> Rhs;
-      typedef Array<OutputScalar, Dynamic, 1> Out;
-
-      const LeftScalar* lhs_base = &left_data[left_index];
-      const RightScalar* rhs_base = &right_data[right_index];
-      OutputScalar* out_base = &output_data[output_index];
-
-      const Map<Lhs> lhs(lhs_base, num_coeff);
-      const Map<Rhs> rhs(rhs_base, num_coeff);
-      Map<Out> out(out_base, num_coeff);
-
-      out = CwiseBinaryOp<BinaryFunctor, Map<Lhs>, Map<Rhs> >(lhs, rhs, functor);
-    } else {
-      TensorBlockCwiseBinaryOp<false>::Run(
-          functor, num_coeff, output_index, output_stride, output_data,
-          left_index, left_stride, left_data, right_index, right_stride,
-          right_data);
-    }
-  }
-};
-
-/**
- * \class TensorBlockCwiseBinaryIO
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block IO class for carrying out cwise binary ops.
- *
- * This class carries out the binary op on given blocks.
- *
- */
-template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
-          int NumDims, int Layout>
-struct TensorBlockCwiseBinaryIO {
-  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
-
-  typedef TensorBlockCwiseBinaryOp<
-      packet_traits<OutputScalar>::Vectorizable &&
-      functor_traits<BinaryFunctor>::PacketAccess>
-      TensorBlockCwiseBinaryOpImpl;
-
-  struct BlockIteratorState {
-    StorageIndex output_stride, output_span;
-    StorageIndex left_stride, left_span;
-    StorageIndex right_stride, right_span;
-    StorageIndex size, count;
-  };
-
-  template <typename LeftScalar, typename RightScalar>
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const BinaryFunctor& functor, const Dimensions& block_sizes,
-      const Dimensions& block_strides, OutputScalar* output_data,
-      const array<StorageIndex, NumDims>& left_strides,
-      const LeftScalar* left_data,
-      const array<StorageIndex, NumDims>& right_strides,
-      const RightScalar* right_data) {
-    // Find the innermost dimension whose size is not 1. This is the effective
-    // inner dim. If all dimensions are of size 1, fallback to using the actual
-    // innermost dim to avoid out-of-bound access.
-    int num_size_one_inner_dims = 0;
-    for (int i = 0; i < NumDims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
-      if (block_sizes[dim] != 1) {
-        num_size_one_inner_dims = i;
-        break;
-      }
-    }
-    // Calculate strides and dimensions.
-    const int inner_dim =
-        NumDims == 0 ? 1
-                     : cond<Layout>()(num_size_one_inner_dims,
-                                      NumDims - num_size_one_inner_dims - 1);
-    StorageIndex inner_dim_size = NumDims == 0 ? 1 : block_sizes[inner_dim];
-    for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
-      // Merge multiple inner dims into one for larger inner dim size (i.e.
-      // fewer calls to TensorBlockCwiseBinaryOp::Run()).
-      if (inner_dim_size == block_strides[dim] &&
-          block_strides[dim] == left_strides[dim] &&
-          block_strides[dim] == right_strides[dim]) {
-        inner_dim_size *= block_sizes[dim];
-        ++num_size_one_inner_dims;
-      } else {
-        break;
-      }
-    }
-
-    StorageIndex output_index = 0, left_index = 0, right_index = 0;
-    const StorageIndex output_stride =
-        NumDims == 0 ? 1 : block_strides[inner_dim];
-    const StorageIndex left_stride = NumDims == 0 ? 1 : left_strides[inner_dim];
-    const StorageIndex right_stride =
-        NumDims == 0 ? 1 : right_strides[inner_dim];
-
-    const int at_least_1_dim = NumDims <= 1 ? 1 : NumDims - 1;
-    array<BlockIteratorState, at_least_1_dim> block_iter_state;
-
-    // Initialize block iterator state. Squeeze away any dimension of size 1.
-    int num_squeezed_dims = 0;
-    for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
-      const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
-      const StorageIndex size = block_sizes[dim];
-      if (size == 1) {
-        continue;
-      }
-      BlockIteratorState& state = block_iter_state[num_squeezed_dims];
-      state.output_stride = block_strides[dim];
-      state.left_stride = left_strides[dim];
-      state.right_stride = right_strides[dim];
-      state.size = size;
-      state.output_span = state.output_stride * (size - 1);
-      state.left_span = state.left_stride * (size - 1);
-      state.right_span = state.right_stride * (size - 1);
-      state.count = 0;
-      ++num_squeezed_dims;
-    }
-
-    // Compute cwise binary op.
-    const StorageIndex block_total_size =
-        NumDims == 0 ? 1 : block_sizes.TotalSize();
-    for (StorageIndex i = 0; i < block_total_size; i += inner_dim_size) {
-      TensorBlockCwiseBinaryOpImpl::Run(functor, inner_dim_size, output_index,
-                                        output_stride, output_data, left_index,
-                                        left_stride, left_data, right_index,
-                                        right_stride, right_data);
-      // Update index.
-      for (int j = 0; j < num_squeezed_dims; ++j) {
-        BlockIteratorState& state = block_iter_state[j];
-        if (++state.count < state.size) {
-          output_index += state.output_stride;
-          left_index += state.left_stride;
-          right_index += state.right_stride;
-          break;
-        }
-        state.count = 0;
-        output_index -= state.output_span;
-        left_index -= state.left_span;
-        right_index -= state.right_span;
-      }
-    }
-  }
-};
-
-/**
- * \class TensorBlockView
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Read-only view into a block of data.
- *
- * This class provides read-only access to a block of data in impl. It may need
- * to allocate space for holding the intermediate result.
- *
- */
-template <class ArgType, class Device>
-struct TensorBlockView {
-  typedef TensorEvaluator<ArgType, Device> Impl;
-  typedef typename Impl::Index StorageIndex;
-  typedef typename remove_const<typename Impl::Scalar>::type Scalar;
-  static const int NumDims = array_size<typename Impl::Dimensions>::value;
-  typedef DSizes<StorageIndex, NumDims> Dimensions;
-
-  // Constructs a TensorBlockView for `impl`. `block` is only used for for
-  // specifying the start offset, shape, and strides of the block.
-  template <typename OtherTensorBlock>
-  TensorBlockView(const Device& device,
-                  const TensorEvaluator<ArgType, Device>& impl,
-                  const OtherTensorBlock& block)
-      : m_device(device),
-        m_block_sizes(block.block_sizes()),
-        m_data(NULL),
-        m_allocated_data(NULL) {
-    if (Impl::RawAccess && impl.data() != NULL) {
-      m_data = impl.data() + block.first_coeff_index();
-      m_block_strides = block.tensor_strides();
-    } else {
-      // Actually make a copy.
-
-      // TODO(wuke): This sometimes put a lot pressure on the heap allocator.
-      // Consider allowing ops to request additional temporary block memory in
-      // TensorOpResourceRequirements.
-      m_allocated_data = static_cast<Scalar*>(
-          m_device.allocate(m_block_sizes.TotalSize() * sizeof(Scalar)));
-      m_data = m_allocated_data;
-      if (NumDims > 0) {
-        if (static_cast<int>(Impl::Layout) == static_cast<int>(ColMajor)) {
-          m_block_strides[0] = 1;
-          for (int i = 1; i < NumDims; ++i) {
-            m_block_strides[i] = m_block_strides[i - 1] * m_block_sizes[i - 1];
-          }
-        } else {
-          m_block_strides[NumDims - 1] = 1;
-          for (int i = NumDims - 2; i >= 0; --i) {
-            m_block_strides[i] = m_block_strides[i + 1] * m_block_sizes[i + 1];
-          }
-        }
-      }
-      TensorBlock<Scalar, StorageIndex, NumDims, Impl::Layout> input_block(
-          block.first_coeff_index(), m_block_sizes, m_block_strides,
-          block.tensor_strides(), m_allocated_data);
-      impl.block(&input_block);
-    }
-  }
-
-  ~TensorBlockView() {
-    if (m_allocated_data != NULL) {
-      m_device.deallocate(m_allocated_data);
-    }
-  }
-
-  const Dimensions& block_sizes() const { return m_block_sizes; }
-  const Dimensions& block_strides() const { return m_block_strides; }
-  const Scalar* data() const { return m_data; }
-
- private:
-  const Device EIGEN_DEVICE_REF m_device;
-  Dimensions m_block_sizes, m_block_strides;
-  const Scalar* m_data;      // Not owned.
-  Scalar* m_allocated_data;  // Owned.
-};
-
 /**
  * \class TensorBlockMapper
  * \ingroup CXX11_Tensor_Module
@@ -1108,137 +332,6 @@ class TensorBlockMapper {
   StorageIndex m_total_block_count;
 };
 
-/**
- * \class TensorSliceBlockMapper
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor slice block mapper class.
- *
- * This class is responsible for iterating over the blocks of
- * a slice of a tensor. Supports shuffling of the block strides
- * for callers that want to reduce strides for dimensions to be
- * processed together.
- *
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorSliceBlockMapper {
- public:
-  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
-  typedef DSizes<StorageIndex, NumDims> Dimensions;
-
-  TensorSliceBlockMapper(const Dimensions& tensor_dims,
-                         const Dimensions& tensor_slice_offsets,
-                         const Dimensions& tensor_slice_extents,
-                         const Dimensions& block_dim_sizes,
-                         const Dimensions& block_stride_order)
-      : m_tensor_dimensions(tensor_dims),
-        m_tensor_slice_offsets(tensor_slice_offsets),
-        m_tensor_slice_extents(tensor_slice_extents),
-        m_block_dim_sizes(block_dim_sizes),
-        m_block_stride_order(block_stride_order),
-        m_total_block_count(1) {
-    // Calculate block counts by dimension and total block count.
-    DSizes<StorageIndex, NumDims> block_count;
-    for (Index i = 0; i < block_count.rank(); ++i) {
-      block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]);
-    }
-    m_total_block_count = array_prod(block_count);
-
-    // Calculate block strides (used for enumerating blocks).
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_block_strides[0] = 1;
-      m_tensor_strides[0] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
-        m_tensor_strides[i] =
-            m_tensor_strides[i - 1] * m_tensor_dimensions[i - 1];
-      }
-    } else {
-      m_block_strides[NumDims - 1] = 1;
-      m_tensor_strides[NumDims - 1] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
-        m_tensor_strides[i] =
-            m_tensor_strides[i + 1] * m_tensor_dimensions[i + 1];
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
-  GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
-    StorageIndex first_coeff_index = 0;
-    DSizes<StorageIndex, NumDims> coords;
-    DSizes<StorageIndex, NumDims> sizes;
-    DSizes<StorageIndex, NumDims> strides;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      for (int i = NumDims - 1; i > 0; --i) {
-        const Index idx = block_index / m_block_strides[i];
-        coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
-        sizes[i] = numext::mini(
-            m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
-            m_block_dim_sizes[i]);
-        block_index -= idx * m_block_strides[i];
-        first_coeff_index += coords[i] * m_tensor_strides[i];
-      }
-      coords[0] =
-          m_tensor_slice_offsets[0] + block_index * m_block_dim_sizes[0];
-      sizes[0] = numext::mini(
-          m_tensor_slice_offsets[0] + m_tensor_slice_extents[0] - coords[0],
-          m_block_dim_sizes[0]);
-      first_coeff_index += coords[0] * m_tensor_strides[0];
-
-      StorageIndex prev_dim = m_block_stride_order[0];
-      strides[prev_dim] = 1;
-      for (int i = 1; i < NumDims; ++i) {
-        const StorageIndex curr_dim = m_block_stride_order[i];
-        strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
-        prev_dim = curr_dim;
-      }
-    } else {
-      for (int i = 0; i < NumDims - 1; ++i) {
-        const StorageIndex idx = block_index / m_block_strides[i];
-        coords[i] = m_tensor_slice_offsets[i] + idx * m_block_dim_sizes[i];
-        sizes[i] = numext::mini(
-            m_tensor_slice_offsets[i] + m_tensor_slice_extents[i] - coords[i],
-            m_block_dim_sizes[i]);
-        block_index -= idx * m_block_strides[i];
-        first_coeff_index += coords[i] * m_tensor_strides[i];
-      }
-      coords[NumDims - 1] = m_tensor_slice_offsets[NumDims - 1] +
-                            block_index * m_block_dim_sizes[NumDims - 1];
-      sizes[NumDims - 1] = numext::mini(
-          m_tensor_slice_offsets[NumDims - 1] +
-              m_tensor_slice_extents[NumDims - 1] - coords[NumDims - 1],
-          m_block_dim_sizes[NumDims - 1]);
-      first_coeff_index += coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
-      StorageIndex prev_dim = m_block_stride_order[NumDims - 1];
-      strides[prev_dim] = 1;
-      for (int i = NumDims - 2; i >= 0; --i) {
-        const StorageIndex curr_dim = m_block_stride_order[i];
-        strides[curr_dim] = strides[prev_dim] * sizes[prev_dim];
-        prev_dim = curr_dim;
-      }
-    }
-
-    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
-    return m_total_block_count;
-  }
-
- private:
-  Dimensions m_tensor_dimensions;
-  Dimensions m_tensor_slice_offsets;
-  Dimensions m_tensor_slice_extents;
-  Dimensions m_tensor_strides;
-  Dimensions m_block_dim_sizes;
-  Dimensions m_block_stride_order;
-  Dimensions m_block_strides;
-  StorageIndex m_total_block_count;
-};
-
 }  // namespace internal
 
 }  // namespace Eigen
-- 
cgit v1.2.3