author     Eugene Zhulenev <ezhulenev@google.com>    2019-12-10 11:58:30 -0800
committer  Eugene Zhulenev <ezhulenev@google.com>    2019-12-10 14:31:44 -0800
commit     dbca11e8805ec07660d8f966a1884ad0be302f15
tree       9da1438132a9a40de7ca3abafec2e559eb0449e3
parent     c49f0d851ab77c9e4d782b453b4b0428bce903d3
Remove TensorBlock.h and old TensorBlock/BlockMapper
-rw-r--r--  unsupported/Eigen/CXX11/Tensor                         |   1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h      |   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h       | 305
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h     | 168
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h    |  51
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h   |   7
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h    |  41
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h   |   3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h  |   3
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h    |  14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h     |   9
-rw-r--r--  unsupported/test/cxx11_tensor_block_access.cpp         | 404
-rw-r--r--  unsupported/test/cxx11_tensor_block_eval.cpp           |  17
-rw-r--r--  unsupported/test/cxx11_tensor_block_io.cpp             |  61

14 files changed, 421 insertions(+), 671 deletions(-)
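For orientation before reading the diffs: every call site migrates from the removed V1 block classes to the V2 descriptor-based API. A paraphrased sketch of the recurring pattern (names taken from the diffs below; not a compilable excerpt):

    // Before (removed in this commit):
    //   TensorBlockMapper<Scalar, StorageIndex, NumDims, Layout> mapper(
    //       dims, requirements.shapeV1(), requirements.size);
    //   TensorBlock block = mapper.GetBlockForIndex(i, /*data=*/NULL);
    //   TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
    //
    // After:
    //   TensorBlockV2Mapper<NumDims, Layout, StorageIndex> mapper(dims, requirements);
    //   TensorBlockDesc desc = mapper.blockDescriptor(i);
    //
    // Accessor renames: total_block_count() -> blockCount(),
    // block_dims_total_size() -> blockTotalSize(),
    // block_dim_sizes() -> blockDimensions().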
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index f8a62253c..10786048e 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -97,7 +97,6 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorGlobalFunctions.h"
 
 #include "src/Tensor/TensorBase.h"
-#include "src/Tensor/TensorBlock.h"
 #include "src/Tensor/TensorBlockV2.h"
 
 #include "src/Tensor/TensorEvaluator.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index c4f6f86e8..22d672aa4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -116,20 +116,12 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
   };
 
-  typedef typename internal::TensorBlock<
-      typename internal::remove_const<Scalar>::type, Index, NumDims, Layout>
-      TensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
 
   typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlockV2
       RightTensorBlock;
-
-  typedef internal::TensorBlockAssignment<
-      Scalar, NumDims, typename RightTensorBlock::XprType, Index>
-      TensorBlockAssignment;
   //===--------------------------------------------------------------------===//
 
   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
deleted file mode 100644
index ba11bf7a8..000000000
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ /dev/null
@@ -1,305 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2018 Andy Davis <andydavis@google.com>
-// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
-
-namespace Eigen {
-namespace internal {
-
-namespace {
-
-// Helper template to choose between ColMajor and RowMajor values.
-template <int Layout>
-struct cond;
-
-template <>
-struct cond<ColMajor> {
-  template <typename T>
-  EIGEN_STRONG_INLINE const T& operator()(const T& col,
-                                          const T& /*row*/) const {
-    return col;
-  }
-};
-
-template <>
-struct cond<RowMajor> {
-  template <typename T>
-  EIGEN_STRONG_INLINE const T& operator()(const T& /*col*/,
-                                          const T& row) const {
-    return row;
-  }
-};
-
-}  // namespace
-
-/**
- * \enum TensorBlockShapeType
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block shape type.
- *
- * Tensor block shape type defines what are the shape preference for the blocks
- * extracted from the larger tensor.
- *
- * Example:
- *
- * We want to extract blocks of 100 elements from the large 100x100 tensor:
- *  - tensor: 100x100
- *  - target_block_size: 100
- *
- * TensorBlockShapeType:
- *  - kUniformAllDims: 100 blocks of size 10x10
- *  - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100 depending on a column
- *    or row major layout)
- */
-enum TensorBlockShapeType {
-  kUniformAllDims,
-  kSkewedInnerDims
-};
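The 100x100 example in the deleted doc comment above works out as follows; a minimal standalone sketch (plain C++, independent of Eigen) that reproduces the arithmetic of the two shape types:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Standalone illustration (not part of Eigen) of the block-shape
    // arithmetic documented above, for a 100x100 tensor and a target of
    // 100 coefficients per block.
    int main() {
      const int dims[2] = {100, 100};
      const int target_block_size = 100;

      // kUniformAllDims: aim for a "square" block, target^(1/rank) per side.
      const int uniform_side = static_cast<int>(
          std::pow(static_cast<float>(target_block_size), 1.0f / 2.0f));
      std::printf("kUniformAllDims:  %d x %d blocks of size %dx%d\n",
                  dims[0] / uniform_side, dims[1] / uniform_side,
                  uniform_side, uniform_side);  // 10 x 10 blocks of size 10x10

      // kSkewedInnerDims: give the whole budget to the innermost dimension
      // first (dimension 0 for ColMajor), then move outward.
      const int inner = std::min(target_block_size, dims[0]);    // 100
      const int outer = std::max(1, target_block_size / inner);  // 1
      std::printf("kSkewedInnerDims: blocks of size %dx%d\n", inner, outer);
      return 0;
    }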
-
-/**
- * \class TensorBlock
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block class.
- *
- * This class represents a tensor block specified by the index of the
- * first block coefficient, and the size of the block in each dimension.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlock {
- public:
-  typedef DSizes<StorageIndex, NumDims> Dimensions;
-
-  TensorBlock(const StorageIndex first_coeff_index, const Dimensions& block_sizes,
-              const Dimensions& block_strides, const Dimensions& tensor_strides,
-              Scalar* data)
-      : m_first_coeff_index(first_coeff_index),
-        m_block_sizes(block_sizes),
-        m_block_strides(block_strides),
-        m_tensor_strides(tensor_strides),
-        m_data(data) {}
-
-  StorageIndex first_coeff_index() const { return m_first_coeff_index; }
-
-  const Dimensions& block_sizes() const { return m_block_sizes; }
-
-  const Dimensions& block_strides() const { return m_block_strides; }
-
-  const Dimensions& tensor_strides() const { return m_tensor_strides; }
-
-  Scalar* data() { return m_data; }
-
-  const Scalar* data() const { return m_data; }
-
- private:
-  StorageIndex m_first_coeff_index;
-  Dimensions m_block_sizes;
-  Dimensions m_block_strides;
-  Dimensions m_tensor_strides;
-  Scalar* m_data;  // Not owned.
-};
-
-/**
- * \class TensorBlockMapper
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor block mapper class.
- *
- * This class is responsible for iterating over the blocks of a tensor.
- */
-template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
-class TensorBlockMapper {
- public:
-  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
-  typedef DSizes<StorageIndex, NumDims> Dimensions;
-
-  TensorBlockMapper() {}
-  TensorBlockMapper(const Dimensions& dims,
-                    const TensorBlockShapeType block_shape,
-                    Index min_target_size)
-      : m_dimensions(dims),
-        m_block_dim_sizes(BlockDimensions(dims, block_shape, convert_index<StorageIndex>(min_target_size))) {
-    // Calculate block counts by dimension and total block count.
-    DSizes<StorageIndex, NumDims> block_count;
-    for (Index i = 0; i < block_count.rank(); ++i) {
-      block_count[i] = divup(m_dimensions[i], m_block_dim_sizes[i]);
-    }
-    m_total_block_count = array_prod(block_count);
-
-    // Calculate block strides (used for enumerating blocks).
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        m_block_strides[0] = 1;
-        m_tensor_strides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1];
-          m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1];
-        }
-      } else {
-        m_block_strides[NumDims - 1] = 1;
-        m_tensor_strides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1];
-          m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1];
-        }
-      }
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
-  GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
-    StorageIndex first_coeff_index = 0;
-    DSizes<StorageIndex, NumDims> coords;
-    DSizes<StorageIndex, NumDims> sizes;
-    DSizes<StorageIndex, NumDims> strides;
-    if (NumDims > 0) {
-      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-        for (int i = NumDims - 1; i > 0; --i) {
-          const StorageIndex idx = block_index / m_block_strides[i];
-          coords[i] = idx * m_block_dim_sizes[i];
-          sizes[i] =
-              numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
-          block_index -= idx * m_block_strides[i];
-          first_coeff_index += coords[i] * m_tensor_strides[i];
-        }
-        coords[0] = block_index * m_block_dim_sizes[0];
-        sizes[0] =
-            numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]);
-        first_coeff_index += coords[0] * m_tensor_strides[0];
-
-        strides[0] = 1;
-        for (int i = 1; i < NumDims; ++i) {
-          strides[i] = strides[i - 1] * sizes[i - 1];
-        }
-      } else {
-        for (int i = 0; i < NumDims - 1; ++i) {
-          const StorageIndex idx = block_index / m_block_strides[i];
-          coords[i] = idx * m_block_dim_sizes[i];
-          sizes[i] =
-              numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]);
-          block_index -= idx * m_block_strides[i];
-          first_coeff_index += coords[i] * m_tensor_strides[i];
-        }
-        coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1];
-        sizes[NumDims - 1] =
-            numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]),
-                         m_block_dim_sizes[NumDims - 1]);
-        first_coeff_index +=
-            coords[NumDims - 1] * m_tensor_strides[NumDims - 1];
-
-        strides[NumDims - 1] = 1;
-        for (int i = NumDims - 2; i >= 0; --i) {
-          strides[i] = strides[i + 1] * sizes[i + 1];
-        }
-      }
-    }
-
-    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
-    return m_total_block_count;
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex
-  block_dims_total_size() const {
-    return m_block_dim_sizes.TotalSize();
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&
-  block_dim_sizes() const {
-    return m_block_dim_sizes;
-  }
-
- private:
-  static Dimensions BlockDimensions(const Dimensions& tensor_dims,
-                                    const TensorBlockShapeType block_shape,
-                                    StorageIndex min_target_size) {
-    min_target_size = numext::maxi<StorageIndex>(1, min_target_size);
-
-    // If tensor fully fits into the target size, we'll treat it a single block.
-    Dimensions block_dim_sizes = tensor_dims;
-
-    if (tensor_dims.TotalSize() == 0) {
-      // Corner case: one of the dimensions is zero. Logic below is too complex
-      // to handle this case on a general basis, just use unit block size.
-      // Note: we must not yield blocks with zero dimensions (recipe for
-      // overflows/underflows, divisions by zero and NaNs later).
-      for (int i = 0; i < NumDims; ++i) {
-        block_dim_sizes[i] = 1;
-      }
-    } else if (block_dim_sizes.TotalSize() > min_target_size) {
-      if (block_shape == kUniformAllDims) {
-        // Tensor will not fit within 'min_target_size' budget: calculate tensor
-        // block dimension sizes based on "square" dimension size target.
-        const StorageIndex dim_size_target = convert_index<StorageIndex>(
-            std::pow(static_cast<float>(min_target_size),
-                     1.0f / static_cast<float>(block_dim_sizes.rank())));
-        for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
-          // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
-          // a multiple of the packet size. Note that reducing
-          // 'block_dim_size' in this manner can increase the number of
-          // blocks, and so will amplify any per-block overhead.
-          block_dim_sizes[i] = numext::mini(dim_size_target, tensor_dims[i]);
-        }
-        // Add any un-allocated coefficients to inner dimension(s).
-        StorageIndex total_size = block_dim_sizes.TotalSize();
-        for (int i = 0; i < NumDims; ++i) {
-          const int dim = cond<Layout>()(i, NumDims - i - 1);
-          if (block_dim_sizes[dim] < tensor_dims[dim]) {
-            const StorageIndex total_size_other_dims =
-                total_size / block_dim_sizes[dim];
-            const StorageIndex alloc_avail =
-                divup<StorageIndex>(min_target_size, total_size_other_dims);
-            if (alloc_avail == block_dim_sizes[dim]) {
-              // Insufficient excess coefficients to allocate.
-              break;
-            }
-            block_dim_sizes[dim] = numext::mini(tensor_dims[dim], alloc_avail);
-            total_size = total_size_other_dims * block_dim_sizes[dim];
-          }
-        }
-      } else if (block_shape == kSkewedInnerDims) {
-        StorageIndex coeff_to_allocate = min_target_size;
-        for (int i = 0; i < NumDims; ++i) {
-          const int dim = cond<Layout>()(i, NumDims - i - 1);
-          block_dim_sizes[dim] =
-              numext::mini(coeff_to_allocate, tensor_dims[dim]);
-          coeff_to_allocate = divup(
-              coeff_to_allocate,
-              numext::maxi(static_cast<StorageIndex>(1), block_dim_sizes[dim]));
-        }
-        eigen_assert(coeff_to_allocate == 1);
-      } else {
-        eigen_assert(false);  // someone added new block shape type
-      }
-    }
-
-    eigen_assert(
-        block_dim_sizes.TotalSize() >=
-        numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
-
-    return block_dim_sizes;
-  }
-
-  Dimensions m_dimensions;
-  Dimensions m_block_dim_sizes;
-  Dimensions m_block_strides;
-  Dimensions m_tensor_strides;
-  StorageIndex m_total_block_count;
-};
-
-}  // namespace internal
-
-}  // namespace Eigen
-
-#endif  // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
index f8814bc8c..029180ca5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlockV2.h
@@ -76,12 +76,6 @@ struct TensorBlockV2ResourceRequirements {
   TensorBlockV2ShapeType shape_type;
   size_t size;
 
-  TensorBlockShapeType shapeV1() const {
-    return shape_type == TensorBlockV2ShapeType::kUniformAllDims
-               ? internal::kUniformAllDims
-               : internal::kSkewedInnerDims;
-  }
-
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
   merge(const TensorBlockV2ResourceRequirements &lhs,
@@ -275,6 +269,168 @@ class TensorBlockDescriptor {
 };
 
 // -------------------------------------------------------------------------- //
+// TensorBlockMapper is responsible for iterating over the blocks of a tensor.
+
+template <int NumDims, int Layout, typename IndexType = Eigen::Index>
+class TensorBlockV2Mapper {
+  typedef TensorBlockDescriptor<NumDims, IndexType> BlockDescriptor;
+
+ public:
+  typedef DSizes<IndexType, NumDims> Dimensions;
+
+  TensorBlockV2Mapper() = default;
+  TensorBlockV2Mapper(const DSizes<IndexType, NumDims>& dimensions,
+                      const TensorBlockV2ResourceRequirements& requirements)
+      : m_tensor_dimensions(dimensions), m_requirements(requirements) {
+    // Initialize `m_block_dimensions`.
+    InitializeBlockDimensions();
+
+    // Calculate block counts by dimension and total block count.
+    DSizes<IndexType, NumDims> block_count;
+    for (int i = 0; i < NumDims; ++i) {
+      block_count[i] = divup(m_tensor_dimensions[i], m_block_dimensions[i]);
+    }
+    m_total_block_count = array_prod(block_count);
+
+    // Calculate block strides (used for enumerating blocks).
+    m_tensor_strides = strides<Layout>(m_tensor_dimensions);
+    m_block_strides = strides<Layout>(block_count);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockCount() const {
+    return m_total_block_count;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE IndexType blockTotalSize() const {
+    return m_block_dimensions.TotalSize();
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DSizes<IndexType, NumDims>&
+  blockDimensions() const {
+    return m_block_dimensions;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  BlockDescriptor blockDescriptor(IndexType block_index) const {
+    static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+    IndexType offset = 0;
+    DSizes<IndexType, NumDims> dimensions;
+
+    if (NumDims == 0) return BlockDescriptor(offset, dimensions);
+
+    // Iterate outer -> inner dimensions.
+    for (int i = NumDims - 1; i >= 0; --i) {
+      const int dim = isColMajor ? i : NumDims - i - 1;
+
+      const IndexType idx = block_index / m_block_strides[dim];
+      block_index -= idx * m_block_strides[dim];
+
+      const IndexType coord = idx * m_block_dimensions[dim];
+      dimensions[dim] = numext::mini(m_tensor_dimensions[dim] - coord,
+                                     m_block_dimensions[dim]);
+      offset += coord * m_tensor_strides[dim];
+    }
+
+    return {offset, dimensions};
+  }
+
+ private:
+  void InitializeBlockDimensions() {
+    // Requested block shape and size.
+    const TensorBlockV2ShapeType shape_type = m_requirements.shape_type;
+    const IndexType target_block_size =
+        numext::maxi<IndexType>(1, static_cast<IndexType>(m_requirements.size));
+
+    // Corner case: one of the dimensions is zero. Logic below is too complex
+    // to handle this case on a general basis, just use unit block size.
+    // Note: we must not yield blocks with zero dimensions (recipe for
+    // overflows/underflows, divisions by zero and NaNs later).
+    if (m_tensor_dimensions.TotalSize() == 0) {
+      for (int i = 0; i < NumDims; ++i) {
+        m_block_dimensions[i] = 1;
+      }
+      return;
+    }
+
+    // If tensor fits into a target block size, evaluate it as a single block.
+    if (m_tensor_dimensions.TotalSize() <= target_block_size) {
+      m_block_dimensions = m_tensor_dimensions;
+      return;
+    }
+
+    static const bool isColMajor = Layout == static_cast<int>(ColMajor);
+
+    // Block shape skewed towards inner dimension.
+    if (shape_type == TensorBlockV2ShapeType::kSkewedInnerDims) {
+      IndexType coeff_to_allocate = target_block_size;
+
+      for (int i = 0; i < NumDims; ++i) {
+        const int dim = isColMajor ? i : NumDims - i - 1;
+        m_block_dimensions[dim] =
+            numext::mini(coeff_to_allocate, m_tensor_dimensions[dim]);
+        coeff_to_allocate = divup(
+            coeff_to_allocate,
+            numext::maxi(static_cast<IndexType>(1), m_block_dimensions[dim]));
+      }
+      eigen_assert(coeff_to_allocate == 1);
+
+    } else if (shape_type == TensorBlockV2ShapeType::kUniformAllDims) {
+      // Tensor will not fit within 'target_block_size' budget: calculate tensor
+      // block dimension sizes based on "square" dimension size target.
+      const IndexType dim_size_target = convert_index<IndexType>(
+          std::pow(static_cast<float>(target_block_size),
+                   1.0f / static_cast<float>(m_block_dimensions.rank())));
+
+      for (int i = 0; i < NumDims; ++i) {
+        // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
+        // a multiple of the packet size. Note that reducing
+        // 'block_dim_size' in this manner can increase the number of
+        // blocks, and so will amplify any per-block overhead.
+        m_block_dimensions[i] =
+            numext::mini(dim_size_target, m_tensor_dimensions[i]);
+      }
+
+      // Add any un-allocated coefficients to inner dimension(s).
+      IndexType total_size = m_block_dimensions.TotalSize();
+      for (int i = 0; i < NumDims; ++i) {
+        const int dim = isColMajor ? i : NumDims - i - 1;
+
+        if (m_block_dimensions[dim] < m_tensor_dimensions[dim]) {
+          const IndexType total_size_other_dims =
+              total_size / m_block_dimensions[dim];
+          const IndexType alloc_avail =
+              divup<IndexType>(target_block_size, total_size_other_dims);
+          if (alloc_avail == m_block_dimensions[dim]) {
+            // Insufficient excess coefficients to allocate.
+            break;
+          }
+          m_block_dimensions[dim] =
+              numext::mini(m_tensor_dimensions[dim], alloc_avail);
+          total_size = total_size_other_dims * m_block_dimensions[dim];
+        }
+      }
+
+    } else {
+      eigen_assert(false);  // unknown block shape
+    }
+
+    eigen_assert(m_block_dimensions.TotalSize() >=
+                 numext::mini<IndexType>(target_block_size,
+                                         m_tensor_dimensions.TotalSize()));
+  }
+
+  DSizes<IndexType, NumDims> m_tensor_dimensions;
+  TensorBlockV2ResourceRequirements m_requirements;
+
+  DSizes<IndexType, NumDims> m_block_dimensions;
+  IndexType m_total_block_count;
+
+  DSizes<IndexType, NumDims> m_tensor_strides;
+  DSizes<IndexType, NumDims> m_block_strides;
+};
+
+// -------------------------------------------------------------------------- //
 // TensorBlockScratchAllocator is responsible for allocating temporary buffers
 // for block evaluation (output or input block materialization). Given that
 // Eigen expression traversal order is deterministic, all temporary allocations
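The blockDescriptor() arithmetic above is easiest to see on a small case. A standalone sketch (plain C++, no Eigen required; the 11x7 tensor and 5x3 block sizes are made-up illustration values) reproducing the offset/dimensions computation for a ColMajor layout:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int kNumDims = 2;
      const int dims[kNumDims] = {11, 7};       // tensor dimensions
      const int block_dims[kNumDims] = {5, 3};  // requested block dimensions

      // divup(a, b) == (a + b - 1) / b; block counts per dimension: 3 and 3.
      int block_count[kNumDims], tensor_strides[kNumDims], block_strides[kNumDims];
      tensor_strides[0] = block_strides[0] = 1;
      for (int i = 0; i < kNumDims; ++i)
        block_count[i] = (dims[i] + block_dims[i] - 1) / block_dims[i];
      for (int i = 1; i < kNumDims; ++i) {
        tensor_strides[i] = tensor_strides[i - 1] * dims[i - 1];
        block_strides[i] = block_strides[i - 1] * block_count[i - 1];
      }

      const int total_blocks = block_count[0] * block_count[1];  // 9
      for (int b = 0; b < total_blocks; ++b) {
        int offset = 0, sizes[kNumDims], rem = b;
        for (int dim = kNumDims - 1; dim >= 0; --dim) {  // outer -> inner
          const int idx = rem / block_strides[dim];
          rem -= idx * block_strides[dim];
          const int coord = idx * block_dims[dim];
          sizes[dim] = std::min(dims[dim] - coord, block_dims[dim]);
          offset += coord * tensor_strides[dim];
        }
        // Partial blocks appear on the trailing edges, e.g. 1x3 and 5x1.
        std::printf("block %d: offset=%d dims=%dx%d\n", b, offset, sizes[0], sizes[1]);
      }
      return 0;
    }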
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 9b835c4de..268c3246a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -447,13 +447,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     RawAccess = false
   };
 
-  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
-
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumInputDims, Layout>
-      InputTensorBlock;
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
-      OutputTensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   //===--------------------------------------------------------------------===//
@@ -506,50 +499,6 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
-      const OutputTensorBlock& output_block) {
-    // Calculate input block sizes.
-    const DSizes<Index, NumDims>& output_block_sizes =
-        output_block.block_sizes();
-    const DSizes<Index, NumDims>& output_block_strides =
-        output_block.block_strides();
-    const Index chip_dim = this->m_dim.actualDim();
-    DSizes<Index, NumInputDims> input_block_sizes;
-    DSizes<Index, NumInputDims> input_block_strides;
-    for (Index i = 0; i < NumInputDims; ++i) {
-      if (i < chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i];
-        input_block_strides[i] = output_block_strides[i];
-      } else if (i > chip_dim) {
-        input_block_sizes[i] = output_block_sizes[i - 1];
-        input_block_strides[i] = output_block_strides[i - 1];
-      } else {
-        input_block_sizes[i] = 1;
-      }
-    }
-    // Fix up input_block_stride for chip dimension.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (chip_dim == 0) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] =
-            input_block_strides[chip_dim - 1] * input_block_sizes[chip_dim - 1];
-      }
-    } else {
-      if (chip_dim == NumInputDims - 1) {
-        input_block_strides[chip_dim] = 1;
-      } else {
-        input_block_strides[chip_dim] =
-            input_block_strides[chip_dim + 1] * input_block_sizes[chip_dim + 1];
-      }
-    }
-    // Write input block.
-    this->m_impl.writeBlock(InputTensorBlock(
-        this->srcCoeff(output_block.first_coeff_index()), input_block_sizes,
-        input_block_strides, this->m_inputStrides,
-        const_cast<ScalarNoConst*>(output_block.data())));
-  }
-
   template <typename TensorBlockV2>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlockV2(
       const TensorBlockDesc& desc, const TensorBlockV2& block) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 4085ad314..613a8347d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -471,8 +471,6 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
   typedef StorageMemory<CoeffReturnType, Device> Storage;
   typedef typename Storage::Type EvaluatorPointerType;
   static const int NumDims = internal::array_size<Dimensions>::value;
-  typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
-      TensorBlock;
 
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
@@ -593,11 +591,6 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
   static const int NumDims = internal::array_size<
       typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
 
-  typedef internal::TensorBlock<
-      typename internal::remove_const<Scalar>::type, Index, NumDims,
-      TensorEvaluator<LeftArgType, Device>::Layout>
-      TensorBlock;
-
   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index db123d8a4..7b7b670ed 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -172,9 +172,8 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(const Expression& expr,
                                       const DefaultDevice& device = DefaultDevice()) {
-    typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
-    typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
-    typedef typename TensorBlock::Dimensions TensorBlockDimensions;
+    typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, StorageIndex>
+        TensorBlockMapper;
 
     typedef internal::TensorBlockDescriptor<NumDims, StorageIndex>
         TensorBlockDesc;
@@ -192,17 +191,15 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
         evaluator.getResourceRequirements();
 
     const TensorBlockMapper block_mapper(
-        TensorBlockDimensions(evaluator.dimensions()), requirements.shapeV1(),
-        requirements.size);
+        typename TensorBlockDesc::Dimensions(evaluator.dimensions()),
+        requirements);
 
     // Share scratch memory allocator between all blocks.
     TensorBlockScratch scratch(device);
 
-    const StorageIndex total_block_count = block_mapper.total_block_count();
+    const StorageIndex total_block_count = block_mapper.blockCount();
 
     for (StorageIndex i = 0; i < total_block_count; ++i) {
-      TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
-
-      TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+      TensorBlockDesc desc = block_mapper.blockDescriptor(i);
       evaluator.evalBlockV2(desc, scratch);
       scratch.reset();
     }
@@ -226,8 +223,6 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
 template <typename TensorBlockMapper>
 struct TensorExecutorTilingContext {
-  typedef typename TensorBlockMapper::Block TensorBlock;
-
   TensorExecutorTilingContext() : buffer(nullptr) {}
   TensorExecutorTilingContext(const TensorBlockMapper& b_mapper,
                               const TensorOpCost& b_cost, void* b_buffer,
@@ -274,9 +269,9 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
 
   TensorBlockMapper block_mapper(
       typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
-      requirements.shapeV1(), block_size);
+      requirements);
 
-  block_size = block_mapper.block_dims_total_size();
+  block_size = block_mapper.blockTotalSize();
   const size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);
   const size_t aligned_blocksize =
       align *
@@ -382,9 +377,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
   static const int NumDims = traits<Expression>::NumDimensions;
 
   typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-  typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
-                            Evaluator::Layout>
-      BlockMapper;
+  typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
   typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
 
   typedef internal::TensorBlockDescriptor<NumDims, IndexType>
       TensorBlockDesc;
@@ -408,14 +401,13 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
       TensorBlockScratch scratch(device);
 
       for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx;
           ++block_idx) {
-        auto block = tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
-        TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+        TensorBlockDesc desc = tiling.block_mapper.blockDescriptor(block_idx);
         evaluator.evalBlockV2(desc, scratch);
         scratch.reset();
       }
     };
 
-    device.parallelFor(tiling.block_mapper.total_block_count(), tiling.cost,
+    device.parallelFor(tiling.block_mapper.blockCount(), tiling.cost,
                        eval_block);
   }
   evaluator.cleanup();
@@ -486,9 +478,7 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
   static const int NumDims = traits<Expression>::NumDimensions;
 
   typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
-  typedef TensorBlockMapper<ScalarNoConst, IndexType, NumDims,
-                            Evaluator::Layout>
-      BlockMapper;
+  typedef TensorBlockV2Mapper<NumDims, Evaluator::Layout, IndexType> BlockMapper;
   typedef TensorExecutorTilingContext<BlockMapper> TilingContext;
 
   typedef internal::TensorBlockDescriptor<NumDims, IndexType> TensorBlockDesc;
@@ -518,14 +508,13 @@ class TensorAsyncExecutor<Expression, ThreadPoolDevice, DoneCallback,
 
       for (IndexType block_idx = firstBlockIdx; block_idx < lastBlockIdx;
            ++block_idx) {
-        auto block =
-            ctx->tiling.block_mapper.GetBlockForIndex(block_idx, nullptr);
-        TensorBlockDesc desc(block.first_coeff_index(), block.block_sizes());
+        TensorBlockDesc desc =
+            ctx->tiling.block_mapper.blockDescriptor(block_idx);
         ctx->evaluator.evalBlockV2(desc, scratch);
         scratch.reset();
       }
     };
 
-    ctx->device.parallelForAsync(ctx->tiling.block_mapper.total_block_count(),
+    ctx->device.parallelForAsync(ctx->tiling.block_mapper.blockCount(),
                                  ctx->tiling.cost, eval_block,
                                  [ctx]() { delete ctx; });
   };
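All three executors above now share the same per-block loop. A minimal sketch of driving the new mapper the same way outside the executors (assuming the unsupported Tensor module is on the include path; for_each_block and visit are hypothetical names, not Eigen API):

    #include <unsupported/Eigen/CXX11/Tensor>

    using Eigen::Index;

    // Hypothetical helper; the mapper/descriptor API is the one added in
    // TensorBlockV2.h above.
    template <typename Visitor>
    void for_each_block(const Eigen::DSizes<Index, 2>& dims, Visitor visit) {
      Eigen::internal::TensorBlockV2Mapper<2, Eigen::ColMajor, Index> mapper(
          dims,
          {Eigen::internal::TensorBlockV2ShapeType::kSkewedInnerDims,
           /*size=*/100});
      for (Index i = 0; i < mapper.blockCount(); ++i) {
        // A descriptor is just the offset of the block's first coefficient
        // plus the block dimensions; no buffer pointer is handed out anymore.
        visit(mapper.blockDescriptor(i));
      }
    }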
TensorBlockV2& block) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index 0d18cfc36..68699351b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -124,10 +124,6 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device typedef internal::TensorIntDivisor<Index> IndexDivisor; - typedef typename internal::remove_const<Scalar>::type ScalarNoConst; - typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> - OutputTensorBlock; - //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; @@ -252,9 +248,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device internal::TensorBlockV2ResourceRequirements getResourceRequirements() const { const size_t target_block_size = numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar)); - return internal::TensorBlockV2ResourceRequirements::merge( - {internal::TensorBlockV2ShapeType::kSkewedInnerDims, target_block_size}, - m_impl.getResourceRequirements()); + return {internal::TensorBlockV2ShapeType::kSkewedInnerDims, + target_block_size}; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockV2 diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp index 8d3ca84c8..b56601ebd 100644 --- a/unsupported/test/cxx11_tensor_block_access.cpp +++ b/unsupported/test/cxx11_tensor_block_access.cpp @@ -19,6 +19,7 @@ using Eigen::Tensor; using Eigen::Index; using Eigen::RowMajor; using Eigen::ColMajor; +using Eigen::internal::TensorBlockV2ShapeType; template<typename T> @@ -26,15 +27,15 @@ static const T& choose(int layout, const T& col, const T& row) { return layout == ColMajor ? col : row; } -static internal::TensorBlockShapeType RandomShape() { +static TensorBlockV2ShapeType RandomShape() { return internal::random<bool>() - ? internal::kUniformAllDims - : internal::kSkewedInnerDims; + ? TensorBlockV2ShapeType::kUniformAllDims + : TensorBlockV2ShapeType::kSkewedInnerDims; } template <int NumDims> -static Index RandomTargetSize(const DSizes<Index, NumDims>& dims) { - return internal::random<Index>(1, dims.TotalSize()); +static size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) { + return internal::random<size_t>(1, dims.TotalSize()); } template <int NumDims> @@ -66,55 +67,43 @@ static void Debug(DSizes<Index, NumDims> dims) { template <int Layout> static void test_block_mapper_sanity() { - typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper; + typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper; DSizes<Index, 2> tensor_dims(100, 100); // Test uniform blocks. 
TensorBlockMapper uniform_block_mapper( - tensor_dims, internal::kUniformAllDims, 100); + tensor_dims, {TensorBlockV2ShapeType::kUniformAllDims, 100}); - VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100); - VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100); + VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100); + VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100); // 10x10 blocks - typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10); - VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10); - // Depending on a layout we stride by cols rows. - VERIFY_IS_EQUAL(uniform_b0.block_strides().at(0), choose(Layout, 1, 10)); - VERIFY_IS_EQUAL(uniform_b0.block_strides().at(1), choose(Layout, 10, 1)); - // Tensor strides depend only on a layout and not on the block size. - VERIFY_IS_EQUAL(uniform_b0.tensor_strides().at(0), choose(Layout, 1, 100)); - VERIFY_IS_EQUAL(uniform_b0.tensor_strides().at(1), choose(Layout, 100, 1)); + auto uniform_b0 = uniform_block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(uniform_b0.dimensions().at(0), 10); + VERIFY_IS_EQUAL(uniform_b0.dimensions().at(1), 10); // Test skewed to inner dims blocks. TensorBlockMapper skewed_block_mapper( - tensor_dims, internal::kSkewedInnerDims, 100); + tensor_dims, {TensorBlockV2ShapeType::kSkewedInnerDims, 100}); - VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100); - VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100); + VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100); + VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100); // 1x100 (100x1) rows/cols depending on a tensor layout. - typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1)); - VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100)); - // Depending on a layout we stride by cols rows. - VERIFY_IS_EQUAL(skewed_b0.block_strides().at(0), choose(Layout, 1, 100)); - VERIFY_IS_EQUAL(skewed_b0.block_strides().at(1), choose(Layout, 100, 1)); - // Tensor strides depend only on a layout and not on the block size. - VERIFY_IS_EQUAL(skewed_b0.tensor_strides().at(0), choose(Layout, 1, 100)); - VERIFY_IS_EQUAL(skewed_b0.tensor_strides().at(1), choose(Layout, 100, 1)); + auto skewed_b0 = skewed_block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(skewed_b0.dimensions().at(0), choose(Layout, 100, 1)); + VERIFY_IS_EQUAL(skewed_b0.dimensions().at(1), choose(Layout, 1, 100)); } // Given a TensorBlock "visit" every element accessible though it, and a keep an // index in the visited set. Verify that every coeff accessed only once. 
-template <typename T, int Layout, int NumDims> +template<int NumDims, int Layout> static void UpdateCoeffSet( - const internal::TensorBlock<T, Index, NumDims, Layout>& block, + const DSizes<Index, NumDims>& tensor_strides, + const internal::TensorBlockDescriptor<NumDims>& block, Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) { - const DSizes<Index, NumDims>& block_sizes = block.block_sizes(); - const DSizes<Index, NumDims>& tensor_strides = block.tensor_strides(); + const DSizes<Index, NumDims>& block_sizes = block.dimensions(); for (int i = 0; i < block_sizes[dim_index]; ++i) { if (tensor_strides[dim_index] == 1) { @@ -123,7 +112,7 @@ static void UpdateCoeffSet( VERIFY_IS_EQUAL(inserted.second, true); } else { int next_dim_index = dim_index + choose(Layout, -1, 1); - UpdateCoeffSet<T, Layout, NumDims>(block, first_coeff_index, + UpdateCoeffSet<NumDims, Layout>(tensor_strides, block, first_coeff_index, next_dim_index, visited_coeffs); first_coeff_index += tensor_strides[dim_index]; } @@ -132,22 +121,22 @@ static void UpdateCoeffSet( template <typename T, int NumDims, int Layout> static void test_block_mapper_maps_every_element() { - typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock; - typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper; + typedef internal::TensorBlockV2Mapper<NumDims, Layout> TensorBlockMapper; DSizes<Index, NumDims> dims = RandomDims<NumDims>(); + DSizes<Index, NumDims> strides = internal::strides<Layout>(dims); // Keep track of elements indices available via block access. std::set<Index> coeff_set; // Try different combinations of block types and sizes. - TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims)); + TensorBlockMapper block_mapper(dims, {RandomShape(), RandomTargetSize(dims)}); - for (int i = 0; i < block_mapper.total_block_count(); ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, NULL); - UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), - choose(Layout, NumDims - 1, 0), - &coeff_set); + for (int i = 0; i < block_mapper.blockCount(); ++i) { + auto block = block_mapper.blockDescriptor(i); + UpdateCoeffSet<NumDims, Layout>(strides, block, block.offset(), + choose(Layout, NumDims - 1, 0), + &coeff_set); } // Verify that every coefficient in the original Tensor is accessible through @@ -237,20 +226,21 @@ public: template <int Layout> static void test_uniform_block_shape() { - typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock; - typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper; + typedef internal::TensorBlockDescriptor<5> TensorBlock; + typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper; { // Test shape 'UniformAllDims' with uniform 'max_coeff count'. 
DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 5 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); for (int i = 0; i < 5; ++i) { - VERIFY_IS_EQUAL(5, block.block_sizes()[i]); + VERIFY_IS_EQUAL(5, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills @@ -258,25 +248,27 @@ static void test_uniform_block_shape() if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 7 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[0]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[0]); for (int i = 1; i < 5; ++i) { - VERIFY_IS_EQUAL(5, block.block_sizes()[i]); + VERIFY_IS_EQUAL(5, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 5 * 5 * 5 * 5 * 6; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(6, block.block_sizes()[4]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(6, block.dimensions()[4]); for (int i = 3; i >= 0; --i) { - VERIFY_IS_EQUAL(5, block.block_sizes()[i]); + VERIFY_IS_EQUAL(5, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills @@ -284,25 +276,27 @@ static void test_uniform_block_shape() if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); for (int i = 1; i < 5; ++i) { - VERIFY_IS_EQUAL(5, block.block_sizes()[i]); + VERIFY_IS_EQUAL(5, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 5 * 5 * 5 * 5 * 7; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, 
+ max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); for (int i = 3; i >= 0; --i) { - VERIFY_IS_EQUAL(5, block.block_sizes()[i]); + VERIFY_IS_EQUAL(5, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'UniformAllDims' with larger 'max_coeff count' which spills @@ -310,111 +304,119 @@ static void test_uniform_block_shape() if (Layout == ColMajor) { DSizes<Index, 5> dims(7, 5, 6, 17, 7); const Index max_coeff_count = 7 * 5 * 6 * 7 * 5; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[0]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(6, block.block_sizes()[2]); - VERIFY_IS_EQUAL(7, block.block_sizes()[3]); - VERIFY_IS_EQUAL(5, block.block_sizes()[4]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[0]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(6, block.dimensions()[2]); + VERIFY_IS_EQUAL(7, block.dimensions()[3]); + VERIFY_IS_EQUAL(5, block.dimensions()[4]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(7, 5, 6, 9, 7); const Index max_coeff_count = 5 * 5 * 5 * 6 * 7; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY_IS_EQUAL(6, block.block_sizes()[3]); - VERIFY_IS_EQUAL(5, block.block_sizes()[2]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(5, block.block_sizes()[0]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY_IS_EQUAL(6, block.dimensions()[3]); + VERIFY_IS_EQUAL(5, block.dimensions()[2]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(5, block.dimensions()[0]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'UniformAllDims' with full allocation to all dims. 
if (Layout == ColMajor) { DSizes<Index, 5> dims(7, 5, 6, 17, 7); const Index max_coeff_count = 7 * 5 * 6 * 17 * 7; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[0]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(6, block.block_sizes()[2]); - VERIFY_IS_EQUAL(17, block.block_sizes()[3]); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[0]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(6, block.dimensions()[2]); + VERIFY_IS_EQUAL(17, block.dimensions()[3]); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(7, 5, 6, 9, 7); const Index max_coeff_count = 7 * 5 * 6 * 9 * 7; - TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY_IS_EQUAL(9, block.block_sizes()[3]); - VERIFY_IS_EQUAL(6, block.block_sizes()[2]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(7, block.block_sizes()[0]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kUniformAllDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY_IS_EQUAL(9, block.dimensions()[3]); + VERIFY_IS_EQUAL(6, block.dimensions()[2]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(7, block.dimensions()[0]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } } template <int Layout> static void test_skewed_inner_dim_block_shape() { - typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock; - typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper; + typedef internal::TensorBlockDescriptor<5> TensorBlock; + typedef internal::TensorBlockV2Mapper<5, Layout> TensorBlockMapper; // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim. 
if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 10 * 1 * 1 * 1 * 1; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(10, block.block_sizes()[0]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(10, block.dimensions()[0]); for (int i = 1; i < 5; ++i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 1 * 1 * 1 * 1 * 6; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(6, block.block_sizes()[4]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(6, block.dimensions()[4]); for (int i = 3; i >= 0; --i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'SkewedInnerDims' with full allocation to inner-most dim. if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 1 * 1 * 1 * 1; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); for (int i = 1; i < 5; ++i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 1 * 1 * 1 * 1 * 7; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); for (int i = 3; i >= 0; --i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'SkewedInnerDims' with full allocation to inner-most dim, @@ -422,27 +424,29 @@ static void test_skewed_inner_dim_block_shape() if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 3 * 1 * 1 * 1; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); - 
VERIFY_IS_EQUAL(3, block.block_sizes()[1]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); + VERIFY_IS_EQUAL(3, block.dimensions()[1]); for (int i = 2; i < 5; ++i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 1 * 1 * 1 * 15 * 7; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY_IS_EQUAL(15, block.block_sizes()[3]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY_IS_EQUAL(15, block.dimensions()[3]); for (int i = 2; i >= 0; --i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'SkewedInnerDims' with full allocation to inner-most dim, @@ -450,61 +454,65 @@ static void test_skewed_inner_dim_block_shape() if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 5 * 5 * 1 * 1; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(5, block.block_sizes()[2]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(5, block.dimensions()[2]); for (int i = 3; i < 5; ++i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 1 * 1 * 5 * 17 * 7; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY_IS_EQUAL(17, block.block_sizes()[3]); - VERIFY_IS_EQUAL(5, block.block_sizes()[2]); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY_IS_EQUAL(17, block.dimensions()[3]); + VERIFY_IS_EQUAL(5, block.dimensions()[2]); for (int i = 1; i >= 0; --i) { - VERIFY_IS_EQUAL(1, block.block_sizes()[i]); + VERIFY_IS_EQUAL(1, block.dimensions()[i]); } - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } // Test shape 'SkewedInnerDims' with full allocation to all dims. 
if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 5 * 6 * 17 * 7; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(6, block.block_sizes()[2]); - VERIFY_IS_EQUAL(17, block.block_sizes()[3]); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(6, block.dimensions()[2]); + VERIFY_IS_EQUAL(17, block.dimensions()[3]); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); const Index max_coeff_count = 11 * 5 * 6 * 17 * 7; - TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims, - max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); - VERIFY_IS_EQUAL(7, block.block_sizes()[4]); - VERIFY_IS_EQUAL(17, block.block_sizes()[3]); - VERIFY_IS_EQUAL(6, block.block_sizes()[2]); - VERIFY_IS_EQUAL(5, block.block_sizes()[1]); - VERIFY_IS_EQUAL(11, block.block_sizes()[0]); - VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); + TensorBlockMapper + block_mapper(dims, {TensorBlockV2ShapeType::kSkewedInnerDims, + max_coeff_count}); + TensorBlock block = block_mapper.blockDescriptor(0); + VERIFY_IS_EQUAL(7, block.dimensions()[4]); + VERIFY_IS_EQUAL(17, block.dimensions()[3]); + VERIFY_IS_EQUAL(6, block.dimensions()[2]); + VERIFY_IS_EQUAL(5, block.dimensions()[1]); + VERIFY_IS_EQUAL(11, block.dimensions()[0]); + VERIFY(block.dimensions().TotalSize() <= max_coeff_count); } } template <int Layout> -static void test_empty_dims(const internal::TensorBlockShapeType block_shape) +static void test_empty_dims(const internal::TensorBlockV2ShapeType block_shape) { // Test blocking of tensors with zero dimensions: // - we must not crash on asserts and divisions by zero @@ -512,26 +520,28 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape) // (recipe for overflows/underflows, divisions by zero and NaNs later) // - total block count must be zero { - typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper; + typedef internal::TensorBlockV2Mapper<1, Layout> TensorBlockMapper; + DSizes<Index, 1> dims(0); - for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) { - TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count); - VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0); - VERIFY(block_mapper.block_dims_total_size() >= 1); + for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) { + TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count}); + VERIFY_IS_EQUAL(block_mapper.blockCount(), 0); + VERIFY(block_mapper.blockTotalSize() >= 1); } } { - typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper; + typedef internal::TensorBlockV2Mapper<2, Layout> TensorBlockMapper; + for (int dim1 = 0; dim1 < 3; ++dim1) { for (int dim2 = 0; dim2 < 3; ++dim2) { DSizes<Index, 2> dims(dim1, dim2); - for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) { - TensorBlockMapper 
-              block_mapper(dims, block_shape, max_coeff_count);
+        for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
+          TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
           if (dim1 * dim2 == 0) {
-            VERIFY_IS_EQUAL(block_mapper.total_block_count(), 0);
+            VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
           }
-          VERIFY(block_mapper.block_dims_total_size() >= 1);
+          VERIFY(block_mapper.blockTotalSize() >= 1);
         }
       }
     }
@@ -563,8 +573,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS_AND_DIMS(float, test_block_mapper_maps_every_element);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockV2ShapeType::kSkewedInnerDims);
 }

 #undef TEST_LAYOUTS
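
The tests above capture the whole mapper migration: shape preference and target block size now travel together in one brace-initialized requirements argument, and blocks are visited through lightweight descriptors instead of materialized TensorBlock objects. The sketch below is illustrative only, not part of the patch (the helper name VisitAllBlocks and the fixed rank/size are made up); it uses just the TensorBlockV2Mapper calls visible in these hunks:

// Illustrative sketch, not part of this patch.
#include <unsupported/Eigen/CXX11/Tensor>

template <int Layout>
static void VisitAllBlocks(const Eigen::DSizes<Eigen::Index, 2>& dims) {
  // Rank and layout are template parameters; the index type defaults.
  typedef Eigen::internal::TensorBlockV2Mapper<2, Layout> BlockMapper;

  // Shape preference + target coefficient count as one requirements argument.
  BlockMapper mapper(
      dims, {Eigen::internal::TensorBlockV2ShapeType::kSkewedInnerDims,
             /*max_coeff_count=*/size_t(128)});

  for (Eigen::Index i = 0; i < mapper.blockCount(); ++i) {
    auto desc = mapper.blockDescriptor(i);    // replaces GetBlockForIndex(i, data)
    const Eigen::Index first = desc.offset(); // was first_coeff_index()
    const auto sizes = desc.dimensions();     // was block_sizes()
    (void)first;
    (void)sizes;  // a real caller would process the block here
  }
}

The design point the patch makes is that a descriptor carries only an offset and dimensions, so the mapper no longer needs a destination buffer pointer up front (the old GetBlockForIndex(i, data) parameter).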
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 086dd8c11..700e84a19 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -61,21 +61,21 @@ static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
 template <int Layout, int NumDims>
 static TensorBlockParams<NumDims> SkewedInnerBlock(
     DSizes<Index, NumDims> dims) {
-  using BlockMapper = internal::TensorBlockMapper<int, Index, NumDims, Layout>;
+  using BlockMapper = internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
   BlockMapper block_mapper(dims,
-                           internal::TensorBlockShapeType::kSkewedInnerDims,
-                           internal::random<Index>(1, dims.TotalSize()));
+                           {internal::TensorBlockV2ShapeType::kSkewedInnerDims,
+                            internal::random<size_t>(1, dims.TotalSize())});

-  Index total_blocks = block_mapper.total_block_count();
+  Index total_blocks = block_mapper.blockCount();
   Index block_index = internal::random<Index>(0, total_blocks - 1);
-  auto block = block_mapper.GetBlockForIndex(block_index, nullptr);
-  DSizes<Index, NumDims> sizes = block.block_sizes();
+  auto block = block_mapper.blockDescriptor(block_index);
+  DSizes<Index, NumDims> sizes = block.dimensions();

   auto strides = internal::strides<Layout>(dims);
   DSizes<Index, NumDims> offsets;

   // Compute offsets for the first block coefficient.
-  Index index = block.first_coeff_index();
+  Index index = block.offset();
   if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / strides[i];
@@ -92,8 +92,7 @@ static TensorBlockParams<NumDims> SkewedInnerBlock(
     if (NumDims > 0) offsets[NumDims - 1] = index;
   }

-  auto desc = TensorBlockDescriptor<NumDims>(block.first_coeff_index(), sizes);
-  return {offsets, sizes, desc};
+  return {offsets, sizes, block};
 }

 template <int NumDims>
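
The SkewedInnerBlock hunk above keeps the stride-division trick for recovering per-dimension offsets from a descriptor's linear offset. As a standalone illustration (the helper name OffsetsFromLinearIndex is made up; only the ColMajor branch from the test is shown, and NumDims >= 1 is assumed):

// Illustrative sketch, not part of this patch.
#include <unsupported/Eigen/CXX11/Tensor>

template <int NumDims>
static Eigen::DSizes<Eigen::Index, NumDims> OffsetsFromLinearIndex(
    Eigen::Index index,
    const Eigen::DSizes<Eigen::Index, NumDims>& strides) {
  Eigen::DSizes<Eigen::Index, NumDims> offsets;
  // Peel off the largest-stride dimensions first (ColMajor layout).
  for (int i = NumDims - 1; i > 0; --i) {
    const Eigen::Index idx = index / strides[i];
    index -= idx * strides[i];
    offsets[i] = idx;
  }
  offsets[0] = index;  // innermost dimension has stride 1
  return offsets;
}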
diff --git a/unsupported/test/cxx11_tensor_block_io.cpp b/unsupported/test/cxx11_tensor_block_io.cpp
index ddda3c7f9..6f318d9fe 100644
--- a/unsupported/test/cxx11_tensor_block_io.cpp
+++ b/unsupported/test/cxx11_tensor_block_io.cpp
@@ -22,14 +22,15 @@ static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
   return DSizes<Index, NumDims>(dims);
 }

-static internal::TensorBlockShapeType RandomBlockShape() {
-  return internal::random<bool>() ? internal::kUniformAllDims
-                                  : internal::kSkewedInnerDims;
+static internal::TensorBlockV2ShapeType RandomBlockShape() {
+  return internal::random<bool>()
+             ? internal::TensorBlockV2ShapeType::kUniformAllDims
+             : internal::TensorBlockV2ShapeType::kSkewedInnerDims;
 }

 template <int NumDims>
-static Index RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
-  return internal::random<Index>(1, dims.TotalSize());
+static size_t RandomTargetBlockSize(const DSizes<Index, NumDims>& dims) {
+  return internal::random<size_t>(1, dims.TotalSize());
 }

 template <int Layout, int NumDims>
@@ -73,12 +74,12 @@ static void test_block_io_copy_data_from_source_to_target() {
   // Construct a tensor block mapper.
   using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
-  TensorBlockMapper block_mapper(dims, RandomBlockShape(),
-                                 RandomTargetBlockSize(dims));
+      internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
+  TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
+                                        RandomTargetBlockSize(dims)});

   // We will copy data from input to output through this buffer.
-  Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+  Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());

   // Precompute strides for TensorBlockIO::Copy.
   auto input_strides = internal::strides<Layout>(dims);
@@ -88,24 +89,23 @@ static void test_block_io_copy_data_from_source_to_target() {
   T* output_data = output.data();
   T* block_data = block.data();

-  for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-    TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+  for (int i = 0; i < block_mapper.blockCount(); ++i) {
+    auto desc = block_mapper.blockDescriptor(i);

-    auto blk_dims = blk.block_sizes();
+    auto blk_dims = desc.dimensions();
     auto blk_strides = internal::strides<Layout>(blk_dims);

     {
       // Read from input into a block buffer.
       IODst dst(blk_dims, blk_strides, block_data, 0);
-      IOSrc src(input_strides, input_data, blk.first_coeff_index());
+      IOSrc src(input_strides, input_data, desc.offset());

       TensorBlockIO::Copy(dst, src);
     }

     {
       // Write from block buffer to output.
-      IODst dst(blk_dims, output_strides, output_data, blk.first_coeff_index());
+      IODst dst(blk_dims, output_strides, output_data, desc.offset());
       IOSrc src(blk_strides, block_data, 0);

       TensorBlockIO::Copy(dst, src);
@@ -145,12 +145,12 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // Construct a tensor block mapper.
   // NOTE: Tensor block mapper works with shuffled dimensions.
   using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
-  TensorBlockMapper block_mapper(output_tensor_dims, RandomBlockShape(),
-                                 RandomTargetBlockSize(output_tensor_dims));
+      internal::TensorBlockV2Mapper<NumDims, Layout, Index>;
+  TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
+                                 RandomTargetBlockSize(output_tensor_dims)});

   // We will copy data from input to output through this buffer.
-  Tensor<T, NumDims, Layout> block(block_mapper.block_dim_sizes());
+  Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());

   // Precompute strides for TensorBlockIO::Copy.
   auto input_strides = internal::strides<Layout>(dims);
@@ -160,12 +160,11 @@ static void test_block_io_copy_using_reordered_dimensions() {
   T* output_data = output.data();
   T* block_data = block.data();

-  for (Index i = 0; i < block_mapper.total_block_count(); ++i) {
-    using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-    TensorBlock blk = block_mapper.GetBlockForIndex(i, block_data);
+  for (Index i = 0; i < block_mapper.blockCount(); ++i) {
+    auto desc = block_mapper.blockDescriptor(i);

     const Index first_coeff_index = GetInputIndex<Layout, NumDims>(
-        blk.first_coeff_index(), output_to_input_dim_map, input_strides,
+        desc.offset(), output_to_input_dim_map, input_strides,
         output_strides);

     // NOTE: Block dimensions are in the same order as output dimensions.
@@ -174,7 +173,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
     using IODst = typename TensorBlockIO::Dst;
     using IOSrc = typename TensorBlockIO::Src;

-    auto blk_dims = blk.block_sizes();
+    auto blk_dims = desc.dimensions();
     auto blk_strides = internal::strides<Layout>(blk_dims);

     {
@@ -236,16 +235,13 @@ static void test_block_io_copy_using_reordered_dimensions_do_not_squeeze() {
   float* tensor_data = tensor.data();
   float* block_data = block.data();

-  typedef internal::TensorBlock<float, Index, 3, Layout> TensorBlock;
-  TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
-
   using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 3, Layout>;
   using IODst = typename TensorBlockIO::Dst;
   using IOSrc = typename TensorBlockIO::Src;

   // Read from a tensor into a block.
-  IODst dst(blk.block_sizes(), block_strides, block_data, 0);
-  IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+  IODst dst(block_dims, block_strides, block_data, 0);
+  IOSrc src(tensor_strides, tensor_data, 0);

   TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);
@@ -287,16 +283,13 @@ static void test_block_io_copy_using_reordered_dimensions_squeeze() {
   float* tensor_data = tensor.data();
   float* block_data = block.data();

-  typedef internal::TensorBlock<float, Index, 4, Layout> TensorBlock;
-  TensorBlock blk(0, block_dims, block_strides, tensor_strides, block_data);
-
   using TensorBlockIO = internal::TensorBlockIOV2<float, Index, 4, Layout>;
   using IODst = typename TensorBlockIO::Dst;
   using IOSrc = typename TensorBlockIO::Src;

   // Read from a tensor into a block.
-  IODst dst(blk.block_sizes(), block_strides, block_data, 0);
-  IOSrc src(tensor_strides, tensor_data, blk.first_coeff_index());
+  IODst dst(block_dims, block_strides, block_data, 0);
+  IOSrc src(tensor_strides, tensor_data, 0);

   TensorBlockIO::Copy(dst, src, /*dst_to_src_dim_map=*/block_to_tensor_dim);
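
With the old TensorBlock type gone, every copy in these IO tests reduces to the same dst/src pattern. A minimal sketch of that pattern, using only the TensorBlockIOV2 calls that appear in this patch (the helper name CopyTensorIntoBlock and the fixed rank are illustrative):

// Illustrative sketch, not part of this patch.
#include <unsupported/Eigen/CXX11/Tensor>

template <int Layout>
static void CopyTensorIntoBlock(const Eigen::DSizes<Eigen::Index, 3>& dims,
                                const float* tensor_data, float* block_data) {
  using TensorBlockIO =
      Eigen::internal::TensorBlockIOV2<float, Eigen::Index, 3, Layout>;
  using IODst = typename TensorBlockIO::Dst;
  using IOSrc = typename TensorBlockIO::Src;

  // Both sides use the same dims/strides here, so this is a dense copy
  // starting at linear offset 0 on each side.
  auto strides = Eigen::internal::strides<Layout>(dims);
  IODst dst(dims, strides, block_data, /*offset=*/0);
  IOSrc src(strides, tensor_data, /*offset=*/0);

  TensorBlockIO::Copy(dst, src);
}

Destination dimensions plus independent strides and offsets on each side are enough to express every read/write in the tests above, including the reordered-dimension cases, which only add the dst_to_src_dim_map argument to Copy.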