19 files changed, 236 insertions, 228 deletions
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 020f939ad..668922ffc 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     enum {
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
       SizeAtCompileTime = Base::SizeAtCompileTime
     };
 
@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
     {
 #if EIGEN_MAX_ALIGN_BYTES>0
+      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
       eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-                    || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+                    || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
     }
 
diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index 4bb95eb8b..354e33de5 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
 template<typename Scalar, int Flags, typename Index>
 MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
 {
-  eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR
-         || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC);
+  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
+         || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));
 
   Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 322260011..24a6343e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -91,7 +91,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   *block_total_size = resources[0].block_total_size;
   for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
     if (resources[i].block_shape == kSkewedInnerDims &&
-        *block_shape ! kSkewedInnerDims) {
+        *block_shape != kSkewedInnerDims) {
       *block_shape = kSkewedInnerDims;
     }
     *block_total_size =
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
     const Scalar* src_base = &src_data[src_index];
     Scalar* dst_base = &dst_data[dst_index];
 
-    typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
-    typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
+    typedef const Array<Scalar, Dynamic, 1> Src;
+    typedef Array<Scalar, Dynamic, 1> Dst;
 
-    typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
-    typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
+    typedef Map<Src, 0, InnerStride<> > SrcMap;
+    typedef Map<Dst, 0, InnerStride<> > DstMap;
 
     const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
     DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
           bool BlockRead>
 class TensorBlockIO {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef typename TensorBlockCopyOp<Scalar, StorageIndex>
-      TensorBlockCopyOp;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
 
  protected:
   struct BlockIteratorState {
@@ -194,7 +192,7 @@ class TensorBlockIO {
   };
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
       Scalar* dst_data) {
@@ -290,8 +288,8 @@ class TensorBlockIO {
     const StorageIndex block_total_size =
         NumDims == 0 ? 1 : block.block_sizes().TotalSize();
     for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
-      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
-                             dst_data, inputIndex, input_stride, src_data);
+      BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
+                       dst_data, inputIndex, input_stride, src_data);
       // Update index.
       for (int j = 0; j < num_squeezed_dims; ++j) {
         if (++block_iter_state[j].count < block_iter_state[j].size) {
@@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/true> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, const Scalar* src_data) {
+      Block* block, const Scalar* src_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, StorageIndex first_coeff_index,
+      Block* block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
     Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/false> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Scalar* dst_data) {
+      const Block& block, Scalar* dst_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
     Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@@ -401,13 +395,13 @@ struct TensorBlockCwiseBinaryOp {
       const StorageIndex left_stride, const LeftScalar* left_data,
       const StorageIndex right_index, const StorageIndex right_stride,
       const RightScalar* right_data) {
-    typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
-    typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
-    typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
+    typedef const Array<LeftScalar, Dynamic, 1> Lhs;
+    typedef const Array<RightScalar, Dynamic, 1> Rhs;
+    typedef Array<OutputScalar, Dynamic, 1> Out;
 
-    typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
-    typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
-    typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
+    typedef Map<Lhs, 0, InnerStride<> > LhsMap;
+    typedef Map<Rhs, 0, InnerStride<> > RhsMap;
+    typedef Map<Out, 0, InnerStride<> > OutMap;
 
     const LeftScalar* lhs_base = &left_data[left_index];
     const RightScalar* rhs_base = &right_data[right_index];
@@ -417,8 +411,7 @@ struct TensorBlockCwiseBinaryOp {
     const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
     OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
 
-    out =
-        Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
+    out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
   }
 };
 
@@ -434,8 +427,7 @@ struct TensorBlockCwiseBinaryOp {
 template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
           int NumDims, int Layout>
 struct TensorBlockCwiseBinaryIO {
-  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
-                                         Layout>::Dimensions Dimensions;
+  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
 
   struct BlockIteratorState {
     StorageIndex output_stride, output_span;
@@ -627,8 +619,7 @@ struct TensorBlockView {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorBlockMapper(const Dimensions& dims,
@@ -663,7 +654,7 @@ class TensorBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -711,8 +702,7 @@ class TensorBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@@ -818,8 +808,7 @@ class TensorBlockMapper {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorSliceBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorSliceBlockMapper(const Dimensions& tensor_dims,
@@ -860,7 +849,7 @@ class TensorSliceBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -917,8 +906,7 @@ class TensorSliceBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index a023718c6..2ca91709f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -152,13 +152,7 @@ struct TensorContractionParams {
 //   1. Elementwise Relu transformation following Conv2D.
 //   2. AddBias to the Conv2D output channels dimension.
 //
-// See expected implementation in NoOpOutputKernel.
-struct OutputKernel {
-  template <typename Index, typename Scalar>
-  typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-};
-
-// Output kernel that does absolutely nothing.
+// The NoOpOutputKernel implements an output kernel that does absolutely nothing.
 struct NoOpOutputKernel {
   /**
    * Tensor contraction evaluator calls this kernel after finishing each block
@@ -177,7 +171,7 @@ struct NoOpOutputKernel {
    */
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
       const TensorContractionParams& /*params*/, Index /*i*/,
       Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
 };
@@ -666,7 +660,7 @@ struct TensorContractionEvaluatorBase
 
           // call gebp (matrix kernel)
           // The parameters here are copied from Eigen's GEMM implementation
-          const auto output_mapper = output.getSubMapper(i2, j2);
+          const OutputMapper output_mapper = output.getSubMapper(i2, j2);
           gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc,
                Scalar(1), -1, -1, 0, 0);
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index 47b5a5a5e..cbec5e9b4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
 
   enum {
     IsAligned = false,
@@ -106,12 +107,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<CoeffReturnType*>(
-          m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<PointerT>(
+          m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
       evalTo(m_result);
       return true;
@@ -139,23 +140,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
-        data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
     m_op.func().eval(m_op.expression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const ArgType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerT m_result;
 };
 
 
@@ -250,6 +250,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
 
   enum {
     IsAligned = false,
@@ -268,12 +269,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
       evalTo(m_result);
       return true;
     }
@@ -300,22 +301,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
     m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const XprType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerT m_result;
 };
 
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0cefe42dd..9b9587de5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -132,7 +132,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
     if (needs_assign) {
       // Size tensor blocks to fit in cache (or requested target block size).
       Index block_total_size = numext::mini(cache_size, total_size);
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       // Query expression tree for desired block size/shape.
       std::vector<TensorOpResourceRequirements> resources;
       evaluator.getResourceRequirements(&resources);
@@ -229,10 +229,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
     Evaluator evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      const StorageIndex PacketSize =
-          Vectorizable
-              ? unpacket_traits<typename Evaluator::PacketReturnType>::size
-              : 1;
       const StorageIndex size = array_prod(evaluator.dimensions());
       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
                          EvalRange::alignBlockSize,
@@ -272,7 +268,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
 
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       Index block_total_size = 0;
       // Query expression tree for desired block size/shape.
       std::vector<internal::TensorOpResourceRequirements> resources;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 0dd524a30..93a3b0e14 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -24,6 +24,14 @@ template<typename T> struct MakePointer {
   typedef T ScalarType;
 };
 
+// The PointerType class is a container of the device-specific pointer
+// used for referring to a pointer in the TensorEvaluator class. While the TensorExpression
+// is a device-agnostic type and needs the MakePointer class for type conversion,
+// the TensorEvaluator class can be specialized for a device, hence it is possible
+// to construct different types of temporary storage memory in TensorEvaluator
+// for different devices by specializing the following PointerType class.
+template<typename T, typename Device> struct PointerType : MakePointer<T>{};
+
 namespace internal{
 template<typename A, typename B> struct Pointer_type_promotion {
   static const bool val=false;
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 354995be8..a800e827f 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -57,6 +57,5 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
         coprimes_.push_back(i);
       }
     }
-    for (int i = 0; i < num_threads_; i++) {
-      queues_.push_back(new Queue());
-    }
+    // queues_ stores Queue objects by value: default-construct all of them in place.
+    queues_.resize(num_threads_);
@@ -64,7 +65,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     init_barrier_.reset(new Barrier(num_threads_));
 #endif
     for (int i = 0; i < num_threads_; i++) {
-      threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
+      threads_.emplace_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
     }
 #ifndef EIGEN_THREAD_LOCAL
     // Wait for workers to initialize per_thread_map_. Otherwise we might race
@@ -85,13 +86,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // Since we were cancelled, there might be entries in the queues.
       // Empty them to prevent their destructor from asserting.
       for (size_t i = 0; i < queues_.size(); i++) {
-        queues_[i]->Flush();
+        queues_[i].Flush();
       }
     }
 
     // Join threads explicitly to avoid destruction order issues.
-    for (int i = 0; i < num_threads_; i++) delete threads_[i];
-    for (int i = 0; i < num_threads_; i++) delete queues_[i];
+    threads_.resize(0);
+    queues_.resize(0);
 #ifndef EIGEN_THREAD_LOCAL
     for (auto it : per_thread_map_) delete it.second;
 #endif
@@ -102,13 +103,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     PerThread* pt = GetPerThread();
     if (pt->pool == this) {
       // Worker thread of this pool, push onto the thread's queue.
-      Queue* q = queues_[pt->thread_id];
-      t = q->PushFront(std::move(t));
+      Queue& q = queues_[pt->thread_id];
+      t = q.PushFront(std::move(t));
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      Queue* q = queues_[Rand(&pt->rand) % queues_.size()];
-      t = q->PushBack(std::move(t));
+      Queue& q = queues_[Rand(&pt->rand) % queues_.size()];
+      t = q.PushBack(std::move(t));
     }
     // Note: below we touch this after making w available to worker threads.
     // Strictly speaking, this can lead to a racy-use-after-free. Consider that
@@ -163,8 +164,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   Environment env_;
   const int num_threads_;
   const bool allow_spinning_;
-  MaxSizeVector<Thread*> threads_;
-  MaxSizeVector<Queue*> queues_;
+  MaxSizeVector<std::unique_ptr<Thread> > threads_;
+  MaxSizeVector<Queue> queues_;
   MaxSizeVector<unsigned> coprimes_;
   MaxSizeVector<EventCount::Waiter> waiters_;
   std::atomic<unsigned> blocked_;
@@ -193,7 +194,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     pt->pool = this;
     pt->rand = GlobalThreadIdHash();
     pt->thread_id = thread_id;
-    Queue* q = queues_[thread_id];
+    Queue& q = queues_[thread_id];
     EventCount::Waiter* waiter = &waiters_[thread_id];
     // TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional
     // to num_threads_ and we assume that new work is scheduled at a
@@ -209,10 +210,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // counter-productive for the types of I/O workloads the single thread
       // pools tend to be used for.
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         for (int i = 0; i < spin_count && !t.f; i++) {
           if (!cancelled_.load(std::memory_order_relaxed)) {
-            t = q->PopFront();
+            t = q.PopFront();
           }
         }
         if (!t.f) {
@@ -226,7 +227,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       }
     } else {
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         if (!t.f) {
           t = Steal();
           if (!t.f) {
@@ -263,7 +264,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      Task t = queues_[victim]->PopBack();
+      Task t = queues_[victim].PopBack();
       if (t.f) {
         return t;
       }
@@ -290,7 +291,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       if (cancelled_) {
         return false;
       } else {
-        *t = queues_[victim]->PopBack();
+        *t = queues_[victim].PopBack();
         return true;
       }
     }
@@ -298,6 +299,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // If we are shutting down and all worker threads blocked without work,
     // that's we are done.
     blocked_++;
+    // TODO is blocked_ required to be unsigned?
     if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) {
       ec_.CancelWait(waiter);
       // Almost done, but need to re-check queues.
@@ -331,7 +333,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      if (!queues_[victim]->Empty()) {
+      if (!queues_[victim].Empty()) {
         return victim;
       }
       victim += inc;
diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
index d91662d96..32db51592 100644
--- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h
+++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
@@ -26,6 +26,11 @@ template <typename T, size_t n> class array {
   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
 
   EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; }
+
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE T& front() { return values[0]; }
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& front() const { return values[0]; }
diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
index 4bc3dd1ba..bc5b3632c 100644
--- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
+++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
@@ -35,7 +35,6 @@ class MaxSizeVector {
   explicit MaxSizeVector(size_t n)
       : reserve_(n), size_(0),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
   }
 
   // Construct a new MaxSizeVector, reserve and resize to n.
@@ -44,35 +43,55 @@ class MaxSizeVector {
   MaxSizeVector(size_t n, const T& init)
       : reserve_(n), size_(n),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
+    size_t i = 0;
+    EIGEN_TRY
+    {
+      for(; i < size_; ++i) { new (&data_[i]) T(init); }
+    }
+    EIGEN_CATCH(...)
+    {
+      // Constructing data_[i] failed: destroy only the i elements built so far, in reverse order.
+      for(; i > 0; --i) { data_[i-1].~T(); }
+      internal::aligned_free(data_);
+      EIGEN_THROW;
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   ~MaxSizeVector() {
-    for (size_t i = 0; i < size_; ++i) {
-      data_[i].~T();
+    for (size_t i = size_; i > 0; --i) {
+      data_[i-1].~T();
     }
     internal::aligned_free(data_);
   }
 
   void resize(size_t n) {
     eigen_assert(n <= reserve_);
-    for (size_t i = size_; i < n; ++i) {
-      new (&data_[i]) T;
+    for (; size_ < n; ++size_) {
+      new (&data_[size_]) T;
     }
-    for (size_t i = n; i < size_; ++i) {
-      data_[i].~T();
+    for (; size_ > n; --size_) {
+      data_[size_-1].~T();
     }
-    size_ = n;
+    eigen_assert(size_ == n);
   }
 
   // Append new elements (up to reserved size).
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void push_back(const T& t) {
     eigen_assert(size_ < reserve_);
-    data_[size_++] = t;
+    new (&data_[size_++]) T(t);
   }
 
+  // For C++03 compatibility this only takes one argument
+  template<class X>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void emplace_back(const X& x) {
+    eigen_assert(size_ < reserve_);
+    new (&data_[size_++]) T(x);
+  }
+
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const T& operator[] (size_t i) const {
     eigen_assert(i < size_);
@@ -99,11 +118,8 @@ class MaxSizeVector {
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void pop_back() {
-    // NOTE: This does not destroy the value at the end the way
-    // std::vector's version of pop_back() does.  That happens when
-    // the Vector is destroyed.
     eigen_assert(size_ > 0);
-    size_--;
+    data_[--size_].~T();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 2c45b3999..d8cf3e642 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -289,6 +289,7 @@ class FFT
     void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1)
     {
       typedef typename ComplexDerived::Scalar src_type;
+      typedef typename ComplexDerived::RealScalar real_type;
       typedef typename OutputDerived::Scalar dst_type;
       const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
@@ -329,9 +330,9 @@ class FFT
             tmp.head(nhead) = src.head(nhead);
             tmp.tail(ntail) = src.tail(ntail);
             if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
-              tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5);
+              tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5);
             }else{ // expanding -- split the old Nyquist bin into two halves
-              tmp(nhead) = src(nhead) * src_type(.5);
+              tmp(nhead) = src(nhead) * real_type(.5);
               tmp(tmp.size()-nhead) = tmp(nhead);
             }
           }
diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport
index 11d99567e..489fd8354 100644
--- a/unsupported/Eigen/OpenGLSupport
+++ b/unsupported/Eigen/OpenGLSupport
@@ -184,7 +184,7 @@ inline void glRotate(const Rotation2D<float>& rot)
 }
 inline void glRotate(const Rotation2D<double>& rot)
 {
-  glRotated(rot.angle()*180.0/EIGEN_PI, 0.0, 0.0, 1.0);
+  glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0);
 }
 
 template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot)
diff --git a/unsupported/Eigen/src/BVH/KdBVH.h b/unsupported/Eigen/src/BVH/KdBVH.h
index 13f792cd0..2d5b76ad0 100644
--- a/unsupported/Eigen/src/BVH/KdBVH.h
+++ b/unsupported/Eigen/src/BVH/KdBVH.h
@@ -35,6 +35,7 @@ struct get_boxes_helper {
   {
     outBoxes.insert(outBoxes.end(), boxBegin, boxEnd);
     eigen_assert(outBoxes.size() == objects.size());
+    EIGEN_ONLY_USED_FOR_DEBUG(objects);
   }
 };
 
diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 6feeff231..f572e496d 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -10,6 +10,7 @@
 
 #include "main.h"
 
+#include <algorithm>
 #include <set>
 
 #include <Eigen/CXX11/Tensor>
@@ -19,17 +20,16 @@ using Eigen::Index;
 using Eigen::RowMajor;
 using Eigen::ColMajor;
 
-using internal::TensorBlockShapeType;
 
 template<typename T>
 static const T& choose(int layout, const T& col, const T& row) {
   return layout == ColMajor ? col : row;
 }
 
-static const TensorBlockShapeType RandomShape() {
+static internal::TensorBlockShapeType RandomShape() {
   return internal::random<bool>()
-             ? internal::TensorBlockShapeType::kUniformAllDims
-             : internal::TensorBlockShapeType::kSkewedInnerDims;
+             ? internal::kUniformAllDims
+             : internal::kSkewedInnerDims;
 }
 
 template <int NumDims>
@@ -44,12 +44,12 @@ static DSizes<Index, NumDims> RandomDims() {
     dims[i] = internal::random<int>(1, 20);
   }
   return DSizes<Index, NumDims>(dims);
-};
+}
 
 /** Dummy data type to test TensorBlock copy ops. */
 struct Data {
-  Data() : Data(0) {}
-  explicit Data(int v) { value = v; }
+  Data() : value(0) {}
+  explicit Data(int v) : value(v) { }
   int value;
 };
 
@@ -91,21 +91,19 @@ static void Debug(DSizes<Index, NumDims> dims) {
 template <int Layout>
 static void test_block_mapper_sanity()
 {
-  using T = int;
-  using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>;
-  using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>;
+  typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
 
   DSizes<Index, 2> tensor_dims(100, 100);
 
   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100);
+      tensor_dims, internal::kUniformAllDims, 100);
 
   VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
 
   // 10x10 blocks
-  auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr);
+  typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
   // Depending on a layout we stride by cols rows.
@@ -117,13 +115,13 @@ static void test_block_mapper_sanity()
 
   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100);
+      tensor_dims, internal::kSkewedInnerDims, 100);
 
   VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
 
   // 1x100 (100x1) rows/cols depending on a tensor layout.
-  auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr);
+  typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
   // Depending on a layout we stride by cols rows.
@@ -145,7 +143,8 @@ static void UpdateCoeffSet(
 
   for (int i = 0; i < block_sizes[dim_index]; ++i) {
     if (tensor_strides[dim_index] == 1) {
-      auto inserted = visited_coeffs->insert(first_coeff_index + i);
+      typedef std::pair<std::set<Index>::iterator, bool> ReturnType;
+      ReturnType inserted = visited_coeffs->insert(first_coeff_index + i);
       VERIFY_IS_EQUAL(inserted.second, true);
     } else {
       int next_dim_index = dim_index + choose(Layout, -1, 1);
@@ -158,9 +157,8 @@ static void UpdateCoeffSet(
 
 template <typename T, int NumDims, int Layout>
 static void test_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
 
   DSizes<Index, NumDims> dims = RandomDims<NumDims>();
 
@@ -171,7 +169,7 @@ static void test_block_mapper_maps_every_element() {
   TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
 
   for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
     UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
@@ -187,9 +185,8 @@ static void test_block_mapper_maps_every_element() {
 
 template <typename T, int NumDims, int Layout>
 static void test_slice_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorSliceBlockMapper =
-      internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper;
 
   DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>();
   DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>();
@@ -206,7 +203,7 @@ static void test_slice_block_mapper_maps_every_element() {
   // Keep track of elements indices available via block access.
   std::set<Index> coeff_set;
 
-  auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
+  int total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
 
   // Pick a random dimension sizes for the tensor blocks.
   DSizes<Index, NumDims> block_sizes;
@@ -219,7 +216,7 @@ static void test_slice_block_mapper_maps_every_element() {
                                       DimensionList<Index, NumDims>());
 
   for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
     UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
@@ -240,7 +237,7 @@ static void test_block_io_copy_data_from_source_to_target() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   T* input_data = GenerateRandomData<T>(input_tensor_size);
   T* output_data = new T[input_tensor_size];
@@ -319,7 +316,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   // Create a random input tensor.
   T* input_data = GenerateRandomData<T>(input_tensor_size);
@@ -327,7 +324,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // Create a random dimension re-ordering/shuffle.
   std::vector<Index> shuffle;
   for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
-  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+  std::random_shuffle(shuffle.begin(), shuffle.end());
 
   DSizes<Index, NumDims> output_tensor_dims;
   array<Index, NumDims> input_to_output_dim_map;
@@ -342,8 +339,8 @@ static void test_block_io_copy_using_reordered_dimensions() {
   TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(),
                                  RandomTargetSize(input_tensor_dims));
 
-  auto* block_data = new T[block_mapper.block_dims_total_size()];
-  auto* output_data = new T[input_tensor_size];
+  T* block_data = new T[block_mapper.block_dims_total_size()];
+  T* output_data = new T[input_tensor_size];
 
   array<Index, NumDims> input_tensor_strides =
       ComputeStrides<Layout, NumDims>(input_tensor_dims);
@@ -385,8 +382,8 @@ static void test_block_io_zero_stride()
   input_tensor_dims[0] = 1;
   input_tensor_dims[2] = 1;
   input_tensor_dims[4] = 1;
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
-  auto* input_data = GenerateRandomData<float>(input_tensor_size);
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
+  float* input_data = GenerateRandomData<float>(input_tensor_size);
 
   DSizes<Index, 5> output_tensor_dims = rnd_dims;
 
@@ -427,7 +424,7 @@ static void test_block_io_zero_stride()
   };
 
   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock read_block(0, output_tensor_dims, output_tensor_strides,
                            input_tensor_strides_with_zeros, output_data);
     TensorBlockReader::Run(&read_block, input_data);
@@ -436,7 +433,7 @@ static void test_block_io_zero_stride()
   }
 
   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock write_block(0, output_tensor_dims,
                             input_tensor_strides_with_zeros,
                             output_tensor_strides, input_data);
@@ -459,14 +456,14 @@ static void test_block_io_squeeze_ones() {
   // Total size > 1.
   {
     DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();
 
     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -476,7 +473,7 @@ static void test_block_io_squeeze_ones() {
     }
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
@@ -489,14 +486,14 @@ static void test_block_io_squeeze_ones() {
   // Total size == 1.
   {
     DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();
 
     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -506,7 +503,7 @@ static void test_block_io_squeeze_ones() {
     }
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
@@ -527,7 +524,7 @@ static void test_block_cwise_binary_io_basic() {
   DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>();
   DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes));
 
-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();
 
   // Create a random input tensors.
   T* left_data = GenerateRandomData<T>(total_size);
@@ -556,13 +553,13 @@ static void test_block_cwise_binary_io_squeeze_ones() {
   DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
   DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();
 
   // Create a random input tensors.
-  auto* left_data = GenerateRandomData<float>(total_size);
-  auto* right_data = GenerateRandomData<float>(total_size);
+  float* left_data = GenerateRandomData<float>(total_size);
+  float* right_data = GenerateRandomData<float>(total_size);
 
-  auto* output_data = new float[total_size];
+  float* output_data = new float[total_size];
   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
                                 strides, left_data, strides, right_data);
@@ -603,14 +600,14 @@ static void test_block_cwise_binary_io_zero_strides() {
   right_strides[3] = 0;
 
   // Generate random data.
-  auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
-  auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
+  float* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
+  float* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
 
   DSizes<Index, 5> output_sizes = rnd_dims;
   DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));
 
-  const auto output_total_size = output_sizes.TotalSize();
-  auto* output_data = new float[output_total_size];
+  const Index output_total_size = output_sizes.TotalSize();
+  float* output_data = new float[output_total_size];
 
   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides,
@@ -647,17 +644,16 @@ static void test_block_cwise_binary_io_zero_strides() {
 template <int Layout>
 static void test_uniform_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
 
   {
     // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     for (int i = 0; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
     }
@@ -669,9 +665,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -680,9 +676,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -695,9 +691,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -706,9 +702,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -721,9 +717,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -733,9 +729,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -748,9 +744,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -760,9 +756,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -775,17 +771,16 @@ static void test_uniform_block_shape()
 template <int Layout>
 static void test_skewed_inner_dim_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
 
   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -794,9 +789,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -808,9 +803,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -819,9 +814,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -834,9 +829,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
     for (int i = 2; i < 5; ++i) {
@@ -846,9 +841,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
     for (int i = 2; i >= 0; --i) {
@@ -862,9 +857,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -875,9 +870,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -891,9 +886,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -903,9 +898,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -918,15 +913,13 @@ static void test_skewed_inner_dim_block_shape()
 template <int Layout>
 static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 {
-  using T = int;
-
   // Test blocking of tensors with zero dimensions:
   //  - we must not crash on asserts and divisions by zero
   //  - we must not return block with zero dimensions
   //    (recipe for overflows/underflows, divisions by zero and NaNs later)
   //  - total block count must be zero
   {
-    typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
     DSizes<Index, 1> dims(0);
     for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
       TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
@@ -936,7 +929,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   }
 
   {
-    typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
     for (int dim1 = 0; dim1 < 3; ++dim1) {
       for (int dim2 = 0; dim2 < 3; ++dim2) {
         DSizes<Index, 2> dims(dim1, dim2);
@@ -987,9 +980,9 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
 }
 
 #undef TEST_LAYOUTS
-#undef TEST_LAYOUTS_WITH_ARG
-\ No newline at end of file
+#undef TEST_LAYOUTS_WITH_ARG
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index d4cfbd0da..928d20f6e 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -471,7 +471,7 @@ static void test_tensor_product()
   mat1.setRandom();
   mat2.setRandom();
 
-  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
+  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{});
 
   VERIFY_IS_EQUAL(result.dimension(0), 2);
   VERIFY_IS_EQUAL(result.dimension(1), 3);
@@ -514,7 +514,7 @@ static void test_const_inputs()
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
@@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {
 
   m_result = m_left * m_right;
 
-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp
index 01bc77bc1..9fe980648 100644
--- a/unsupported/test/cxx11_tensor_convolution.cpp
+++ b/unsupported/test/cxx11_tensor_convolution.cpp
@@ -25,7 +25,7 @@ static void test_evals()
 
   Tensor<float, 2, DataLayout> result(2,3);
   result.setZero();
-  Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
+  Eigen::array<Tensor<float, 2>::Index, 1> dims3{0};
 
   typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
   Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
index e81fa5e40..294677a4d 100644
--- a/unsupported/test/cxx11_tensor_index_list.cpp
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -170,7 +170,6 @@ static void test_type2indexpair_list()
   typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
   typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
 
-  Dims0 d0;
   Dims2_a d2_a;
 
   Dims2_b d2_b;
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index dd163c18a..7606b0abf 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 4f143b6de..b5b764c65 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,6 +9,7 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+
 #ifdef EIGEN_TEST_PART_1
 
 #include "sparse.h"
@@ -95,7 +96,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(1,0) = DM_a.coeffRef(1,0) = -0.9076572187376921;
   SM_a.insert(1,1) = DM_a.coeffRef(1,1) =  0.6469156566545853;
   SM_a.insert(1,2) = DM_a.coeffRef(1,2) = -0.3658010398782789;
- 
+
   MatrixXd             DM_b(3,2);
   SparseMatrix<double> SM_b(3,2);
   SM_b.insert(0,0) = DM_b.coeffRef(0,0) =  0.9004440976767099;
@@ -165,7 +166,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(0,3) = -0.2;
   SM_a.insert(2,4) =  0.3;
   SM_a.finalize();
-  
+
   SM_b.insert(0,0) =  0.4;
   SM_b.insert(2,1) = -0.5;
   SM_b.finalize();
@@ -183,7 +184,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   DM_b2.resize(4,8);
   DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));
-  
+
   for(int i = 0; i < g_repeat; i++)
   {
     double density = Eigen::internal::random<double>(0.01,0.5);
@@ -196,35 +197,35 @@ EIGEN_DECLARE_TEST(kronecker_product)
     MatrixXf dA(ra,ca), dB(rb,cb), dC;
     initSparse(density, dA, sA);
     initSparse(density, dB, sB);
-    
+
     sC = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB);
     dC = kroneckerProduct(dA.transpose(),dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB.transpose());
     dC = kroneckerProduct(dA.transpose(),dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA,sB.transpose());
     dC = kroneckerProduct(dA,dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC2 = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(dA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(sA,dB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(2*sA,sB);
     dC = kroneckerProduct(2*dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
@@ -236,7 +237,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
 #ifdef EIGEN_TEST_PART_2
 
 // simply check that for a dense kronecker product, sparse module is not needed
-
 #include "main.h"
 #include <Eigen/KroneckerProduct>