From fa68342ef80d72f52b340851a5ab18003259b2ed Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Fri, 3 Aug 2018 17:31:23 -0700
Subject: Move sigmoid functor to core.

---
 Eigen/src/Core/GlobalFunctions.h                   | 11 ++++----
 Eigen/src/Core/functors/UnaryFunctors.h            | 32 ++++++++++++++++++++--
 Eigen/src/plugins/ArrayCwiseUnaryOps.h             | 10 +++++++
 test/array.cpp                                     |  4 +++
 .../Eigen/CXX11/src/Tensor/TensorFunctors.h        | 30 --------------------
 5 files changed, 50 insertions(+), 37 deletions(-)
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 50406400b..4c9823d70 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -66,6 +66,7 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sigmoid,scalar_sigmoid_op,sigmoid function,\sa ArrayBase::sigmoid)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
@@ -89,7 +90,7 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
-  
+
   /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
     *
     * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
@@ -124,21 +125,21 @@ namespace Eigen
     *
     * Example: \include Cwise_array_power_array.cpp
     * Output: \verbinclude Cwise_array_power_array.out
-    * 
+    *
     * \sa ArrayBase::pow()
     *
     * \relates ArrayBase
     */
   template<typename Derived,typename ExponentDerived>
   inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
-  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) 
+  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
   {
     return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
       x.derived(),
       exponents.derived()
     );
   }
-  
+
   /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
     *
     * This function computes the coefficient-wise power between a scalar and an array of exponents.
@@ -147,7 +148,7 @@ namespace Eigen
     *
     * Example: \include Cwise_scalar_power_array.cpp
     * Output: \verbinclude Cwise_scalar_power_array.out
-    * 
+    *
     * \sa ArrayBase::pow()
     *
     * \relates ArrayBase
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index bfc046556..c350cdf98 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -701,7 +701,7 @@ template<typename Scalar> struct scalar_isnan_op {
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
 #if defined(__SYCL_DEVICE_ONLY__)
     return numext::isnan(a);
-#else  
+#else
     return (numext::isnan)(a);
 #endif
   }
@@ -815,7 +815,7 @@ struct scalar_sign_op<Scalar,true> {
 template<typename Scalar>
 struct functor_traits<scalar_sign_op<Scalar> >
 { enum {
-    Cost = 
+    Cost =
         NumTraits<Scalar>::IsComplex
         ? ( 8*NumTraits<Scalar>::MulCost  ) // roughly
         : ( 3*NumTraits<Scalar>::AddCost),
@@ -823,6 +823,34 @@ struct functor_traits<scalar_sign_op<Scalar> >
   };
 };
 
+/** \internal
+  * \brief Template functor to compute the sigmoid of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
+  */
+template <typename T>
+struct scalar_sigmoid_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
+    const T one = T(1);
+    return one / (one + numext::exp(-x));
+  }
+
+  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Packet packetOp(const Packet& x) const {
+    const Packet one = pset1<Packet>(T(1));
+    return pdiv(one, padd(one, pexp(pnegate(x))));
+  }
+};
+template <typename T>
+struct functor_traits<scalar_sigmoid_op<T> > {
+  enum {
+    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
+    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
+                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
+  };
+};
+
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index 43615bd56..c9c7e7e4f 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -21,6 +21,7 @@ typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturn
 typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
 typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
 typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
+typedef CwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> SigmoidReturnType;
 typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
 typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
 typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
@@ -335,6 +336,15 @@ cosh() const
   return CoshReturnType(derived());
 }
 
+/** \returns an expression of the coefficient-wise sigmoid of *this.
+  */
+EIGEN_DEVICE_FUNC
+inline const SigmoidReturnType
+sigmoid() const
+{
+  return SigmoidReturnType(derived());
+}
+
 /** \returns an expression of the coefficient-wise inverse of *this.
   *
   * Example: \include Cwise_inverse.cpp
diff --git a/test/array.cpp b/test/array.cpp
index c01653668..1b5725fb4 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -231,6 +231,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
+  VERIFY_IS_APPROX(m1.sigmoid(), sigmoid(m1));
 
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
   VERIFY_IS_APPROX(m1.round(), round(m1));
@@ -266,6 +267,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
   VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
   VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
+  VERIFY_IS_APPROX(sigmoid(m1), (1.0/(1.0+exp(-m1))));
   VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast<Scalar>())*std::acos(-1.0));
   VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all());
   VERIFY((Eigen::isnan)((m1*0.0)/0.0).all());
@@ -345,6 +347,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
+  VERIFY_IS_APPROX(m1.sigmoid(), sigmoid(m1));
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
   VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all());
   VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all());
@@ -368,6 +371,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
   VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
   VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
+  VERIFY_IS_APPROX(sigmoid(m1), (1.0/(1.0 + exp(-m1))));
 
   for (Index i = 0; i < m.rows(); ++i)
     for (Index j = 0; j < m.cols(); ++j)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 7ecd4d1ac..cd666c173 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -54,36 +54,6 @@ struct functor_traits<scalar_fmod_op<Scalar> > {
          PacketAccess = false };
 };
 
-
-/** \internal
-  * \brief Template functor to compute the sigmoid of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
-  */
-template <typename T>
-struct scalar_sigmoid_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
-    const T one = T(1);
-    return one / (one + numext::exp(-x));
-  }
-
-  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  Packet packetOp(const Packet& x) const {
-    const Packet one = pset1<Packet>(T(1));
-    return pdiv(one, padd(one, pexp(pnegate(x))));
-  }
-};
-
-template <typename T>
-struct functor_traits<scalar_sigmoid_op<T> > {
-  enum {
-    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
-    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
-                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
-  };
-};
-
-
 template<typename Reducer, typename Device>
 struct reducer_traits {
   enum {
-- 
cgit v1.2.3


From 3055e3a7c213e7f1337cdc43d5b1953acd013897 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Wed, 8 Aug 2018 11:19:02 +0100
Subject: Creating a pointer type in TensorCustomOp.h

---
 .../Eigen/CXX11/src/Tensor/TensorCustomOp.h        | 25 +++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index 47b5a5a5e..87d84a311 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef typename internal::remove_all<typename Eigen::internal::traits<XprType>::PointerType>::type * PointerType;
 
   enum {
     IsAligned = false,
@@ -106,7 +107,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) {
     if (data) {
       evalTo(data);
       return false;
@@ -139,23 +140,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerType data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
-        data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerType data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
     m_op.func().eval(m_op.expression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const ArgType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerType m_result;
 };
 
 
@@ -250,6 +250,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef typename internal::remove_all<typename Eigen::internal::traits<XprType>::PointerType>::type * PointerType;
 
   enum {
     IsAligned = false,
@@ -268,12 +269,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerType data) {
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<PointerType>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
       evalTo(m_result);
       return true;
     }
@@ -300,22 +301,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
   }
 
-  EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC PointerType data() const { return m_result; }
 
 #ifdef EIGEN_USE_SYCL
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
 #endif
 
  protected:
-  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
-    TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
+  EIGEN_DEVICE_FUNC void evalTo(PointerType data) {
+    TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
     m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
   }
 
   Dimensions m_dimensions;
   const XprType m_op;
   const Device& m_device;
-  CoeffReturnType* m_result;
+  PointerType m_result;
 };
 
 
-- 
cgit v1.2.3


From 8c083bfd0e975fc2592ced1a066e1796550338a2 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Thu, 9 Aug 2018 13:57:43 +0100
Subject: Properly fix the PointerType for TensorCustomOp.h. The output
 type here should be based on CoeffReturnType, not the Scalar type. Therefore,
 similar to the reduction and evalTo functions, it should have its own
 MakePointer class. In this case, for other devices the type defaults to
 CoeffReturnType and no changes are required in users' code. However, in SYCL,
 on the device, we can reconstruct the device type.

---
 .../Eigen/CXX11/src/Tensor/TensorCustomOp.h        | 77 +++++++++++++---------
 .../CXX11/src/Tensor/TensorForwardDeclarations.h   |  4 +-
 2 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index 87d84a311..39410e63d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -20,8 +20,8 @@ namespace Eigen {
   *
   */
 namespace internal {
-template<typename CustomUnaryFunc, typename XprType>
-struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
+struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> >
 {
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::StorageKind StorageKind;
@@ -30,27 +30,35 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
   typedef typename remove_reference<Nested>::type _Nested;
   static const int NumDimensions = traits<XprType>::NumDimensions;
   static const int Layout = traits<XprType>::Layout;
-  typedef typename traits<XprType>::PointerType PointerType;
+
+   template <class T> struct MakePointer {
+    // Intermediate typedef to workaround MSVC issue.
+    typedef MakePointer_<T> MakePointerT;
+    typedef typename MakePointerT::Type Type;
+    typedef typename MakePointerT::RefType RefType;
+    typedef typename MakePointerT::ScalarType ScalarType;
+  };
+  typedef typename MakePointer<typename internal::remove_const<typename XprType::CoeffReturnType>::type>::Type PointerType;
 };
 
-template<typename CustomUnaryFunc, typename XprType>
-struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
+struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Eigen::Dense>
 {
-  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
+  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>& type;
 };
 
-template<typename CustomUnaryFunc, typename XprType>
-struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
+struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> >
 {
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
+  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> type;
 };
 
 }  // end namespace internal
 
 
 
-template<typename CustomUnaryFunc, typename XprType>
-class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_>
+class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, ReadOnlyAccessors>
 {
   public:
   typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
@@ -77,10 +85,10 @@ class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFun
 
 
 // Eval as rvalue
-template<typename CustomUnaryFunc, typename XprType, typename Device>
-struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_, typename Device>
+struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_>, Device>
 {
-  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
+  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType, MakePointer_> ArgType;
   typedef typename internal::traits<ArgType>::Index Index;
   static const int NumDims = internal::traits<ArgType>::NumDimensions;
   typedef DSizes<Index, NumDims> Dimensions;
@@ -88,7 +96,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  typedef typename internal::remove_all<typename Eigen::internal::traits<XprType>::PointerType>::type * PointerType;
+  typedef typename Eigen::internal::traits<ArgType>::PointerType PointerType;
 
   enum {
     IsAligned = false,
@@ -112,7 +120,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<CoeffReturnType*>(
+      m_result = static_cast<PointerType>(
           m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
@@ -168,8 +176,8 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   *
   */
 namespace internal {
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
+struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> >
 {
   typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
                                                   typename RhsXprType::Scalar>::ret Scalar;
@@ -185,28 +193,35 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
   typedef typename remove_reference<RhsNested>::type _RhsNested;
   static const int NumDimensions = traits<LhsXprType>::NumDimensions;
   static const int Layout = traits<LhsXprType>::Layout;
-  typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
-                                typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
+
+ template <class T> struct MakePointer {
+    // Intermediate typedef to workaround MSVC issue.
+    typedef MakePointer_<T> MakePointerT;
+    typedef typename MakePointerT::Type Type;
+    typedef typename MakePointerT::RefType RefType;
+    typedef typename MakePointerT::ScalarType ScalarType;
+  };
+  typedef typename MakePointer<CoeffReturnType>::Type PointerType;
 };
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
+struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Eigen::Dense>
 {
   typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
 };
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_>
+struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> >
 {
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
+  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> type;
 };
 
 }  // end namespace internal
 
 
 
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType,template <class> class MakePointer_>
+class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, ReadOnlyAccessors>
 {
   public:
   typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
@@ -239,10 +254,10 @@ class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinary
 
 
 // Eval as rvalue
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
-struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_, typename Device>
+struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_>, Device>
 {
-  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
+  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType, MakePointer_> XprType;
   typedef typename internal::traits<XprType>::Index Index;
   static const int NumDims = internal::traits<XprType>::NumDimensions;
   typedef DSizes<Index, NumDims> Dimensions;
@@ -250,7 +265,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  typedef typename internal::remove_all<typename Eigen::internal::traits<XprType>::PointerType>::type * PointerType;
+  typedef typename Eigen::internal::traits<XprType>::PointerType PointerType;
 
   enum {
     IsAligned = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 0dd524a30..da0751039 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -89,8 +89,8 @@ template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
 template<typename Op, typename XprType> class TensorScanOp;
 template<typename Dims, typename XprType> class TensorTraceOp;
 
-template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
+template<typename CustomUnaryFunc, typename XprType, template <class> class MakePointer_ = MakePointer> class TensorCustomUnaryOp;
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, template <class> class MakePointer_ = MakePointer> class TensorCustomBinaryOp;
 
 template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
 template<typename XprType> class TensorForcedEvalOp;
-- 
cgit v1.2.3


From c49e93440f85462728975b330e6534aeb37aa2d0 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 13 Aug 2018 15:53:31 -0700
Subject: SuiteSparse defines the macro SuiteSparse_long to control what type
 is used for 64-bit integers. The default value of this macro is long on
 non-MSVC platforms and __int64 on MSVC. CholmodSupport defaults to using long
 for the long variants of CHOLMOD functions. This creates problems when
 SuiteSparse_long is different from long. So the correct thing to do here is
 to use SuiteSparse_long as the type instead of long.

---
 Eigen/src/CholmodSupport/CholmodSupport.h | 76 +++++++++++++++----------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index dc199ece6..adaf52858 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -10,7 +10,7 @@
 #ifndef EIGEN_CHOLMODSUPPORT_H
 #define EIGEN_CHOLMODSUPPORT_H
 
-namespace Eigen { 
+namespace Eigen {
 
 namespace internal {
 
@@ -79,12 +79,12 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> >
 
   res.dtype   = 0;
   res.stype   = -1;
-  
+
   if (internal::is_same<_StorageIndex,int>::value)
   {
     res.itype = CHOLMOD_INT;
   }
-  else if (internal::is_same<_StorageIndex,long>::value)
+  else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
   {
     res.itype = CHOLMOD_LONG;
   }
@@ -95,9 +95,9 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> >
 
   // setup res.xtype
   internal::cholmod_configure_matrix<_Scalar>::run(res);
-  
+
   res.stype = 0;
-  
+
   return res;
 }
 
@@ -121,7 +121,7 @@ template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
 cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
 {
   cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
-  
+
   if(UpLo==Upper) res.stype =  1;
   if(UpLo==Lower) res.stype = -1;
   // swap stype for rowmajor matrices (only works for real matrices)
@@ -168,11 +168,11 @@ namespace internal {
 
 #define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \
     template<typename _StorageIndex> inline ret cm_ ## name       (cholmod_common &Common) { return cholmod_ ## name   (&Common); } \
-    template<>                       inline ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); }
+    template<>                       inline ret cm_ ## name<SuiteSparse_long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); }
 
 #define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \
     template<typename _StorageIndex> inline ret cm_ ## name       (t1& a1, cholmod_common &Common) { return cholmod_ ## name   (&a1, &Common); } \
-    template<>                       inline ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); }
+    template<>                       inline ret cm_ ## name<SuiteSparse_long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); }
 
 EIGEN_CHOLMOD_SPECIALIZE0(int, start)
 EIGEN_CHOLMOD_SPECIALIZE0(int, finish)
@@ -184,15 +184,15 @@ EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A)
 EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A)
 
 template<typename _StorageIndex> inline cholmod_dense*  cm_solve         (int sys, cholmod_factor& L, cholmod_dense&  B, cholmod_common &Common) { return cholmod_solve     (sys, &L, &B, &Common); }
-template<>                       inline cholmod_dense*  cm_solve<long>   (int sys, cholmod_factor& L, cholmod_dense&  B, cholmod_common &Common) { return cholmod_l_solve   (sys, &L, &B, &Common); }
+template<>                       inline cholmod_dense*  cm_solve<SuiteSparse_long>   (int sys, cholmod_factor& L, cholmod_dense&  B, cholmod_common &Common) { return cholmod_l_solve   (sys, &L, &B, &Common); }
 
 template<typename _StorageIndex> inline cholmod_sparse* cm_spsolve       (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve   (sys, &L, &B, &Common); }
-template<>                       inline cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); }
+template<>                       inline cholmod_sparse* cm_spsolve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); }
 
 template<typename _StorageIndex>
 inline int  cm_factorize_p       (cholmod_sparse*  A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p   (A, beta, fset, fsize, L, &Common); }
 template<>
-inline int  cm_factorize_p<long> (cholmod_sparse*  A, double beta[2], long* fset,          std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); }
+inline int  cm_factorize_p<SuiteSparse_long> (cholmod_sparse*  A, double beta[2], SuiteSparse_long* fset,          std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); }
 
 #undef EIGEN_CHOLMOD_SPECIALIZE0
 #undef EIGEN_CHOLMOD_SPECIALIZE1
@@ -254,10 +254,10 @@ class CholmodBase : public SparseSolverBase<Derived>
         internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod);
       internal::cm_finish<StorageIndex>(m_cholmod);
     }
-    
+
     inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
     inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
-    
+
     /** \brief Reports whether previous computation was successful.
       *
       * \returns \c Success if computation was successful,
@@ -276,11 +276,11 @@ class CholmodBase : public SparseSolverBase<Derived>
       factorize(matrix);
       return derived();
     }
-    
+
     /** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
       *
       * This function is particularly useful when solving for several problems having the same structure.
-      * 
+      *
       * \sa factorize()
       */
     void analyzePattern(const MatrixType& matrix)
@@ -292,13 +292,13 @@ class CholmodBase : public SparseSolverBase<Derived>
       }
       cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
       m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod);
-      
+
       this->m_isInitialized = true;
       this->m_info = Success;
       m_analysisIsOk = true;
       m_factorizationIsOk = false;
     }
-    
+
     /** Performs a numeric decomposition of \a matrix
       *
       * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
@@ -315,11 +315,11 @@ class CholmodBase : public SparseSolverBase<Derived>
       this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
       m_factorizationIsOk = true;
     }
-    
+
     /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations.
      *  See the Cholmod user guide for details. */
     cholmod_common& cholmod() { return m_cholmod; }
-    
+
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal */
     template<typename Rhs,typename Dest>
@@ -329,7 +329,7 @@ class CholmodBase : public SparseSolverBase<Derived>
       const Index size = m_cholmodFactor->n;
       EIGEN_UNUSED_VARIABLE(size);
       eigen_assert(size==b.rows());
-      
+
       // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
       Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
 
@@ -345,7 +345,7 @@ class CholmodBase : public SparseSolverBase<Derived>
       dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
       internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod);
     }
-    
+
     /** \internal */
     template<typename RhsDerived, typename DestDerived>
     void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
@@ -370,8 +370,8 @@ class CholmodBase : public SparseSolverBase<Derived>
       internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod);
     }
     #endif // EIGEN_PARSED_BY_DOXYGEN
-    
-    
+
+
     /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.
       *
       * During the numerical factorization, an offset term is added to the diagonal coefficients:\n
@@ -386,7 +386,7 @@ class CholmodBase : public SparseSolverBase<Derived>
       m_shiftOffset[0] = double(offset);
       return derived();
     }
-    
+
     /** \returns the determinant of the underlying matrix from the current factorization */
     Scalar determinant() const
     {
@@ -441,7 +441,7 @@ class CholmodBase : public SparseSolverBase<Derived>
     template<typename Stream>
     void dumpMemory(Stream& /*s*/)
     {}
-    
+
   protected:
     mutable cholmod_common m_cholmod;
     cholmod_factor* m_cholmodFactor;
@@ -478,11 +478,11 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
 {
     typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base;
     using Base::m_cholmod;
-    
+
   public:
-    
+
     typedef _MatrixType MatrixType;
-    
+
     CholmodSimplicialLLT() : Base() { init(); }
 
     CholmodSimplicialLLT(const MatrixType& matrix) : Base()
@@ -529,11 +529,11 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
 {
     typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base;
     using Base::m_cholmod;
-    
+
   public:
-    
+
     typedef _MatrixType MatrixType;
-    
+
     CholmodSimplicialLDLT() : Base() { init(); }
 
     CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
@@ -578,11 +578,11 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
 {
     typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base;
     using Base::m_cholmod;
-    
+
   public:
-    
+
     typedef _MatrixType MatrixType;
-    
+
     CholmodSupernodalLLT() : Base() { init(); }
 
     CholmodSupernodalLLT(const MatrixType& matrix) : Base()
@@ -629,11 +629,11 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
 {
     typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base;
     using Base::m_cholmod;
-    
+
   public:
-    
+
     typedef _MatrixType MatrixType;
-    
+
     CholmodDecomposition() : Base() { init(); }
 
     CholmodDecomposition(const MatrixType& matrix) : Base()
@@ -643,7 +643,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom
     }
 
     ~CholmodDecomposition() {}
-    
+
     void setMode(CholmodMode mode)
     {
       switch(mode)
-- 
cgit v1.2.3


From eabc7a4031cf0f5702fa16009ef33f87c1d51873 Mon Sep 17 00:00:00 2001
From: Justin Carpentier <jcarpent@laas.fr>
Date: Fri, 10 Aug 2018 14:30:06 +0200
Subject: PR 465: Fix issue in RowMajor assignment in
 plain_matrix_type_row_major::type

The type should be RowMajor
---
 Eigen/src/Core/util/XprHelper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index e3231c712..836ff4711 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -405,7 +405,7 @@ template<typename T> struct plain_matrix_type_row_major
   typedef Matrix<typename traits<T>::Scalar,
                 Rows,
                 Cols,
-                (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+                (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor,
                 MaxRows,
                 MaxCols
           > type;
-- 
cgit v1.2.3


From e6d5be811ddab928ae7ed73f76e1c4c8e18917e2 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 13 Aug 2018 10:29:21 -0700
Subject: Fixed syntax of nested template chevrons to make it compatible with
 C++98 mode.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index cbf91013b..3904552a9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -155,8 +155,8 @@ struct TensorBlockCopyOp {
     typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
     typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
 
-    typedef Eigen::Map<Src, 0, InnerStride<>> SrcMap;
-    typedef Eigen::Map<Dst, 0, InnerStride<>> DstMap;
+    typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
+    typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
 
     const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
     DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@@ -405,9 +405,9 @@ struct TensorBlockCwiseBinaryOp {
     typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
     typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
 
-    typedef Eigen::Map<Lhs, 0, InnerStride<>> LhsMap;
-    typedef Eigen::Map<Rhs, 0, InnerStride<>> RhsMap;
-    typedef Eigen::Map<Out, 0, InnerStride<>> OutMap;
+    typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
+    typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
+    typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
 
     const LeftScalar* lhs_base = &left_data[left_index];
     const RightScalar* rhs_base = &right_data[right_index];
-- 
cgit v1.2.3


From 3810ec228fbc9ff8fff23a997c09a490f319c902 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 13 Aug 2018 10:46:09 -0700
Subject: Don't use the auto keyword since it's not always supported properly.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 3904552a9..21a6b66e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -501,7 +501,7 @@ struct TensorBlockCwiseBinaryIO {
       if (size == 1) {
         continue;
       }
-      auto& state = block_iter_state[num_squeezed_dims];
+      BlockIteratorState& state = block_iter_state[num_squeezed_dims];
       state.output_stride = block_strides[dim];
       state.left_stride = left_strides[dim];
       state.right_stride = right_strides[dim];
@@ -523,7 +523,7 @@ struct TensorBlockCwiseBinaryIO {
                                     right_stride, right_data);
       // Update index.
       for (int j = 0; j < num_squeezed_dims; ++j) {
-        auto& state = block_iter_state[j];
+        BlockIteratorState& state = block_iter_state[j];
         if (++state.count < state.size) {
           output_index += state.output_stride;
           left_index += state.left_stride;
-- 
cgit v1.2.3


From 26239ee580e5ffbdcad657c291bf4f49e6b297cf Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 13 Aug 2018 11:05:51 -0700
Subject: Use NULL instead of nullptr to avoid adding a cxx11 requirement.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h           | 2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 ++--
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h         | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index f1f877c16..bcaf5c97f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -187,7 +187,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) {
     if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
-        m_leftImpl.data() != nullptr) {
+        m_leftImpl.data() != NULL) {
       TensorBlock left_block(block->first_coeff_index(), block->block_sizes(),
                              block->tensor_strides(), block->tensor_strides(),
                              m_leftImpl.data() + block->first_coeff_index());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index cc134228a..3e3665efb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -102,7 +102,7 @@ class Allocator {
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
   // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL)
       : pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
 
   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
@@ -282,7 +282,7 @@ struct ThreadPoolDevice {
   // Convenience wrapper for parallelFor that does not align blocks.
   void parallelFor(Index n, const TensorOpCost& cost,
                    std::function<void(Index, Index)> f) const {
-    parallelFor(n, cost, nullptr, std::move(f));
+    parallelFor(n, cost, NULL, std::move(f));
   }
 
   // Thread pool accessor.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 3f3b5685d..0294aa62e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -227,7 +227,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
     typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
 
     Evaluator evaluator(expr, device);
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
       const StorageIndex PacketSize =
           Vectorizable
@@ -271,7 +271,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
       return;
     }
 
-    const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr);
+    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
       TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
       Index block_total_size = 0;
-- 
cgit v1.2.3


From d6e283ba9650faed0c48c36f475d5dd77b916427 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 13 Aug 2018 11:14:50 -0700
Subject: sigmoid -> logistic

---
 Eigen/src/Core/GlobalFunctions.h        |  2 +-
 Eigen/src/Core/functors/UnaryFunctors.h | 10 +++++-----
 Eigen/src/plugins/ArrayCwiseUnaryOps.h  | 10 +++++-----
 test/array.cpp                          |  8 ++++----
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h
index 4c9823d70..563df6e84 100644
--- a/Eigen/src/Core/GlobalFunctions.h
+++ b/Eigen/src/Core/GlobalFunctions.h
@@ -66,7 +66,7 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sigmoid,scalar_sigmoid_op,sigmoid function,\sa ArrayBase::sigmoid)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index c350cdf98..c1cc2ab3b 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -824,12 +824,12 @@ struct functor_traits<scalar_sign_op<Scalar> >
 };
 
 /** \internal
-  * \brief Template functor to compute the sigmoid of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
+  * \brief Template functor to compute the logistic function of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::logistic()
   */
 template <typename T>
-struct scalar_sigmoid_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
+struct scalar_logistic_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
     const T one = T(1);
     return one / (one + numext::exp(-x));
@@ -842,7 +842,7 @@ struct scalar_sigmoid_op {
   }
 };
 template <typename T>
-struct functor_traits<scalar_sigmoid_op<T> > {
+struct functor_traits<scalar_logistic_op<T> > {
   enum {
     Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
     PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index c9c7e7e4f..e928db467 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -21,7 +21,7 @@ typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturn
 typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
 typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
 typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
-typedef CwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> SigmoidReturnType;
+typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
 typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
 typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
 typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
@@ -336,13 +336,13 @@ cosh() const
   return CoshReturnType(derived());
 }
 
-/** \returns an expression of the coefficient-wise sigmoid of *this.
+/** \returns an expression of the coefficient-wise logistic of *this.
   */
 EIGEN_DEVICE_FUNC
-inline const SigmoidReturnType
-sigmoid() const
+inline const LogisticReturnType
+logistic() const
 {
-  return SigmoidReturnType(derived());
+  return LogisticReturnType(derived());
 }
 
 /** \returns an expression of the coefficient-wise inverse of *this.
diff --git a/test/array.cpp b/test/array.cpp
index 1b5725fb4..d9c4626c0 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -231,7 +231,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
-  VERIFY_IS_APPROX(m1.sigmoid(), sigmoid(m1));
+  VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
 
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
   VERIFY_IS_APPROX(m1.round(), round(m1));
@@ -267,7 +267,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
   VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
   VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
-  VERIFY_IS_APPROX(sigmoid(m1), (1.0/(1.0+exp(-m1))));
+  VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0+exp(-m1))));
   VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast<Scalar>())*std::acos(-1.0));
   VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all());
   VERIFY((Eigen::isnan)((m1*0.0)/0.0).all());
@@ -347,7 +347,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
-  VERIFY_IS_APPROX(m1.sigmoid(), sigmoid(m1));
+  VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
   VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all());
   VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all());
@@ -371,7 +371,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
   VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
   VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
-  VERIFY_IS_APPROX(sigmoid(m1), (1.0/(1.0 + exp(-m1))));
+  VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0 + exp(-m1))));
 
   for (Index i = 0; i < m.rows(); ++i)
     for (Index j = 0; j < m.cols(); ++j)
-- 
cgit v1.2.3


From 0f1b2e08a5d5ed9158435d12da60dfaa5e8b2803 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 13 Aug 2018 11:52:58 -0700
Subject: Call logistic functor from Tensor::sigmoid.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 97f90f638..7cc71d99e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -200,9 +200,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
     }
 
     EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
     sigmoid() const {
-      return unaryExpr(internal::scalar_sigmoid_op<Scalar>());
+      return unaryExpr(internal::scalar_logistic_op<Scalar>());
     }
 
     EIGEN_DEVICE_FUNC
-- 
cgit v1.2.3


From 3d3711f22fd2b04026f04ce6f1fe7e888ea5a4da Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 13 Aug 2018 15:16:06 -0700
Subject: Fixed compilation errors.

---
 unsupported/test/cxx11_tensor_thread_pool.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 5c3aae482..dd163c18a 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -428,7 +428,7 @@ void test_threadpool_allocate(TestAllocator* allocator)
     void* ptr = device.allocate(512);
     device.deallocate(ptr);
   }
-  VERIFY(allocator != nullptr);
+  VERIFY(allocator != NULL);
   VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
   VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
 }
@@ -460,7 +460,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
   CALL_SUBTEST_6(test_multithread_random());
 
   TestAllocator test_allocator;
-  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(nullptr));
+  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(NULL));
   CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator));
   CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator));
 }
-- 
cgit v1.2.3


From 501be70b27e855d4921c5b510ec1a19783238c16 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Mon, 13 Aug 2018 15:16:40 -0700
Subject: Code cleanup

---
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0294aa62e..0cefe42dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -257,7 +257,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
 
   static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const ThreadPoolDevice& device) {
-    typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
     typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
 
     Evaluator evaluator(expr, device);
-- 
cgit v1.2.3


From 8278ae63137fb2ce2cdf6fc8117df3080e5cb2fe Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 13 Aug 2018 15:31:23 -0700
Subject: Add thread-local storage support on platforms that lack it, by
 emulating it with a hash map.

---
 test/main.h                                        |  6 +-
 unsupported/Eigen/CXX11/ThreadPool                 | 10 +++-
 .../CXX11/src/Tensor/TensorDeviceThreadPool.h      | 50 -----------------
 .../CXX11/src/ThreadPool/NonBlockingThreadPool.h   | 65 +++++++++++++++++-----
 .../Eigen/CXX11/src/ThreadPool/ThreadLocal.h       | 48 +++++++++++++---
 5 files changed, 103 insertions(+), 76 deletions(-)

diff --git a/test/main.h b/test/main.h
index de8a4865f..36784b1f4 100644
--- a/test/main.h
+++ b/test/main.h
@@ -125,7 +125,7 @@ inline void on_temporary_creation(long int size) {
     if(nb_temporaries!=(N)) { std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; }\
     VERIFY( (#XPR) && nb_temporaries==(N) ); \
   }
-  
+
 #endif
 
 #include "split_test_helper.h"
@@ -328,7 +328,7 @@ namespace Eigen
   #define VERIFY_RAISES_STATIC_ASSERT(a) \
     std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n";
 #endif
-    
+
   #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__)
   #define EIGEN_USE_CUSTOM_ASSERT
   #endif
@@ -845,4 +845,4 @@ int main(int argc, char *argv[])
 #ifdef _MSC_VER
   // 4503 - decorated name length exceeded, name was truncated
   #pragma warning( disable : 4503)
-#endif
\ No newline at end of file
+#endif
diff --git a/unsupported/Eigen/CXX11/ThreadPool b/unsupported/Eigen/CXX11/ThreadPool
index cbb3bbf2c..12aa07c7f 100644
--- a/unsupported/Eigen/CXX11/ThreadPool
+++ b/unsupported/Eigen/CXX11/ThreadPool
@@ -44,6 +44,14 @@
 #include <thread>
 #include <functional>
 #include <memory>
+#ifndef EIGEN_THREAD_LOCAL
+// There are non-parenthesized calls to "max" in the  <unordered_map> header,
+// which trigger a check in test/main.h causing compilation to fail.
+// We work around the check here by removing the check for max in
+// the case where we have to emulate thread_local.
+#undef max
+#include <unordered_map>
+#endif
 
 #include "src/util/CXX11Meta.h"
 #include "src/util/MaxSizeVector.h"
@@ -55,6 +63,7 @@
 #include "src/ThreadPool/RunQueue.h"
 #include "src/ThreadPool/ThreadPoolInterface.h"
 #include "src/ThreadPool/ThreadEnvironment.h"
+#include "src/ThreadPool/Barrier.h"
 #include "src/ThreadPool/NonBlockingThreadPool.h"
 
 #endif
@@ -62,4 +71,3 @@
 #include <Eigen/src/Core/util/ReenableStupidWarnings.h>
 
 #endif // EIGEN_CXX11_THREADPOOL_MODULE
-
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 3e3665efb..6fc6688d3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -12,56 +12,6 @@
 
 namespace Eigen {
 
-// Barrier is an object that allows one or more threads to wait until
-// Notify has been called a specified number of times.
-class Barrier {
- public:
-  Barrier(unsigned int count) : state_(count << 1), notified_(false) {
-    eigen_assert(((count << 1) >> 1) == count);
-  }
-  ~Barrier() {
-    eigen_assert((state_>>1) == 0);
-  }
-
-  void Notify() {
-    unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
-    if (v != 1) {
-      eigen_assert(((v + 2) & ~1) != 0);
-      return;  // either count has not dropped to 0, or waiter is not waiting
-    }
-    std::unique_lock<std::mutex> l(mu_);
-    eigen_assert(!notified_);
-    notified_ = true;
-    cv_.notify_all();
-  }
-
-  void Wait() {
-    unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
-    if ((v >> 1) == 0) return;
-    std::unique_lock<std::mutex> l(mu_);
-    while (!notified_) {
-      cv_.wait(l);
-    }
-  }
-
- private:
-  std::mutex mu_;
-  std::condition_variable cv_;
-  std::atomic<unsigned int> state_;  // low bit is waiter flag
-  bool notified_;
-};
-
-
-// Notification is an object that allows a user to to wait for another
-// thread to signal a notification that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object,
-// but only one caller must call Notify() on the object.
-struct Notification : Barrier {
-  Notification() : Barrier(1) {};
-};
-
-
 // Runs an arbitrary function and then calls Notify() on the passed in
 // Notification.
 template <typename Function, typename... Args> struct FunctionWrapperWithNotification
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index ecd49f382..ede70da8d 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
 #define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
 
-
 namespace Eigen {
 
 template <typename Environment>
@@ -23,7 +22,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       : ThreadPoolTempl(num_threads, true, env) {}
 
   ThreadPoolTempl(int num_threads, bool allow_spinning,
-                             Environment env = Environment())
+                  Environment env = Environment())
       : env_(env),
         num_threads_(num_threads),
         allow_spinning_(allow_spinning),
@@ -61,9 +60,17 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     for (int i = 0; i < num_threads_; i++) {
       queues_.push_back(new Queue());
     }
+#ifndef EIGEN_THREAD_LOCAL
+    init_barrier_.reset(new Barrier(num_threads_));
+#endif
     for (int i = 0; i < num_threads_; i++) {
       threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
     }
+#ifndef EIGEN_THREAD_LOCAL
+    // Wait for workers to initialize per_thread_map_. Otherwise we might race
+    // with them in Schedule or CurrentThreadId.
+    init_barrier_->Wait();
+#endif
   }
 
   ~ThreadPoolTempl() {
@@ -85,6 +92,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // Join threads explicitly to avoid destruction order issues.
     for (size_t i = 0; i < num_threads_; i++) delete threads_[i];
     for (size_t i = 0; i < num_threads_; i++) delete queues_[i];
+#ifndef EIGEN_THREAD_LOCAL
+    for (auto it : per_thread_map_) delete it.second;
+#endif
   }
 
   void Schedule(std::function<void()> fn) {
@@ -109,8 +119,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // this is kept alive while any threads can potentially be in Schedule.
     if (!t.f) {
       ec_.Notify(false);
-    }
-    else {
+    } else {
       env_.ExecuteTask(t);  // Push failed, execute directly.
     }
   }
@@ -130,13 +139,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     ec_.Notify(true);
   }
 
-  int NumThreads() const final {
-    return num_threads_;
-  }
+  int NumThreads() const final { return num_threads_; }
 
   int CurrentThreadId() const final {
-    const PerThread* pt =
-        const_cast<ThreadPoolTempl*>(this)->GetPerThread();
+    const PerThread* pt = const_cast<ThreadPoolTempl*>(this)->GetPerThread();
     if (pt->pool == this) {
       return pt->thread_id;
     } else {
@@ -148,10 +154,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   typedef typename Environment::EnvThread Thread;
 
   struct PerThread {
-    constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { }
+    constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) {}
     ThreadPoolTempl* pool;  // Parent pool, or null for normal threads.
-    uint64_t rand;  // Random generator state.
-    int thread_id;  // Worker thread index in pool.
+    uint64_t rand;          // Random generator state.
+    int thread_id;          // Worker thread index in pool.
   };
 
   Environment env_;
@@ -166,12 +172,26 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   std::atomic<bool> done_;
   std::atomic<bool> cancelled_;
   EventCount ec_;
+#ifndef EIGEN_THREAD_LOCAL
+  std::unique_ptr<Barrier> init_barrier_;
+  std::mutex mu;  // Protects per_thread_map_.
+  std::unordered_map<uint64_t, PerThread*> per_thread_map_;
+#endif
 
   // Main worker thread loop.
   void WorkerLoop(int thread_id) {
+#ifndef EIGEN_THREAD_LOCAL
+    PerThread* pt = new PerThread();
+    mu.lock();
+    per_thread_map_[GlobalThreadIdHash()] = pt;
+    mu.unlock();
+    init_barrier_->Notify();
+    init_barrier_->Wait();
+#else
     PerThread* pt = GetPerThread();
+#endif
     pt->pool = this;
-    pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id());
+    pt->rand = GlobalThreadIdHash();
     pt->thread_id = thread_id;
     Queue* q = queues_[thread_id];
     EventCount::Waiter* waiter = &waiters_[thread_id];
@@ -322,10 +342,24 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     return -1;
   }
 
-  static EIGEN_STRONG_INLINE PerThread* GetPerThread() {
+  static EIGEN_STRONG_INLINE uint64_t GlobalThreadIdHash() {
+    return std::hash<std::thread::id>()(std::this_thread::get_id());
+  }
+
+  EIGEN_STRONG_INLINE PerThread* GetPerThread() {
+#ifndef EIGEN_THREAD_LOCAL
+    static PerThread dummy;
+    auto it = per_thread_map_.find(GlobalThreadIdHash());
+    if (it == per_thread_map_.end()) {
+      return &dummy;
+    } else {
+      return it->second;
+    }
+#else
     EIGEN_THREAD_LOCAL PerThread per_thread_;
     PerThread* pt = &per_thread_;
     return pt;
+#endif
   }
 
   static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) {
@@ -333,7 +367,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // Update the internal state
     *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
     // Generate the random output (using the PCG-XSH-RS scheme)
-    return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
+    return static_cast<unsigned>((current ^ (current >> 22)) >>
+                                 (22 + (current >> 61)));
   }
 };
 
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
index cfa221732..f33759ba9 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
@@ -10,13 +10,47 @@
 #ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
 #define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
 
-// Try to come up with a portable implementation of thread local variables
-#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)
-#define EIGEN_THREAD_LOCAL static __thread
-#elif EIGEN_COMP_CLANG
-#define EIGEN_THREAD_LOCAL static __thread
-#else
-#define EIGEN_THREAD_LOCAL static thread_local
+#undef EIGEN_THREAD_LOCAL
+
+#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_thread_local))
+  #define EIGEN_THREAD_LOCAL static thread_local
+#elif (EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)) || EIGEN_COMP_CLANG
+  #define EIGEN_THREAD_LOCAL static __thread
+#endif
+
+// Disable TLS for Apple and Android builds with older toolchains.
+#if defined(__APPLE__)
+// Included for TARGET_OS_IPHONE, __IPHONE_OS_VERSION_MIN_REQUIRED,
+// __IPHONE_9_0.
+#include <Availability.h>
+#include <TargetConditionals.h>
+#endif
+// Checks whether C++11's `thread_local` storage duration specifier is
+// supported.
+#if defined(__apple_build_version__) &&     \
+    ((__apple_build_version__ < 8000042) || \
+     (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0))
+// Notes: Xcode's clang did not support `thread_local` until version
+// 8, and even then not for all iOS < 9.0.
+#undef EIGEN_THREAD_LOCAL
+
+#elif defined(__ANDROID__) && EIGEN_COMP_CLANG
+// There are platforms for which TLS should not be used even though the compiler
+// makes it seem like it's supported (Android NDK < r12b for example).
+// This is primarily because of linker problems and toolchain misconfiguration:
+// TLS isn't supported until NDK r12b per
+// https://developer.android.com/ndk/downloads/revision_history.html
+// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in
+// <android/ndk-version.h>. For NDK < r16, users should define these macros,
+// e.g. `-D__NDK_MAJOR__=11 -D__NDK_MINOR__=0` for NDK r11.
+#if __has_include(<android/ndk-version.h>)
+#include <android/ndk-version.h>
+#endif  // __has_include(<android/ndk-version.h>)
+#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
+    defined(__NDK_MINOR__) &&                                               \
+    ((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
+#undef EIGEN_THREAD_LOCAL
 #endif
+#endif  // defined(__ANDROID__) && defined(__clang__)
 
 #endif  // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
-- 
cgit v1.2.3


From 9bb75d8d31571f5513107080c8d3c85e27ff8430 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Mon, 13 Aug 2018 15:34:03 -0700
Subject: Add Barrier.h.

---
 unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h | 67 ++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h
new file mode 100644
index 000000000..c37fc1e65
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h
@@ -0,0 +1,67 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// Barrier is an object that allows one or more threads to wait until
+// Notify has been called a specified number of times.
+
+#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H
+#define EIGEN_CXX11_THREADPOOL_BARRIER_H
+
+namespace Eigen {
+
+class Barrier {
+ public:
+  Barrier(unsigned int count) : state_(count << 1), notified_(false) {
+    eigen_assert(((count << 1) >> 1) == count);
+  }
+  ~Barrier() {
+    eigen_assert((state_>>1) == 0);
+  }
+
+  void Notify() {
+    unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
+    if (v != 1) {
+      eigen_assert(((v + 2) & ~1) != 0);
+      return;  // either count has not dropped to 0, or waiter is not waiting
+    }
+    std::unique_lock<std::mutex> l(mu_);
+    eigen_assert(!notified_);
+    notified_ = true;
+    cv_.notify_all();
+  }
+
+  void Wait() {
+    unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
+    if ((v >> 1) == 0) return;
+    std::unique_lock<std::mutex> l(mu_);
+    while (!notified_) {
+      cv_.wait(l);
+    }
+  }
+
+ private:
+  std::mutex mu_;
+  std::condition_variable cv_;
+  std::atomic<unsigned int> state_;  // low bit is waiter flag
+  bool notified_;
+};
+
+
+// Notification is an object that allows a user to to wait for another
+// thread to signal a notification that an event has occurred.
+//
+// Multiple threads can wait on the same Notification object,
+// but only one caller must call Notify() on the object.
+struct Notification : Barrier {
+  Notification() : Barrier(1) {};
+};
+
+}  // namespace Eigen
+
+#endif  // EIGEN_CXX11_THREADPOOL_BARRIER_H
-- 
cgit v1.2.3


From 59bba77ead210f71b61ee6c551207c6f062bc123 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Tue, 14 Aug 2018 10:54:48 -0700
Subject: Fixed compilation errors with gcc 4.7 and 4.8

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h   | 26 +++++++++++-----------
 .../Eigen/CXX11/src/Tensor/TensorContraction.h     |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 21a6b66e8..322260011 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -73,7 +73,7 @@ struct TensorOpResourceRequirements {
   // expression tree (like reductions) to communicate resources
   // requirements based on local state (like the total number of reductions
   // to be computed).
-  TensorOpResourceRequirements(internal::TensorBlockShapeType shape,
+  TensorOpResourceRequirements(TensorBlockShapeType shape,
                                const Index size)
       : block_shape(shape), block_total_size(size) {}
 };
@@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   *block_shape = resources[0].block_shape;
   *block_total_size = resources[0].block_total_size;
   for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
-    if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
-        *block_shape != TensorBlockShapeType::kSkewedInnerDims) {
-      *block_shape = TensorBlockShapeType::kSkewedInnerDims;
+    if (resources[i].block_shape == kSkewedInnerDims &&
+        *block_shape ! kSkewedInnerDims) {
+      *block_shape = kSkewedInnerDims;
     }
     *block_total_size =
         numext::maxi(*block_total_size, resources[i].block_total_size);
@@ -178,9 +178,9 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
           bool BlockRead>
 class TensorBlockIO {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
+  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
       TensorBlock;
-  typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex>
+  typedef typename TensorBlockCopyOp<Scalar, StorageIndex>
       TensorBlockCopyOp;
 
  protected:
@@ -320,7 +320,7 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/true> {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
+  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
       TensorBlock;
   typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
       Base;
@@ -357,7 +357,7 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/false> {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
+  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
       TensorBlock;
   typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
       Base;
@@ -434,7 +434,7 @@ struct TensorBlockCwiseBinaryOp {
 template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
           int NumDims, int Layout>
 struct TensorBlockCwiseBinaryIO {
-  typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims,
+  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
                                          Layout>::Dimensions Dimensions;
 
   struct BlockIteratorState {
@@ -627,7 +627,7 @@ struct TensorBlockView {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockMapper {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
+  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
       TensorBlock;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
@@ -742,7 +742,7 @@ class TensorBlockMapper {
         block_dim_sizes[i] = 1;
       }
     } else if (block_dim_sizes.TotalSize() > min_target_size) {
-      if (block_shape == TensorBlockShapeType::kUniformAllDims) {
+      if (block_shape == kUniformAllDims) {
         // Tensor will not fit within 'min_target_size' budget: calculate tensor
         // block dimension sizes based on "square" dimension size target.
         const size_t dim_size_target = static_cast<const size_t>(
@@ -773,7 +773,7 @@ class TensorBlockMapper {
             total_size = total_size_other_dims * block_dim_sizes[dim];
           }
         }
-      } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) {
+      } else if (block_shape == kSkewedInnerDims) {
         StorageIndex coeff_to_allocate = min_target_size;
         for (int i = 0; i < NumDims; ++i) {
           const int dim = cond<Layout>()(i, NumDims - i - 1);
@@ -818,7 +818,7 @@ class TensorBlockMapper {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorSliceBlockMapper {
  public:
-  typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout>
+  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
       TensorBlock;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index e604456e8..a023718c6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -155,7 +155,7 @@ struct TensorContractionParams {
 // See expected implementation in NoOpOutputKernel.
 struct OutputKernel {
   template <typename Index, typename Scalar>
-  using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>;
+  typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
 };
 
 // Output kernel that does absolutely nothing.
-- 
cgit v1.2.3


From aebdb0642402e49ded58db98dd29c67cd76d204a Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Tue, 14 Aug 2018 12:06:39 -0700
Subject: Fix a few compiler warnings in CXX11 tests.

---
 unsupported/test/cxx11_tensor_contraction.cpp |  4 ++--
 unsupported/test/cxx11_tensor_convolution.cpp |  2 +-
 unsupported/test/cxx11_tensor_index_list.cpp  |  1 -
 unsupported/test/kronecker_product.cpp        | 27 ++++++++++++++-------------
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index d4cfbd0da..2e918eb30 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -471,7 +471,7 @@ static void test_tensor_product()
   mat1.setRandom();
   mat2.setRandom();
 
-  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
+  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{});
 
   VERIFY_IS_EQUAL(result.dimension(0), 2);
   VERIFY_IS_EQUAL(result.dimension(1), 3);
@@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {
 
   m_result = m_left * m_right;
 
-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp
index 01bc77bc1..9fe980648 100644
--- a/unsupported/test/cxx11_tensor_convolution.cpp
+++ b/unsupported/test/cxx11_tensor_convolution.cpp
@@ -25,7 +25,7 @@ static void test_evals()
 
   Tensor<float, 2, DataLayout> result(2,3);
   result.setZero();
-  Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
+  Eigen::array<Tensor<float, 2>::Index, 1> dims3{0};
 
   typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
   Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
index e81fa5e40..294677a4d 100644
--- a/unsupported/test/cxx11_tensor_index_list.cpp
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -170,7 +170,6 @@ static void test_type2indexpair_list()
   typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
   typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
 
-  Dims0 d0;
   Dims2_a d2_a;
 
   Dims2_b d2_b;
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 4f143b6de..1a936ed25 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,6 +9,9 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+
+#include "main.h"
+
 #ifdef EIGEN_TEST_PART_1
 
 #include "sparse.h"
@@ -95,7 +98,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(1,0) = DM_a.coeffRef(1,0) = -0.9076572187376921;
   SM_a.insert(1,1) = DM_a.coeffRef(1,1) =  0.6469156566545853;
   SM_a.insert(1,2) = DM_a.coeffRef(1,2) = -0.3658010398782789;
- 
+
   MatrixXd             DM_b(3,2);
   SparseMatrix<double> SM_b(3,2);
   SM_b.insert(0,0) = DM_b.coeffRef(0,0) =  0.9004440976767099;
@@ -165,7 +168,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(0,3) = -0.2;
   SM_a.insert(2,4) =  0.3;
   SM_a.finalize();
-  
+
   SM_b.insert(0,0) =  0.4;
   SM_b.insert(2,1) = -0.5;
   SM_b.finalize();
@@ -183,7 +186,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   DM_b2.resize(4,8);
   DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));
-  
+
   for(int i = 0; i < g_repeat; i++)
   {
     double density = Eigen::internal::random<double>(0.01,0.5);
@@ -196,35 +199,35 @@ EIGEN_DECLARE_TEST(kronecker_product)
     MatrixXf dA(ra,ca), dB(rb,cb), dC;
     initSparse(density, dA, sA);
     initSparse(density, dB, sB);
-    
+
     sC = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB);
     dC = kroneckerProduct(dA.transpose(),dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB.transpose());
     dC = kroneckerProduct(dA.transpose(),dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA,sB.transpose());
     dC = kroneckerProduct(dA,dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC2 = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(dA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(sA,dB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(2*sA,sB);
     dC = kroneckerProduct(2*dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
@@ -236,8 +239,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
 #ifdef EIGEN_TEST_PART_2
 
 // simply check that for a dense kronecker product, sparse module is not needed
-
-#include "main.h"
 #include <Eigen/KroneckerProduct>
 
 EIGEN_DECLARE_TEST(kronecker_product)
-- 
cgit v1.2.3


From 15d4f515e2d4982bd16f0a85a7bbb5343270deec Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Tue, 14 Aug 2018 12:17:46 -0700
Subject: Use plain_assert in destructors to avoid throwing in CXX11 tests
 where main.h overwrites eigen_assert with a throwing version.

---
 unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h               | 7 ++-----
 unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h            | 5 +++--
 unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 6 +++---
 unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h              | 8 +++-----
 4 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h
index c37fc1e65..ef5e9ff18 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h
@@ -20,9 +20,7 @@ class Barrier {
   Barrier(unsigned int count) : state_(count << 1), notified_(false) {
     eigen_assert(((count << 1) >> 1) == count);
   }
-  ~Barrier() {
-    eigen_assert((state_>>1) == 0);
-  }
+  ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); }
 
   void Notify() {
     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
@@ -52,14 +50,13 @@ class Barrier {
   bool notified_;
 };
 
-
 // Notification is an object that allows a user to to wait for another
 // thread to signal a notification that an event has occurred.
 //
 // Multiple threads can wait on the same Notification object,
 // but only one caller must call Notify() on the object.
 struct Notification : Barrier {
-  Notification() : Barrier(1) {};
+  Notification() : Barrier(1){};
 };
 
 }  // namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
index 0a7181102..22c952ae1 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
@@ -58,7 +58,7 @@ class EventCount {
 
   ~EventCount() {
     // Ensure there are no waiters.
-    eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask);
+    eigen_plain_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask);
   }
 
   // Prewait prepares for waiting.
@@ -169,7 +169,8 @@ class EventCount {
 
   class Waiter {
     friend class EventCount;
-    // Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector.
+    // Align to 128 byte boundary to prevent false sharing with other Waiter
+    // objects in the same vector.
     EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next;
     std::mutex mu;
     std::condition_variable cv;
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index ede70da8d..354995be8 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -90,8 +90,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     }
 
     // Join threads explicitly to avoid destruction order issues.
-    for (size_t i = 0; i < num_threads_; i++) delete threads_[i];
-    for (size_t i = 0; i < num_threads_; i++) delete queues_[i];
+    for (int i = 0; i < num_threads_; i++) delete threads_[i];
+    for (int i = 0; i < num_threads_; i++) delete queues_[i];
 #ifndef EIGEN_THREAD_LOCAL
     for (auto it : per_thread_map_) delete it.second;
 #endif
@@ -298,7 +298,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // If we are shutting down and all worker threads blocked without work,
     // that's we are done.
     blocked_++;
-    if (done_ && blocked_ == num_threads_) {
+    if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) {
       ec_.CancelWait(waiter);
       // Almost done, but need to re-check queues.
       // Consider that all queues are empty and all worker threads are preempted
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
index cb3690a2e..05c739aa1 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
@@ -10,7 +10,6 @@
 #ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
 #define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
 
-
 namespace Eigen {
 
 // RunQueue is a fixed-size, partially non-blocking deque or Work items.
@@ -47,7 +46,7 @@ class RunQueue {
       array_[i].state.store(kEmpty, std::memory_order_relaxed);
   }
 
-  ~RunQueue() { eigen_assert(Size() == 0); }
+  ~RunQueue() { eigen_plain_assert(Size() == 0); }
 
   // PushFront inserts w at the beginning of the queue.
   // If queue is full returns w, otherwise returns default-constructed Work.
@@ -131,9 +130,8 @@ class RunQueue {
       Elem* e = &array_[mid & kMask];
       uint8_t s = e->state.load(std::memory_order_relaxed);
       if (n == 0) {
-        if (s != kReady ||
-            !e->state.compare_exchange_strong(s, kBusy,
-                                              std::memory_order_acquire))
+        if (s != kReady || !e->state.compare_exchange_strong(
+                               s, kBusy, std::memory_order_acquire))
           continue;
         start = mid;
       } else {
-- 
cgit v1.2.3


From ab3f481141a6bc72d2bbdc6300fb9dc157029ea9 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Tue, 14 Aug 2018 14:05:46 -0700
Subject: Cleaned up the code and make it compile with more compilers

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 76 ++++++++++--------------
 1 file changed, 32 insertions(+), 44 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 322260011..24a6343e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -91,7 +91,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
   *block_total_size = resources[0].block_total_size;
   for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
     if (resources[i].block_shape == kSkewedInnerDims &&
-        *block_shape ! kSkewedInnerDims) {
+        *block_shape != kSkewedInnerDims) {
       *block_shape = kSkewedInnerDims;
     }
     *block_total_size =
@@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
     const Scalar* src_base = &src_data[src_index];
     Scalar* dst_base = &dst_data[dst_index];
 
-    typedef const Eigen::Array<Scalar, Dynamic, 1> Src;
-    typedef Eigen::Array<Scalar, Dynamic, 1> Dst;
+    typedef const Array<Scalar, Dynamic, 1> Src;
+    typedef Array<Scalar, Dynamic, 1> Dst;
 
-    typedef Eigen::Map<Src, 0, InnerStride<> > SrcMap;
-    typedef Eigen::Map<Dst, 0, InnerStride<> > DstMap;
+    typedef Map<Src, 0, InnerStride<> > SrcMap;
+    typedef Map<Dst, 0, InnerStride<> > DstMap;
 
     const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
     DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
           bool BlockRead>
 class TensorBlockIO {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef typename TensorBlockCopyOp<Scalar, StorageIndex>
-      TensorBlockCopyOp;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
 
  protected:
   struct BlockIteratorState {
@@ -194,7 +192,7 @@ class TensorBlockIO {
   };
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
       Scalar* dst_data) {
@@ -290,8 +288,8 @@ class TensorBlockIO {
     const StorageIndex block_total_size =
         NumDims == 0 ? 1 : block.block_sizes().TotalSize();
     for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
-      TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
-                             dst_data, inputIndex, input_stride, src_data);
+      BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
+                       dst_data, inputIndex, input_stride, src_data);
       // Update index.
       for (int j = 0; j < num_squeezed_dims; ++j) {
         if (++block_iter_state[j].count < block_iter_state[j].size) {
@@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/true> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, const Scalar* src_data) {
+      Block* block, const Scalar* src_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      TensorBlock* block, StorageIndex first_coeff_index,
+      Block* block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
     Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
                                                Layout, /*BlockRead=*/false> {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
-  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
-      Base;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
+  typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, Scalar* dst_data) {
+      const Block& block, Scalar* dst_data) {
     array<StorageIndex, NumDims> tensor_to_block_dim_map;
     for (int i = 0; i < NumDims; ++i) {
       tensor_to_block_dim_map[i] = i;
@@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
   }
 
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
-      const TensorBlock& block, StorageIndex first_coeff_index,
+      const Block& block, StorageIndex first_coeff_index,
       const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
       const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
     Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@@ -401,13 +395,13 @@ struct TensorBlockCwiseBinaryOp {
       const StorageIndex left_stride, const LeftScalar* left_data,
       const StorageIndex right_index, const StorageIndex right_stride,
       const RightScalar* right_data) {
-    typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs;
-    typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs;
-    typedef Eigen::Array<OutputScalar, Dynamic, 1> Out;
+    typedef const Array<LeftScalar, Dynamic, 1> Lhs;
+    typedef const Array<RightScalar, Dynamic, 1> Rhs;
+    typedef Array<OutputScalar, Dynamic, 1> Out;
 
-    typedef Eigen::Map<Lhs, 0, InnerStride<> > LhsMap;
-    typedef Eigen::Map<Rhs, 0, InnerStride<> > RhsMap;
-    typedef Eigen::Map<Out, 0, InnerStride<> > OutMap;
+    typedef Map<Lhs, 0, InnerStride<> > LhsMap;
+    typedef Map<Rhs, 0, InnerStride<> > RhsMap;
+    typedef Map<Out, 0, InnerStride<> > OutMap;
 
     const LeftScalar* lhs_base = &left_data[left_index];
     const RightScalar* rhs_base = &right_data[right_index];
@@ -417,8 +411,7 @@ struct TensorBlockCwiseBinaryOp {
     const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
     OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
 
-    out =
-        Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
+    out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
   }
 };
 
@@ -434,8 +427,7 @@ struct TensorBlockCwiseBinaryOp {
 template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
           int NumDims, int Layout>
 struct TensorBlockCwiseBinaryIO {
-  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims,
-                                         Layout>::Dimensions Dimensions;
+  typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
 
   struct BlockIteratorState {
     StorageIndex output_stride, output_span;
@@ -627,8 +619,7 @@ struct TensorBlockView {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorBlockMapper(const Dimensions& dims,
@@ -663,7 +654,7 @@ class TensorBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -711,8 +702,7 @@ class TensorBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@@ -818,8 +808,7 @@ class TensorBlockMapper {
 template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
 class TensorSliceBlockMapper {
  public:
-  typedef typename TensorBlock<Scalar, StorageIndex, NumDims, Layout>
-      TensorBlock;
+  typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
   typedef DSizes<StorageIndex, NumDims> Dimensions;
 
   TensorSliceBlockMapper(const Dimensions& tensor_dims,
@@ -860,7 +849,7 @@ class TensorSliceBlockMapper {
     }
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
   GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
     StorageIndex first_coeff_index = 0;
     DSizes<StorageIndex, NumDims> coords;
@@ -917,8 +906,7 @@ class TensorSliceBlockMapper {
       }
     }
 
-    return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides,
-                       data);
+    return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
-- 
cgit v1.2.3


From 43ec0082a6ca5e6098bdca1728094b0b7ce550c4 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Tue, 14 Aug 2018 14:08:36 -0700
Subject: Made the kronecker_product test compile again

---
 unsupported/test/kronecker_product.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 1a936ed25..b5b764c65 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -10,8 +10,6 @@
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 
-#include "main.h"
-
 #ifdef EIGEN_TEST_PART_1
 
 #include "sparse.h"
@@ -239,6 +237,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
 #ifdef EIGEN_TEST_PART_2
 
 // simply check that for a dense kronecker product, sparse module is not needed
+#include "main.h"
 #include <Eigen/KroneckerProduct>
 
 EIGEN_DECLARE_TEST(kronecker_product)
-- 
cgit v1.2.3


From 6bb3f1b43ecad440fb7ad172657f0f7a0c804d29 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Tue, 14 Aug 2018 14:26:59 -0700
Subject: Made the tensor_block_access test compile again

---
 unsupported/test/cxx11_tensor_block_access.cpp | 120 ++++++++++++-------------
 1 file changed, 58 insertions(+), 62 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 6feeff231..2631767e2 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -10,6 +10,8 @@
 
 #include "main.h"
 
+#include <algorithm>
+#include <random>
 #include <set>
 
 #include <Eigen/CXX11/Tensor>
@@ -19,17 +21,16 @@ using Eigen::Index;
 using Eigen::RowMajor;
 using Eigen::ColMajor;
 
-using internal::TensorBlockShapeType;
 
 template<typename T>
 static const T& choose(int layout, const T& col, const T& row) {
   return layout == ColMajor ? col : row;
 }
 
-static const TensorBlockShapeType RandomShape() {
+static internal::TensorBlockShapeType RandomShape() {
   return internal::random<bool>()
-             ? internal::TensorBlockShapeType::kUniformAllDims
-             : internal::TensorBlockShapeType::kSkewedInnerDims;
+             ? internal::kUniformAllDims
+             : internal::kSkewedInnerDims;
 }
 
 template <int NumDims>
@@ -44,7 +45,7 @@ static DSizes<Index, NumDims> RandomDims() {
     dims[i] = internal::random<int>(1, 20);
   }
   return DSizes<Index, NumDims>(dims);
-};
+}
 
 /** Dummy data type to test TensorBlock copy ops. */
 struct Data {
@@ -91,21 +92,19 @@ static void Debug(DSizes<Index, NumDims> dims) {
 template <int Layout>
 static void test_block_mapper_sanity()
 {
-  using T = int;
-  using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>;
-  using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>;
+  typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
 
   DSizes<Index, 2> tensor_dims(100, 100);
 
   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100);
+      tensor_dims, internal::kUniformAllDims, 100);
 
   VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
 
   // 10x10 blocks
-  auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr);
+  auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
   // Depending on a layout we stride by cols rows.
@@ -117,13 +116,13 @@ static void test_block_mapper_sanity()
 
   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100);
+      tensor_dims, internal::kSkewedInnerDims, 100);
 
   VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
   VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
 
   // 1x100 (100x1) rows/cols depending on a tensor layout.
-  auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr);
+  auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
   // Depending on a layout we stride by cols rows.
@@ -171,7 +170,7 @@ static void test_block_mapper_maps_every_element() {
   TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
 
   for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
     UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
@@ -219,7 +218,7 @@ static void test_slice_block_mapper_maps_every_element() {
                                       DimensionList<Index, NumDims>());
 
   for (int i = 0; i < block_mapper.total_block_count(); ++i) {
-    TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
     UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
                                        choose(Layout, NumDims - 1, 0),
                                        &coeff_set);
@@ -647,17 +646,16 @@ static void test_block_cwise_binary_io_zero_strides() {
 template <int Layout>
 static void test_uniform_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
 
   {
     // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     for (int i = 0; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
     }
@@ -669,9 +667,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -680,9 +678,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -695,9 +693,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -706,9 +704,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@@ -721,9 +719,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -733,9 +731,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -748,9 +746,9 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -760,9 +758,9 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -783,9 +781,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -794,9 +792,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -808,9 +806,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -819,9 +817,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -834,9 +832,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
     for (int i = 2; i < 5; ++i) {
@@ -846,9 +844,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
     for (int i = 2; i >= 0; --i) {
@@ -862,9 +860,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -875,9 +873,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -891,9 +889,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -903,9 +901,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -918,15 +916,13 @@ static void test_skewed_inner_dim_block_shape()
 template <int Layout>
 static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 {
-  using T = int;
-
   // Test blocking of tensors with zero dimensions:
   //  - we must not crash on asserts and divisions by zero
   //  - we must not return block with zero dimensions
   //    (recipe for overflows/underflows, divisions by zero and NaNs later)
   //  - total block count must be zero
   {
-    typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
     DSizes<Index, 1> dims(0);
     for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
       TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
@@ -936,7 +932,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   }
 
   {
-    typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
     for (int dim1 = 0; dim1 < 3; ++dim1) {
       for (int dim2 = 0; dim2 < 3; ++dim2) {
         DSizes<Index, 2> dims(dim1, dim2);
@@ -987,9 +983,9 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
 }
 
 #undef TEST_LAYOUTS
-#undef TEST_LAYOUTS_WITH_ARG
\ No newline at end of file
+#undef TEST_LAYOUTS_WITH_ARG
-- 
cgit v1.2.3


From fbb834144df6190a93757098d097f230b167edc5 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 15 Aug 2018 08:52:58 -0700
Subject: Fixed more compilation errors

---
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0cefe42dd..676645b0c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -132,7 +132,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
     if (needs_assign) {
       // Size tensor blocks to fit in cache (or requested target block size).
       Index block_total_size = numext::mini(cache_size, total_size);
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       // Query expression tree for desired block size/shape.
       std::vector<TensorOpResourceRequirements> resources;
       evaluator.getResourceRequirements(&resources);
@@ -272,7 +272,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
 
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims;
+      TensorBlockShapeType block_shape = kSkewedInnerDims;
       Index block_total_size = 0;
       // Query expression tree for desired block size/shape.
       std::vector<internal::TensorOpResourceRequirements> resources;
-- 
cgit v1.2.3


From b6f96cf7dd616ed1604919892f68f5b94d31fa5e Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 15 Aug 2018 08:54:31 -0700
Subject: Removed dependencies on cxx11 language features from the
 tensor_block_access test

---
 unsupported/test/cxx11_tensor_block_access.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 2631767e2..417b72201 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -157,9 +157,8 @@ static void UpdateCoeffSet(
 
 template <typename T, int NumDims, int Layout>
 static void test_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorBlockMapper =
-      internal::TensorBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
 
   DSizes<Index, NumDims> dims = RandomDims<NumDims>();
 
@@ -186,9 +185,8 @@ static void test_block_mapper_maps_every_element() {
 
 template <typename T, int NumDims, int Layout>
 static void test_slice_block_mapper_maps_every_element() {
-  using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>;
-  using TensorSliceBlockMapper =
-      internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>;
+  typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
+  typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper;
 
   DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>();
   DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>();
@@ -773,9 +771,8 @@ static void test_uniform_block_shape()
 template <int Layout>
 static void test_skewed_inner_dim_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
 
   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
-- 
cgit v1.2.3


From 4181556907fd29d6328fb718fa42cf9ce4734133 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 15 Aug 2018 09:34:47 -0700
Subject: Fixed the tensor contraction code.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 10 ++--------
 unsupported/test/cxx11_tensor_contraction.cpp          |  2 +-
 unsupported/test/cxx11_tensor_thread_pool.cpp          |  2 +-
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index a023718c6..5d619efd8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -152,13 +152,7 @@ struct TensorContractionParams {
 //   1. Elementwise Relu transformation following Conv2D.
 //   2. AddBias to the Conv2D output channels dimension.
 //
-// See expected implementation in NoOpOutputKernel.
-struct OutputKernel {
-  template <typename Index, typename Scalar>
-  typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-};
-
-// Output kernel that does absolutely nothing.
+// The NoOpOutputKernel implements an output kernel that does absolutely nothing.
 struct NoOpOutputKernel {
   /**
    * Tensor contraction evaluator calls this kernel after finishing each block
@@ -177,7 +171,7 @@ struct NoOpOutputKernel {
    */
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
       const TensorContractionParams& /*params*/, Index /*i*/,
       Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
 };
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index 2e918eb30..928d20f6e 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -514,7 +514,7 @@ static void test_const_inputs()
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index dd163c18a..7606b0abf 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
-- 
cgit v1.2.3


From f197c3f55b3a04ab24dfee8057b1d510c7483fc3 Mon Sep 17 00:00:00 2001
From: Sameer Agarwal <sameeragarwal@google.com>
Date: Wed, 15 Aug 2018 11:24:57 -0700
Subject: Removed an unused variable (PacketSize) from TensorExecutor

---
 unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 676645b0c..9b9587de5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -229,10 +229,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
     Evaluator evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
     if (needs_assign) {
-      const StorageIndex PacketSize =
-          Vectorizable
-              ? unpacket_traits<typename Evaluator::PacketReturnType>::size
-              : 1;
       const StorageIndex size = array_prod(evaluator.dimensions());
       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
                          EvalRange::alignBlockSize,
-- 
cgit v1.2.3


From d0b01ebbf63f0594760b6e1568bec0228987157a Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Thu, 16 Aug 2018 13:21:36 +0100
Subject: Reverting the unintended delete from the code.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index ab5990c14..dde1b449e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -30,6 +30,7 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
   typedef typename remove_reference<Nested>::type _Nested;
   static const int NumDimensions = traits<XprType>::NumDimensions;
   static const int Layout = traits<XprType>::Layout;
+  typedef typename traits<XprType>::PointerType PointerType;
 
 };
 
@@ -185,6 +186,8 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
   typedef typename remove_reference<RhsNested>::type _RhsNested;
   static const int NumDimensions = traits<LhsXprType>::NumDimensions;
   static const int Layout = traits<LhsXprType>::Layout;
+  typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
+                                typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
 
 };
 
-- 
cgit v1.2.3


From 80f1a76dec9a5fbe4305633ab0c8797a876e4ab5 Mon Sep 17 00:00:00 2001
From: Mehdi Goli <mehdi.goli@codeplay.com>
Date: Thu, 16 Aug 2018 13:33:24 +0100
Subject: Removing the noise.

---
 unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
index dde1b449e..cbec5e9b4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
@@ -31,7 +31,6 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
   static const int NumDimensions = traits<XprType>::NumDimensions;
   static const int Layout = traits<XprType>::Layout;
   typedef typename traits<XprType>::PointerType PointerType;
-
 };
 
 template<typename CustomUnaryFunc, typename XprType>
@@ -188,7 +187,6 @@ struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
   static const int Layout = traits<LhsXprType>::Layout;
   typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
                                 typename traits<LhsXprType>::PointerType, typename traits<RhsXprType>::PointerType>::type PointerType;
-
 };
 
 template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-- 
cgit v1.2.3


From e23c8c294e57d6600086ede5480c2e6c89db56ec Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Thu, 16 Aug 2018 10:41:01 -0700
Subject: Use actual types instead of the auto keyword to make the code more
 portable

---
 unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 5d619efd8..2ca91709f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -660,7 +660,7 @@ struct TensorContractionEvaluatorBase
 
           // call gebp (matrix kernel)
           // The parameters here are copied from Eigen's GEMM implementation
-          const auto output_mapper = output.getSubMapper(i2, j2);
+          const OutputMapper output_mapper = output.getSubMapper(i2, j2);
           gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc,
                Scalar(1), -1, -1, 0, 0);
 
-- 
cgit v1.2.3


From ede580ccdac3b964bdfcf12d55560a268c366c3c Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Thu, 16 Aug 2018 10:49:47 -0700
Subject: Avoid using the auto keyword to make the tensor block access test
 more portable

---
 unsupported/test/cxx11_tensor_block_access.cpp | 56 +++++++++++++-------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 417b72201..da093166b 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -104,7 +104,7 @@ static void test_block_mapper_sanity()
   VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
 
   // 10x10 blocks
-  auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
+  typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
   VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
   // Depending on a layout we stride by cols rows.
@@ -122,7 +122,7 @@ static void test_block_mapper_sanity()
   VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
 
   // 1x100 (100x1) rows/cols depending on a tensor layout.
-  auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
+  typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
   VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
   // Depending on a layout we stride by cols rows.
@@ -203,7 +203,7 @@ static void test_slice_block_mapper_maps_every_element() {
   // Keep track of elements indices available via block access.
   std::set<Index> coeff_set;
 
-  auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
+  int total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
 
   // Pick a random dimension sizes for the tensor blocks.
   DSizes<Index, NumDims> block_sizes;
@@ -237,7 +237,7 @@ static void test_block_io_copy_data_from_source_to_target() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   T* input_data = GenerateRandomData<T>(input_tensor_size);
   T* output_data = new T[input_tensor_size];
@@ -316,7 +316,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
       TensorBlockWriter;
 
   DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
 
   // Create a random input tensor.
   T* input_data = GenerateRandomData<T>(input_tensor_size);
@@ -339,8 +339,8 @@ static void test_block_io_copy_using_reordered_dimensions() {
   TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(),
                                  RandomTargetSize(input_tensor_dims));
 
-  auto* block_data = new T[block_mapper.block_dims_total_size()];
-  auto* output_data = new T[input_tensor_size];
+  T* block_data = new T[block_mapper.block_dims_total_size()];
+  T* output_data = new T[input_tensor_size];
 
   array<Index, NumDims> input_tensor_strides =
       ComputeStrides<Layout, NumDims>(input_tensor_dims);
@@ -382,8 +382,8 @@ static void test_block_io_zero_stride()
   input_tensor_dims[0] = 1;
   input_tensor_dims[2] = 1;
   input_tensor_dims[4] = 1;
-  const auto input_tensor_size = input_tensor_dims.TotalSize();
-  auto* input_data = GenerateRandomData<float>(input_tensor_size);
+  const Index input_tensor_size = input_tensor_dims.TotalSize();
+  float* input_data = GenerateRandomData<float>(input_tensor_size);
 
   DSizes<Index, 5> output_tensor_dims = rnd_dims;
 
@@ -424,7 +424,7 @@ static void test_block_io_zero_stride()
   };
 
   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock read_block(0, output_tensor_dims, output_tensor_strides,
                            input_tensor_strides_with_zeros, output_data);
     TensorBlockReader::Run(&read_block, input_data);
@@ -433,7 +433,7 @@ static void test_block_io_zero_stride()
   }
 
   {
-    auto* output_data = new float[output_tensor_dims.TotalSize()];
+    float* output_data = new float[output_tensor_dims.TotalSize()];
     TensorBlock write_block(0, output_tensor_dims,
                             input_tensor_strides_with_zeros,
                             output_tensor_strides, input_data);
@@ -456,14 +456,14 @@ static void test_block_io_squeeze_ones() {
   // Total size > 1.
   {
     DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();
 
     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -473,7 +473,7 @@ static void test_block_io_squeeze_ones() {
     }
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
@@ -486,14 +486,14 @@ static void test_block_io_squeeze_ones() {
   // Total size == 1.
   {
     DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
-    const auto total_size = block_sizes.TotalSize();
+    const Index total_size = block_sizes.TotalSize();
 
     // Create a random input tensor.
-    auto* input_data = GenerateRandomData<float>(total_size);
+    float* input_data = GenerateRandomData<float>(total_size);
     DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock read_block(0, block_sizes, strides, strides, output_data);
       TensorBlockReader::Run(&read_block, input_data);
       for (int i = 0; i < total_size; ++i) {
@@ -503,7 +503,7 @@ static void test_block_io_squeeze_ones() {
     }
 
     {
-      auto* output_data = new float[block_sizes.TotalSize()];
+      float* output_data = new float[block_sizes.TotalSize()];
       TensorBlock write_block(0, block_sizes, strides, strides, input_data);
       TensorBlockWriter::Run(write_block, output_data);
       for (int i = 0; i < total_size; ++i) {
@@ -524,7 +524,7 @@ static void test_block_cwise_binary_io_basic() {
   DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>();
   DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes));
 
-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();
 
   // Create a random input tensors.
   T* left_data = GenerateRandomData<T>(total_size);
@@ -553,13 +553,13 @@ static void test_block_cwise_binary_io_squeeze_ones() {
   DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
   DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
 
-  const auto total_size = block_sizes.TotalSize();
+  const Index total_size = block_sizes.TotalSize();
 
   // Create a random input tensors.
-  auto* left_data = GenerateRandomData<float>(total_size);
-  auto* right_data = GenerateRandomData<float>(total_size);
+  float* left_data = GenerateRandomData<float>(total_size);
+  float* right_data = GenerateRandomData<float>(total_size);
 
-  auto* output_data = new float[total_size];
+  float* output_data = new float[total_size];
   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
                                 strides, left_data, strides, right_data);
@@ -600,14 +600,14 @@ static void test_block_cwise_binary_io_zero_strides() {
   right_strides[3] = 0;
 
   // Generate random data.
-  auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
-  auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
+  float* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
+  float* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
 
   DSizes<Index, 5> output_sizes = rnd_dims;
   DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));
 
-  const auto output_total_size = output_sizes.TotalSize();
-  auto* output_data = new float[output_total_size];
+  const Index output_total_size = output_sizes.TotalSize();
+  float* output_data = new float[output_total_size];
 
   BinaryFunctor functor;
   TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides,
-- 
cgit v1.2.3


From f641cf1253bc2f7388b632a50b818b9d15b7588d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Thu, 16 Aug 2018 11:24:37 -0700
Subject: Adding missing at method in Eigen::array

---
 unsupported/Eigen/CXX11/src/util/EmulateArray.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
index d91662d96..32db51592 100644
--- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h
+++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
@@ -25,6 +25,11 @@ template <typename T, size_t n> class array {
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
 
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; }
+
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE T& front() { return values[0]; }
   EIGEN_DEVICE_FUNC
-- 
cgit v1.2.3


From dbdeceabdd115293a2f6a9c17079940cf5b096dd Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 17 Aug 2018 16:26:11 +0200
Subject: Silence double-promotion warning (when converting double to
 complex<long double>)

---
 unsupported/Eigen/FFT | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 2c45b3999..d8cf3e642 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -289,6 +289,7 @@ class FFT
     void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1)
     {
       typedef typename ComplexDerived::Scalar src_type;
+      typedef typename ComplexDerived::RealScalar real_type;
       typedef typename OutputDerived::Scalar dst_type;
       const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
@@ -329,9 +330,9 @@ class FFT
             tmp.head(nhead) = src.head(nhead);
             tmp.tail(ntail) = src.tail(ntail);
             if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
-              tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5);
+              tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5);
             }else{ // expanding -- split the old Nyquist bin into two halves
-              tmp(nhead) = src(nhead) * src_type(.5);
+              tmp(nhead) = src(nhead) * real_type(.5);
               tmp(tmp.size()-nhead) = tmp(nhead);
             }
           }
-- 
cgit v1.2.3


From c9b25fbefa44f684f76e9a669be217c9d3e7734d Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 17 Aug 2018 16:28:28 +0200
Subject: Silence unused parameter warning

---
 unsupported/Eigen/src/BVH/KdBVH.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/unsupported/Eigen/src/BVH/KdBVH.h b/unsupported/Eigen/src/BVH/KdBVH.h
index 13f792cd0..2d5b76ad0 100644
--- a/unsupported/Eigen/src/BVH/KdBVH.h
+++ b/unsupported/Eigen/src/BVH/KdBVH.h
@@ -35,6 +35,7 @@ struct get_boxes_helper {
   {
     outBoxes.insert(outBoxes.end(), boxBegin, boxEnd);
     eigen_assert(outBoxes.size() == objects.size());
+    EIGEN_ONLY_USED_FOR_DEBUG(objects);
   }
 };
 
-- 
cgit v1.2.3


From 595cae9b09bf322a747f8ff5aade76448db58a17 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 17 Aug 2018 16:30:32 +0200
Subject: Silence logical-op-parentheses warning

---
 Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h
index 4bb95eb8b..354e33de5 100644
--- a/Eigen/src/SuperLUSupport/SuperLUSupport.h
+++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h
@@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
 template<typename Scalar, int Flags, typename Index>
 MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
 {
-  eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR
-         || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC);
+  eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
+         || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));
 
   Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;
 
-- 
cgit v1.2.3


From 4713465eefbdb725ee3bb2cc3330bf77f51f1c6b Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 17 Aug 2018 16:39:43 +0200
Subject: Silence double-promotion warning

---
 unsupported/Eigen/OpenGLSupport | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport
index 11d99567e..489fd8354 100644
--- a/unsupported/Eigen/OpenGLSupport
+++ b/unsupported/Eigen/OpenGLSupport
@@ -184,7 +184,7 @@ inline void glRotate(const Rotation2D<float>& rot)
 }
 inline void glRotate(const Rotation2D<double>& rot)
 {
-  glRotated(rot.angle()*180.0/EIGEN_PI, 0.0, 0.0, 1.0);
+  glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0);
 }
 
 template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot)
-- 
cgit v1.2.3


From 41f1cc67b8e55469367416151d6a82e3632cfda8 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 17 Aug 2018 16:42:53 +0200
Subject: Assertion depended on a not yet initialized value

---
 Eigen/src/Core/MapBase.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 020f939ad..668922ffc 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     enum {
       RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
       ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
       SizeAtCompileTime = Base::SizeAtCompileTime
     };
 
@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
     void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
     {
 #if EIGEN_MAX_ALIGN_BYTES>0
+      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
       eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-                    || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+                    || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
     }
 
-- 
cgit v1.2.3


From f76c802973a5e70309eb1d713adf59a5f9711ad1 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 17 Aug 2018 17:16:12 +0200
Subject: Add missing empty line

---
 test/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/main.h b/test/main.h
index de8a4865f..5d550969e 100644
--- a/test/main.h
+++ b/test/main.h
@@ -845,4 +845,4 @@ int main(int argc, char *argv[])
 #ifdef _MSC_VER
   // 4503 - decorated name length exceeded, name was truncated
   #pragma warning( disable : 4503)
-#endif
\ No newline at end of file
+#endif
-- 
cgit v1.2.3


From 43d9dd9b2844828de3996b762b2f760749df711b Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Fri, 17 Aug 2018 08:49:32 -0700
Subject: Removed more dependencies on cxx11.

---
 unsupported/test/cxx11_tensor_block_access.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index da093166b..7bf338522 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -11,7 +11,6 @@
 #include "main.h"
 
 #include <algorithm>
-#include <random>
 #include <set>
 
 #include <Eigen/CXX11/Tensor>
@@ -49,8 +48,8 @@ static DSizes<Index, NumDims> RandomDims() {
 
 /** Dummy data type to test TensorBlock copy ops. */
 struct Data {
-  Data() : Data(0) {}
-  explicit Data(int v) { value = v; }
+  Data() : value(0) {}
+  explicit Data(int v) : value(v) { }
   int value;
 };
 
@@ -324,7 +323,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // Create a random dimension re-ordering/shuffle.
   std::vector<Index> shuffle;
   for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
-  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
+  std::random_shuffle(shuffle.begin(), shuffle.end());
 
   DSizes<Index, NumDims> output_tensor_dims;
   array<Index, NumDims> input_to_output_dim_map;
-- 
cgit v1.2.3


From ff8e0ecc2fa83ca4de14fc2f1049bd48907df3f6 Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Fri, 17 Aug 2018 15:15:52 -0700
Subject: Updated one more line of code to avoid making the test dependent on
 cxx11 features.

---
 unsupported/test/cxx11_tensor_block_access.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 7bf338522..f572e496d 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -143,7 +143,8 @@ static void UpdateCoeffSet(
 
   for (int i = 0; i < block_sizes[dim_index]; ++i) {
     if (tensor_strides[dim_index] == 1) {
-      auto inserted = visited_coeffs->insert(first_coeff_index + i);
+      typedef std::pair<std::set<Index>::iterator, bool> ReturnType;
+      ReturnType inserted = visited_coeffs->insert(first_coeff_index + i);
       VERIFY_IS_EQUAL(inserted.second, true);
     } else {
       int next_dim_index = dim_index + choose(Layout, -1, 1);
-- 
cgit v1.2.3


From 39335cf51e7ea5edfe9113cb91034625a039ccbf Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Thu, 23 Aug 2018 19:37:56 +0200
Subject: Make MaxSizeVector leak-safe

---
 unsupported/Eigen/CXX11/src/util/MaxSizeVector.h | 44 ++++++++++++++++--------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
index 4bc3dd1ba..bc5b3632c 100644
--- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
+++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
@@ -35,7 +35,6 @@ class MaxSizeVector {
   explicit MaxSizeVector(size_t n)
       : reserve_(n), size_(0),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
   }
 
   // Construct a new MaxSizeVector, reserve and resize to n.
@@ -44,35 +43,55 @@ class MaxSizeVector {
   MaxSizeVector(size_t n, const T& init)
       : reserve_(n), size_(n),
         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
-    for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
+    size_t i = 0;
+    EIGEN_TRY
+    {
+      for(; i < size_; ++i) { new (&data_[i]) T(init); }
+    }
+    EIGEN_CATCH(...)
+    {
+      // data_[i] failed to construct; destroy only data_[0..i-1], in reverse order:
+      for(; i > 0; --i) { data_[i-1].~T(); }
+      internal::aligned_free(data_);
+      EIGEN_THROW;
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   ~MaxSizeVector() {
-    for (size_t i = 0; i < size_; ++i) {
-      data_[i].~T();
+    for (size_t i = size_; i > 0; --i) {
+      data_[i-1].~T();
     }
     internal::aligned_free(data_);
   }
 
   void resize(size_t n) {
     eigen_assert(n <= reserve_);
-    for (size_t i = size_; i < n; ++i) {
-      new (&data_[i]) T;
+    for (; size_ < n; ++size_) {
+      new (&data_[size_]) T;
     }
-    for (size_t i = n; i < size_; ++i) {
-      data_[i].~T();
+    for (; size_ > n; --size_) {
+      data_[size_-1].~T();
     }
-    size_ = n;
+    eigen_assert(size_ == n);
   }
 
   // Append new elements (up to reserved size).
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void push_back(const T& t) {
     eigen_assert(size_ < reserve_);
-    data_[size_++] = t;
+    emplace_back(t);  // copy-constructs in place via the construct-then-count path below
   }
 
+  // For C++03 compatibility this only takes one argument
+  template<class X>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void emplace_back(const X& x) {
+    eigen_assert(size_ < reserve_);
+    new (&data_[size_]) T(x);
+    ++size_;  // count the slot only after T(x) succeeded, so a throwing ctor leaves size_ valid
+  }
+
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const T& operator[] (size_t i) const {
     eigen_assert(i < size_);
@@ -99,11 +118,8 @@ class MaxSizeVector {
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void pop_back() {
-    // NOTE: This does not destroy the value at the end the way
-    // std::vector's version of pop_back() does.  That happens when
-    // the Vector is destroyed.
     eigen_assert(size_ > 0);
-    size_--;
+    data_[--size_].~T();
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-- 
cgit v1.2.3


From a709c8efb4927ebac338cb93865e8d0bdfcac85d Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Thu, 23 Aug 2018 19:41:59 +0200
Subject: Replace pointers by values or unique_ptr for better leak-safety

---
 .../CXX11/src/ThreadPool/NonBlockingThreadPool.h   | 40 ++++++++++------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index ecd49f382..a93e22a76 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -58,11 +58,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
         coprimes_.push_back(i);
       }
     }
+    queues_.resize(num_threads_);
     for (int i = 0; i < num_threads_; i++) {
-      queues_.push_back(new Queue());
-    }
-    for (int i = 0; i < num_threads_; i++) {
-      threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
+      threads_.emplace_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
     }
   }
 
@@ -78,13 +76,12 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // Since we were cancelled, there might be entries in the queues.
       // Empty them to prevent their destructor from asserting.
       for (size_t i = 0; i < queues_.size(); i++) {
-        queues_[i]->Flush();
+        queues_[i].Flush();
       }
     }
 
     // Join threads explicitly to avoid destruction order issues.
-    for (size_t i = 0; i < num_threads_; i++) delete threads_[i];
-    for (size_t i = 0; i < num_threads_; i++) delete queues_[i];
+    threads_.resize(0);
   }
 
   void Schedule(std::function<void()> fn) {
@@ -92,13 +89,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     PerThread* pt = GetPerThread();
     if (pt->pool == this) {
       // Worker thread of this pool, push onto the thread's queue.
-      Queue* q = queues_[pt->thread_id];
-      t = q->PushFront(std::move(t));
+      Queue& q = queues_[pt->thread_id];
+      t = q.PushFront(std::move(t));
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      Queue* q = queues_[Rand(&pt->rand) % queues_.size()];
-      t = q->PushBack(std::move(t));
+      Queue& q = queues_[Rand(&pt->rand) % queues_.size()];
+      t = q.PushBack(std::move(t));
     }
     // Note: below we touch this after making w available to worker threads.
     // Strictly speaking, this can lead to a racy-use-after-free. Consider that
@@ -157,8 +154,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   Environment env_;
   const int num_threads_;
   const bool allow_spinning_;
-  MaxSizeVector<Thread*> threads_;
-  MaxSizeVector<Queue*> queues_;
+  MaxSizeVector<std::unique_ptr<Thread> > threads_;
+  MaxSizeVector<Queue> queues_;
   MaxSizeVector<unsigned> coprimes_;
   MaxSizeVector<EventCount::Waiter> waiters_;
   std::atomic<unsigned> blocked_;
@@ -173,7 +170,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     pt->pool = this;
     pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id());
     pt->thread_id = thread_id;
-    Queue* q = queues_[thread_id];
+    Queue& q = queues_[thread_id];
     EventCount::Waiter* waiter = &waiters_[thread_id];
     // TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional
     // to num_threads_ and we assume that new work is scheduled at a
@@ -189,10 +186,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       // counter-productive for the types of I/O workloads the single thread
       // pools tend to be used for.
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         for (int i = 0; i < spin_count && !t.f; i++) {
           if (!cancelled_.load(std::memory_order_relaxed)) {
-            t = q->PopFront();
+            t = q.PopFront();
           }
         }
         if (!t.f) {
@@ -206,7 +203,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       }
     } else {
       while (!cancelled_) {
-        Task t = q->PopFront();
+        Task t = q.PopFront();
         if (!t.f) {
           t = Steal();
           if (!t.f) {
@@ -243,7 +240,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      Task t = queues_[victim]->PopBack();
+      Task t = queues_[victim].PopBack();
       if (t.f) {
         return t;
       }
@@ -270,7 +267,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       if (cancelled_) {
         return false;
       } else {
-        *t = queues_[victim]->PopBack();
+        *t = queues_[victim].PopBack();
         return true;
       }
     }
@@ -278,7 +275,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // If we are shutting down and all worker threads blocked without work,
     // that's we are done.
     blocked_++;
-    if (done_ && blocked_ == num_threads_) {
+    // TODO is blocked_ required to be unsigned?
+    if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) {
       ec_.CancelWait(waiter);
       // Almost done, but need to re-check queues.
       // Consider that all queues are empty and all worker threads are preempted
@@ -311,7 +309,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = coprimes_[r % coprimes_.size()];
     unsigned victim = r % size;
     for (unsigned i = 0; i < size; i++) {
-      if (!queues_[victim]->Empty()) {
+      if (!queues_[victim].Empty()) {
         return victim;
       }
       victim += inc;
-- 
cgit v1.2.3


From e51d9e473aa1f882d3b3106ec2427a44d2a76ceb Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 11:42:05 -0700
Subject: Protect #undef max with #ifdef max.

---
 unsupported/Eigen/CXX11/ThreadPool | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsupported/Eigen/CXX11/ThreadPool b/unsupported/Eigen/CXX11/ThreadPool
index 12aa07c7f..64ea83b7e 100644
--- a/unsupported/Eigen/CXX11/ThreadPool
+++ b/unsupported/Eigen/CXX11/ThreadPool
@@ -49,7 +49,9 @@
 // which trigger a check in test/main.h causing compilation to fail.
 // We work around the check here by removing the check for max in
 // the case where we have to emulate thread_local.
+#ifdef max
 #undef max
+#endif
 #include <unordered_map>
 #endif
 
-- 
cgit v1.2.3


From 6e0464004a7bcd666d3b5962c3c999ff78f416f1 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 12:10:08 -0700
Subject: Store std::unique_ptr instead of raw pointers in per_thread_map_.

---
 .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h   | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index a800e827f..1ac4de3b5 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -58,9 +58,6 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
       }
     }
     queues_.resize(num_threads_);
-    for (int i = 0; i < num_threads_; i++) {
-      queues_.push_back(new Queue());
-    }
 #ifndef EIGEN_THREAD_LOCAL
     init_barrier_.reset(new Barrier(num_threads_));
 #endif
@@ -93,9 +90,6 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // Join threads explicitly to avoid destruction order issues.
     threads_.resize(0);
     queues_.resize(0);
-#ifndef EIGEN_THREAD_LOCAL
-    for (auto it : per_thread_map_) delete it.second;
-#endif
   }
 
   void Schedule(std::function<void()> fn) {
@@ -176,21 +170,19 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
 #ifndef EIGEN_THREAD_LOCAL
   std::unique_ptr<Barrier> init_barrier_;
   std::mutex mu;  // Protects per_thread_map_.
-  std::unordered_map<uint64_t, PerThread*> per_thread_map_;
+  std::unordered_map<uint64_t, std::unique_ptr<PerThread>> per_thread_map_;
 #endif
 
   // Main worker thread loop.
   void WorkerLoop(int thread_id) {
 #ifndef EIGEN_THREAD_LOCAL
-    PerThread* pt = new PerThread();
     mu.lock();
-    per_thread_map_[GlobalThreadIdHash()] = pt;
+    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), new PerThread()).second);
     mu.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
-#else
-    PerThread* pt = GetPerThread();
 #endif
+    PerThread* pt = GetPerThread();
     pt->pool = this;
     pt->rand = GlobalThreadIdHash();
     pt->thread_id = thread_id;
@@ -355,7 +347,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     if (it == per_thread_map_.end()) {
       return &dummy;
     } else {
-      return it->second;
+      return it->second.get();
     }
 #else
     EIGEN_THREAD_LOCAL PerThread per_thread_;
-- 
cgit v1.2.3


From 6cedc5a9b38d6ddda69d532b28dff9ee5c2d1c04 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 12:11:58 -0700
Subject: rename mu.

---
 unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 1ac4de3b5..d710faa94 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -169,16 +169,16 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   EventCount ec_;
 #ifndef EIGEN_THREAD_LOCAL
   std::unique_ptr<Barrier> init_barrier_;
-  std::mutex mu;  // Protects per_thread_map_.
+  std::mutex per_thread_map_mutex_;  // Protects per_thread_map_.
   std::unordered_map<uint64_t, std::unique_ptr<PerThread>> per_thread_map_;
 #endif
 
   // Main worker thread loop.
   void WorkerLoop(int thread_id) {
 #ifndef EIGEN_THREAD_LOCAL
-    mu.lock();
+    per_thread_map_mutex_.lock();
     eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), new PerThread()).second);
-    mu.unlock();
+    per_thread_map_mutex_.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
 #endif
-- 
cgit v1.2.3


From 668690978ff66151b2a495767c7daf33d06be4a5 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 12:54:33 -0700
Subject: Pad PerThread when we emulate thread_local to prevent false sharing.

---
 unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index d710faa94..1cb63bcfa 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -153,6 +153,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     ThreadPoolTempl* pool;  // Parent pool, or null for normal threads.
     uint64_t rand;          // Random generator state.
     int thread_id;          // Worker thread index in pool.
+#ifndef EIGEN_THREAD_LOCAL
+    // Prevent false sharing.
+    char pad_[128];
+#endif
   };
 
   Environment env_;
-- 
cgit v1.2.3


From e9f9d70611d0a66751c34b1430ed7649aff6e2bf Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 12:59:46 -0700
Subject: Don't rely on __has_feature for g++. Don't use __thread. Only use
 thread_local for gcc 4.8 or newer.

---
 unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
index f33759ba9..89ed6e5e5 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
@@ -12,10 +12,10 @@
 
 #undef EIGEN_THREAD_LOCAL
 
-#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_thread_local))
-  #define EIGEN_THREAD_LOCAL static thread_local
-#elif (EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)) || EIGEN_COMP_CLANG
-  #define EIGEN_THREAD_LOCAL static __thread
+#if EIGEN_MAX_CPP_VER >= 11 &&                         \
+    ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
+     __has_feature(cxx_thread_local))
+#define EIGEN_THREAD_LOCAL static thread_local
 #endif
 
 // Disable TLS for Apple and Android builds with older toolchains.
-- 
cgit v1.2.3


From 8d9bc5cc022bee4a06201c7c5a1dec2b73697f5f Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 23 Aug 2018 13:06:39 -0700
Subject: Fix g++ compilation.

---
 unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 1cb63bcfa..60a0c9fb6 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -180,8 +180,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   // Main worker thread loop.
   void WorkerLoop(int thread_id) {
 #ifndef EIGEN_THREAD_LOCAL
+    std::unique_ptr<PerThread> new_pt(new PerThread());
     per_thread_map_mutex_.lock();
-    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), new PerThread()).second);
+    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
     per_thread_map_mutex_.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
-- 
cgit v1.2.3


From ad4a08fb68ecd3afde93e4714687e96164db704b Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 24 Aug 2018 19:04:33 +0200
Subject: Use Intel cast intrinsics, since MSVC does not allow direct casting.
 Reported by David Winkler.

---
 Eigen/src/Core/arch/AVX512/PacketMath.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 4e2e916de..9e66575a9 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -634,13 +634,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
 template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
 {
   // _mm512_abs_ps intrinsic not found, so hack around it
-  return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
+  return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
   // _mm512_abs_ps intrinsic not found, so hack around it
-  return (__m512d)_mm512_and_si512((__m512i)a,
-                                   _mm512_set1_epi64(0x7fffffffffffffff));
+  return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
+                                   _mm512_set1_epi64(0x7fffffffffffffff)));
 }
 
 #ifdef EIGEN_VECTORIZE_AVX512DQ
-- 
cgit v1.2.3


From 744e2fe0dedb697a8802a3e633e37a4f844da372 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Fri, 24 Aug 2018 10:24:54 -0700
Subject: Address comments about EIGEN_THREAD_LOCAL.

---
 unsupported/Eigen/CXX11/ThreadPool                   | 9 ++++-----
 unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h | 2 --
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/unsupported/Eigen/CXX11/ThreadPool b/unsupported/Eigen/CXX11/ThreadPool
index 64ea83b7e..1dcc4eb6c 100644
--- a/unsupported/Eigen/CXX11/ThreadPool
+++ b/unsupported/Eigen/CXX11/ThreadPool
@@ -44,6 +44,10 @@
 #include <thread>
 #include <functional>
 #include <memory>
+#include "src/util/CXX11Meta.h"
+#include "src/util/MaxSizeVector.h"
+
+#include "src/ThreadPool/ThreadLocal.h"
 #ifndef EIGEN_THREAD_LOCAL
 // There are non-parenthesized calls to "max" in the  <unordered_map> header,
 // which trigger a check in test/main.h causing compilation to fail.
@@ -54,11 +58,6 @@
 #endif
 #include <unordered_map>
 #endif
-
-#include "src/util/CXX11Meta.h"
-#include "src/util/MaxSizeVector.h"
-
-#include "src/ThreadPool/ThreadLocal.h"
 #include "src/ThreadPool/ThreadYield.h"
 #include "src/ThreadPool/ThreadCancel.h"
 #include "src/ThreadPool/EventCount.h"
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
index 89ed6e5e5..a41731c34 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
@@ -10,8 +10,6 @@
 #ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
 #define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
 
-#undef EIGEN_THREAD_LOCAL
-
 #if EIGEN_MAX_CPP_VER >= 11 &&                         \
     ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
      __has_feature(cxx_thread_local))
-- 
cgit v1.2.3


From f7675b826b2f0a33b09b97342e855a8ef059927e Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 24 Aug 2018 22:58:55 +0200
Subject: Fix several integer conversion and sign-compare warnings

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h   | 27 ++++----
 .../Eigen/CXX11/src/Tensor/TensorDimensions.h      | 72 +++++++++++-----------
 .../Eigen/CXX11/src/Tensor/TensorExecutor.h        |  4 +-
 .../Eigen/CXX11/src/Tensor/TensorIndexList.h       |  2 +-
 unsupported/test/cxx11_tensor_block_access.cpp     | 54 ++++++++--------
 unsupported/test/cxx11_tensor_thread_pool.cpp      |  2 +-
 6 files changed, 80 insertions(+), 81 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 24a6343e8..aa500eb70 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -212,11 +212,11 @@ class TensorBlockIO {
         num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
     const StorageIndex block_dim_for_tensor_stride1_dim =
         NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
-    size_t block_inner_dim_size =
+    StorageIndex block_inner_dim_size =
         NumDims == 0 ? 1
                      : block.block_sizes()[block_dim_for_tensor_stride1_dim];
-    for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
-      const int dim = cond<Layout>()(i, NumDims - i - 1);
+    for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
+      const Index dim = cond<Layout>()(i, NumDims - i - 1);
       const StorageIndex block_stride =
           block.block_strides()[tensor_to_block_dim_map[dim]];
       if (block_inner_dim_size == block_stride &&
@@ -258,8 +258,8 @@ class TensorBlockIO {
 
     // Initialize block iterator state. Squeeze away any dimension of size 1.
     int num_squeezed_dims = 0;
-    for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
-      const int dim = cond<Layout>()(i + 1, NumDims - i - 2);
+    for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
+      const Index dim = cond<Layout>()(i + 1, NumDims - i - 2);
       const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]];
       if (size == 1) {
         continue;
@@ -626,7 +626,7 @@ class TensorBlockMapper {
                     const TensorBlockShapeType block_shape,
                     Index min_target_size)
       : m_dimensions(dims),
-        m_block_dim_sizes(BlockDimensions(dims, block_shape, min_target_size)) {
+        m_block_dim_sizes(BlockDimensions(dims, block_shape, internal::convert_index<StorageIndex>(min_target_size))) {
     // Calculate block counts by dimension and total block count.
     DSizes<StorageIndex, NumDims> block_count;
     for (Index i = 0; i < block_count.rank(); ++i) {
@@ -717,8 +717,8 @@ class TensorBlockMapper {
  private:
   static Dimensions BlockDimensions(const Dimensions& tensor_dims,
                                     const TensorBlockShapeType block_shape,
-                                    Index min_target_size) {
-    min_target_size = numext::maxi<Index>(1, min_target_size);
+                                    StorageIndex min_target_size) {
+    min_target_size = numext::maxi<StorageIndex>(1, min_target_size);
 
     // If tensor fully fits into the target size, we'll treat it a single block.
     Dimensions block_dim_sizes = tensor_dims;
@@ -735,16 +735,15 @@ class TensorBlockMapper {
       if (block_shape == kUniformAllDims) {
         // Tensor will not fit within 'min_target_size' budget: calculate tensor
         // block dimension sizes based on "square" dimension size target.
-        const size_t dim_size_target = static_cast<const size_t>(
+        const StorageIndex dim_size_target = internal::convert_index<StorageIndex>(
             std::pow(static_cast<float>(min_target_size),
                      1.0f / static_cast<float>(block_dim_sizes.rank())));
-        for (size_t i = 0; i < block_dim_sizes.rank(); ++i) {
+        for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
           // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
           // a multiple of the packet size. Note that reducing
           // 'block_dim_size' in this manner can increase the number of
           // blocks, and so will amplify any per-block overhead.
-          block_dim_sizes[i] = numext::mini(
-              dim_size_target, static_cast<size_t>(tensor_dims[i]));
+          block_dim_sizes[i] = numext::mini(dim_size_target, tensor_dims[i]);
         }
         // Add any un-allocated coefficients to inner dimension(s).
         StorageIndex total_size = block_dim_sizes.TotalSize();
@@ -781,7 +780,7 @@ class TensorBlockMapper {
 
     eigen_assert(
         block_dim_sizes.TotalSize() >=
-        numext::mini<size_t>(min_target_size, tensor_dims.TotalSize()));
+        numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
 
     return block_dim_sizes;
   }
@@ -824,7 +823,7 @@ class TensorSliceBlockMapper {
         m_total_block_count(1) {
     // Calculate block counts by dimension and total block count.
     DSizes<StorageIndex, NumDims> block_count;
-    for (size_t i = 0; i < block_count.rank(); ++i) {
+    for (Index i = 0; i < block_count.rank(); ++i) {
       block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]);
     }
     m_total_block_count = array_prod(block_count);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
index 4f973a5b7..ce91bc2a6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -32,12 +32,12 @@ namespace Eigen {
 // Boilerplate code
 namespace internal {
 
-template<std::size_t n, typename Dimension> struct dget {
+template<std::ptrdiff_t n, typename Dimension> struct dget {
   static const std::ptrdiff_t value = get<n, Dimension>::value;
 };
 
 
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
 struct fixed_size_tensor_index_linearization_helper
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -50,7 +50,7 @@ struct fixed_size_tensor_index_linearization_helper
   }
 };
 
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
 struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -60,7 +60,7 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj
   }
 };
 
-template<typename Index, std::size_t n>
+template<typename Index, std::ptrdiff_t n>
 struct fixed_size_tensor_index_extraction_helper
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
@@ -94,7 +94,7 @@ struct Sizes {
   typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
   const Base t = Base();
   static const std::ptrdiff_t total_size = internal::arg_prod(Indices...);
-  static const size_t count = Base::count;
+  static const ptrdiff_t count = Base::count;
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const {
     return Base::count;
@@ -121,16 +121,16 @@ struct Sizes {
     return *this;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const {
     return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t);
   }
 
   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
+  ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t);
   }
   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
+  ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t);
   }
 };
@@ -144,25 +144,25 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indi
 
 #else
 
-template <std::size_t n>
+template <std::ptrdiff_t n>
 struct non_zero_size {
-  typedef internal::type2val<std::size_t, n> type;
+  typedef internal::type2val<std::ptrdiff_t, n> type;
 };
 template <>
 struct non_zero_size<0> {
   typedef internal::null_type type;
 };
 
-template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes {
+template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::ptrdiff_t V4=0, std::ptrdiff_t V5=0> struct Sizes {
   typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base;
-  static const size_t count = Base::count;
-  static const std::size_t total_size = internal::arg_prod<Base>::value;
+  static const std::ptrdiff_t count = Base::count;
+  static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const {
     return count;
   }
 
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() {
     return internal::arg_prod<Base>::value;
   }
 
@@ -178,7 +178,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
 
 #if EIGEN_HAS_VARIADIC_TEMPLATES
   template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
-  explicit Sizes(std::initializer_list<std::size_t>) {
+  explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
     // todo: add assertion
   }
 #else
@@ -213,18 +213,18 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
   }
 
   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
+  ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this));
   }
   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
+  ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this));
   }
 };
 
 namespace internal {
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
   return Sizes<V1, V2, V3, V4, V5>::total_size;
 }
 }
@@ -233,7 +233,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2,
 
 // Boilerplate
 namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
 struct tensor_index_linearization_helper
 {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -245,7 +245,7 @@ struct tensor_index_linearization_helper
   }
 };
 
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
 struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
 {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -264,7 +264,7 @@ struct DSizes : array<DenseIndex, NumDims> {
   typedef array<DenseIndex, NumDims> Base;
   static const int count = NumDims;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const {
     return NumDims;
   }
 
@@ -298,7 +298,7 @@ struct DSizes : array<DenseIndex, NumDims> {
     }
   }
 #else
-  template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
+  template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
   EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
     for (int i = 0 ; i < NumDims; ++i) {
       (*this)[i] = a[i];
@@ -359,7 +359,7 @@ struct DSizes : array<DenseIndex, NumDims> {
 
 // Boilerplate
 namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
 struct tensor_vsize_index_linearization_helper
 {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -371,7 +371,7 @@ struct tensor_vsize_index_linearization_helper
   }
 };
 
-template<typename Index, std::size_t NumIndices, bool RowMajor>
+template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
 struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
 {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -386,10 +386,10 @@ struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
 namespace internal {
 
 template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
-  static const size_t value = NumDims;
+  static const ptrdiff_t value = NumDims;
 };
 template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > {
-  static const size_t value = NumDims;
+  static const ptrdiff_t value = NumDims;
 };
 #ifndef EIGEN_EMULATE_CXX11_META_H
 template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > {
@@ -399,33 +399,33 @@ template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...
 static const std::ptrdiff_t value = Sizes<Indices...>::count;
 };
 template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) {
-  return get<n, internal::numeric_list<std::size_t, Indices...> >::value;
+  return get<n, internal::numeric_list<std::ptrdiff_t, Indices...> >::value;
 }
 template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) {
   eigen_assert(false && "should never be called");
   return -1;
 }
 #else
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
-  static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
+  static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
 };
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
-  static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
+template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
+  static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
 };
-template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
+template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
   return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
 }
 
 #endif
 
 
-template <typename Dims1, typename Dims2, size_t n, size_t m>
+template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m>
 struct sizes_match_below_dim {
   static EIGEN_DEVICE_FUNC  EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
     return false;
   }
 };
-template <typename Dims1, typename Dims2, size_t n>
+template <typename Dims1, typename Dims2, ptrdiff_t n>
 struct sizes_match_below_dim<Dims1, Dims2, n, n> {
   static EIGEN_DEVICE_FUNC  EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
     return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 9b9587de5..b756be3b3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -256,8 +256,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
     typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
 
     Evaluator evaluator(expr, device);
-    StorageIndex total_size = array_prod(evaluator.dimensions());
-    StorageIndex cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
+    Index total_size = array_prod(evaluator.dimensions());
+    Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
     if (total_size < cache_size) {
       // TODO(andydavis) Reduce block management overhead for small tensors.
       internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
index 98ad661ca..3f7d26b18 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
@@ -84,7 +84,7 @@ template<DenseIndex n> struct NumTraits<type2index<n> >
 namespace internal {
 template <typename T>
 EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) {
-  val = new_val;
+  val = internal::convert_index<T>(new_val);
 }
 template <DenseIndex n>
 EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) {
diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index f572e496d..24a95ab12 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -33,8 +33,8 @@ static internal::TensorBlockShapeType RandomShape() {
 }
 
 template <int NumDims>
-static std::size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) {
-  return internal::random<int>(1, dims.TotalSize());
+static Index RandomTargetSize(const DSizes<Index, NumDims>& dims) {
+  return internal::random<Index>(1, dims.TotalSize());
 }
 
 template <int NumDims>
@@ -178,7 +178,7 @@ static void test_block_mapper_maps_every_element() {
   // Verify that every coefficient in the original Tensor is accessible through
   // TensorBlock only once.
   Index total_coeffs = dims.TotalSize();
-  VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
+  VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs);
   VERIFY_IS_EQUAL(*coeff_set.begin(), 0);
   VERIFY_IS_EQUAL(*coeff_set.rbegin(), total_coeffs - 1);
 }
@@ -208,7 +208,7 @@ static void test_slice_block_mapper_maps_every_element() {
   // Pick a random dimension sizes for the tensor blocks.
   DSizes<Index, NumDims> block_sizes;
   for (int i = 0; i < NumDims; ++i) {
-    block_sizes[i] = internal::random<int>(1, tensor_slice_extents[i]);
+    block_sizes[i] = internal::random<Index>(1, tensor_slice_extents[i]);
   }
 
   TensorSliceBlockMapper block_mapper(tensor_dims, tensor_slice_offsets,
@@ -222,7 +222,7 @@ static void test_slice_block_mapper_maps_every_element() {
                                        &coeff_set);
   }
 
-  VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs);
+  VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs);
 }
 
 template <typename T, int NumDims, int Layout>
@@ -262,14 +262,14 @@ static void test_block_io_copy_data_from_source_to_target() {
 }
 
 template <int Layout, int NumDims>
-static int GetInputIndex(Index output_index,
+static Index GetInputIndex(Index output_index,
                          const array<Index, NumDims>& output_to_input_dim_map,
                          const array<Index, NumDims>& input_strides,
                          const array<Index, NumDims>& output_strides) {
   int input_index = 0;
   if (Layout == ColMajor) {
     for (int i = NumDims - 1; i > 0; --i) {
-      const int idx = output_index / output_strides[i];
+      const Index idx = output_index / output_strides[i];
       input_index += idx * input_strides[output_to_input_dim_map[i]];
       output_index -= idx * output_strides[i];
     }
@@ -277,7 +277,7 @@ static int GetInputIndex(Index output_index,
            output_index * input_strides[output_to_input_dim_map[0]];
   } else {
     for (int i = 0; i < NumDims - 1; ++i) {
-      const int idx = output_index / output_strides[i];
+      const Index idx = output_index / output_strides[i];
       input_index += idx * input_strides[output_to_input_dim_map[i]];
       output_index -= idx * output_strides[i];
     }
@@ -650,7 +650,7 @@ static void test_uniform_block_shape()
   {
     // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5;
+    const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -664,7 +664,7 @@ static void test_uniform_block_shape()
   // partially into first inner-most dimension.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5;
+    const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -675,7 +675,7 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6;
+    const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -690,7 +690,7 @@ static void test_uniform_block_shape()
   // fully into first inner-most dimension.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5;
+    const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -701,7 +701,7 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7;
+    const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -716,7 +716,7 @@ static void test_uniform_block_shape()
   // fully into first few inner-most dimensions.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
+    const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -728,7 +728,7 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
-    const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
+    const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -743,7 +743,7 @@ static void test_uniform_block_shape()
   // Test shape 'UniformAllDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
+    const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -755,7 +755,7 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
+    const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
     TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -777,7 +777,7 @@ static void test_skewed_inner_dim_block_shape()
   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
+    const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -788,7 +788,7 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
+    const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -802,7 +802,7 @@ static void test_skewed_inner_dim_block_shape()
   // Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
+    const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -813,7 +813,7 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
+    const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -828,7 +828,7 @@ static void test_skewed_inner_dim_block_shape()
   // and partial allocation to second inner-dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
+    const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -840,7 +840,7 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
+    const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -856,7 +856,7 @@ static void test_skewed_inner_dim_block_shape()
   // and partial allocation to third inner-dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
+    const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -869,7 +869,7 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
+    const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -885,7 +885,7 @@ static void test_skewed_inner_dim_block_shape()
   // Test shape 'SkewedInnerDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
+    const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
@@ -897,7 +897,7 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
+    const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
     TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
     TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 7606b0abf..6d8e58214 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -300,7 +300,7 @@ static void test_multithread_contraction_with_output_kernel() {
 
   m_result = m_left * m_right;
 
-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
-- 
cgit v1.2.3


From 8295f02b36bc62584b71550cd48a5c56b173ee4c Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 24 Aug 2018 23:22:20 +0200
Subject: Hide "maybe uninitialized" warning on gcc

---
 unsupported/test/cxx11_tensor_block_access.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 24a95ab12..74ce2cc80 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -138,8 +138,8 @@ template <typename T, int Layout, int NumDims>
 static void UpdateCoeffSet(
     const internal::TensorBlock<T, Index, NumDims, Layout>& block,
     Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) {
-  const DSizes<Index, NumDims> block_sizes = block.block_sizes();
-  const DSizes<Index, NumDims> tensor_strides = block.tensor_strides();
+  const DSizes<Index, NumDims>& block_sizes = block.block_sizes();
+  const DSizes<Index, NumDims>& tensor_strides = block.tensor_strides();
 
   for (int i = 0; i < block_sizes[dim_index]; ++i) {
     if (tensor_strides[dim_index] == 1) {
-- 
cgit v1.2.3


From 5aaedbecedb6702c5102e13a54d5908b8964174b Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 24 Aug 2018 23:54:12 +0200
Subject: Fixed more sign-compare and type-limits warnings

---
 unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 2ca91709f..4b24e5fc1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -347,7 +347,7 @@ struct TensorContractionEvaluatorBase
     // dimensions and right non-contracting dimensions.
     m_lhs_inner_dim_contiguous = true;
     int dim_idx = 0;
-    unsigned int nocontract_idx = 0;
+    Index nocontract_idx = 0;
 
     for (int i = 0; i < LDims; i++) {
       // find if we are contracting on index i of left tensor
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 9a6431f29..4dd2e7c86 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -336,7 +336,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
   {
-    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
+    for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
       eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
     }
 
@@ -739,7 +739,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   {
     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
     DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
-    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
+    for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
       eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
       if(m_strides[i]>0){
         startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
-- 
cgit v1.2.3


From 495f6c3c3a93767832f962bf5426b25125917f78 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Fri, 24 Aug 2018 23:56:13 +0200
Subject: Fix missing-braces warnings

---
 unsupported/test/cxx11_tensor_concatenation.cpp | 2 +-
 unsupported/test/cxx11_tensor_contraction.cpp   | 2 +-
 unsupported/test/cxx11_tensor_convolution.cpp   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
index f53515b4e..2e1abdfb1 100644
--- a/unsupported/test/cxx11_tensor_concatenation.cpp
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -56,7 +56,7 @@ static void test_static_dimension_failure()
   // either the code should change to 
   //  Tensor<int, 2>::Dimensions{{2, 3}}
   // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
-      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0);
+      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0);
 }
 
 template<int DataLayout>
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index 928d20f6e..4e5922440 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -471,7 +471,7 @@ static void test_tensor_product()
   mat1.setRandom();
   mat2.setRandom();
 
-  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{});
+  Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
 
   VERIFY_IS_EQUAL(result.dimension(0), 2);
   VERIFY_IS_EQUAL(result.dimension(1), 3);
diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp
index 9fe980648..01bc77bc1 100644
--- a/unsupported/test/cxx11_tensor_convolution.cpp
+++ b/unsupported/test/cxx11_tensor_convolution.cpp
@@ -25,7 +25,7 @@ static void test_evals()
 
   Tensor<float, 2, DataLayout> result(2,3);
   result.setZero();
-  Eigen::array<Tensor<float, 2>::Index, 1> dims3{0};
+  Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
 
   typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
   Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
-- 
cgit v1.2.3


From 209b4972ece60c9193b492745b5bfac2bfd462d5 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 00:02:46 +0200
Subject: Fix conversion warning

---
 unsupported/test/EulerAngles.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp
index 572fc08a3..67533e364 100644
--- a/unsupported/test/EulerAngles.cpp
+++ b/unsupported/test/EulerAngles.cpp
@@ -197,6 +197,7 @@ template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
 template<typename Scalar> void eulerangles_manual()
 {
   typedef Matrix<Scalar,3,1> Vector3;
+  typedef Matrix<Scalar,Dynamic,1> VectorX;
   const Vector3 Zero = Vector3::Zero();
   const Scalar PI = Scalar(EIGEN_PI);
   
@@ -213,13 +214,13 @@ template<typename Scalar> void eulerangles_manual()
   check_singular_cases(-PI);
   
   // non-singular cases
-  VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
-  VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
-  VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
+  VectorX alpha = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
+  VectorX beta =  VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
+  VectorX gamma = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
   for (int i = 0; i < alpha.size(); ++i) {
     for (int j = 0; j < beta.size(); ++j) {
       for (int k = 0; k < gamma.size(); ++k) {
-        check_all_var(Vector3d(alpha(i), beta(j), gamma(k)));
+        check_all_var(Vector3(alpha(i), beta(j), gamma(k)));
       }
     }
   }
-- 
cgit v1.2.3


From f155e97adb45dac0fd0f5e457d0300f0a5e3bada Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 00:10:46 +0200
Subject: Previous fix broke compilation for clang

---
 unsupported/test/cxx11_tensor_concatenation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
index 2e1abdfb1..e223d9ffd 100644
--- a/unsupported/test/cxx11_tensor_concatenation.cpp
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -56,7 +56,7 @@ static void test_static_dimension_failure()
   // either the code should change to 
   //  Tensor<int, 2>::Dimensions{{2, 3}}
   // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
-      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0);
+      .concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
 }
 
 template<int DataLayout>
-- 
cgit v1.2.3


From 117bc5d50587d13c80fbe11b9ae7f86873cf5d2d Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 09:06:08 +0200
Subject: Fix some shadow warnings

---
 unsupported/Eigen/src/Splines/Spline.h |  7 +------
 unsupported/test/matrix_function.cpp   | 12 ++++--------
 unsupported/test/openglsupport.cpp     |  4 ----
 unsupported/test/polynomialsolver.cpp  |  1 -
 4 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/unsupported/Eigen/src/Splines/Spline.h b/unsupported/Eigen/src/Splines/Spline.h
index 627f6e482..c1cf5b7e4 100644
--- a/unsupported/Eigen/src/Splines/Spline.h
+++ b/unsupported/Eigen/src/Splines/Spline.h
@@ -249,15 +249,13 @@ namespace Eigen
     DenseIndex degree,
     const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots)
   {
-    typedef typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType BasisVectorType;
-
     const DenseIndex p = degree;
     const DenseIndex i = Spline::Span(u, degree, knots);
 
     const KnotVectorType& U = knots;
 
     BasisVectorType left(p+1); left(0) = Scalar(0);
-    BasisVectorType right(p+1); right(0) = Scalar(0);        
+    BasisVectorType right(p+1); right(0) = Scalar(0);
 
     VectorBlock<BasisVectorType,Degree>(left,1,p) = u - VectorBlock<const KnotVectorType,Degree>(U,i+1-p,p).reverse();
     VectorBlock<BasisVectorType,Degree>(right,1,p) = VectorBlock<const KnotVectorType,Degree>(U,i+1,p) - u;
@@ -380,9 +378,6 @@ namespace Eigen
     typedef Spline<_Scalar, _Dim, _Degree> SplineType;
     enum { Order = SplineTraits<SplineType>::OrderAtCompileTime };
 
-    typedef typename SplineTraits<SplineType>::Scalar Scalar;
-    typedef typename SplineTraits<SplineType>::BasisVectorType BasisVectorType;
-  
     const DenseIndex span = SplineType::Span(u, p, U);
 
     const DenseIndex n = (std::min)(p, order);
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 93fb71430..2049b8ba0 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -23,9 +23,8 @@ inline bool test_isApprox_abs(const Type1& a, const Type2& b)
 
 // Returns a matrix with eigenvalues clustered around 0, 1 and 2.
 template<typename MatrixType>
-MatrixType randomMatrixWithRealEivals(const typename MatrixType::Index size)
+MatrixType randomMatrixWithRealEivals(const Index size)
 {
-  typedef typename MatrixType::Index Index;
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
   MatrixType diag = MatrixType::Zero(size, size);
@@ -42,16 +41,15 @@ template <typename MatrixType, int IsComplex = NumTraits<typename internal::trai
 struct randomMatrixWithImagEivals
 {
   // Returns a matrix with eigenvalues clustered around 0 and +/- i.
-  static MatrixType run(const typename MatrixType::Index size);
+  static MatrixType run(const Index size);
 };
 
 // Partial specialization for real matrices
 template<typename MatrixType>
 struct randomMatrixWithImagEivals<MatrixType, 0>
 {
-  static MatrixType run(const typename MatrixType::Index size)
+  static MatrixType run(const Index size)
   {
-    typedef typename MatrixType::Index Index;
     typedef typename MatrixType::Scalar Scalar;
     MatrixType diag = MatrixType::Zero(size, size);
     Index i = 0;
@@ -77,9 +75,8 @@ struct randomMatrixWithImagEivals<MatrixType, 0>
 template<typename MatrixType>
 struct randomMatrixWithImagEivals<MatrixType, 1>
 {
-  static MatrixType run(const typename MatrixType::Index size)
+  static MatrixType run(const Index size)
   {
-    typedef typename MatrixType::Index Index;
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
     const Scalar imagUnit(0, 1);
@@ -171,7 +168,6 @@ void testMatrixType(const MatrixType& m)
 {
   // Matrices with clustered eigenvalue lead to different code paths
   // in MatrixFunction.h and are thus useful for testing.
-  typedef typename MatrixType::Index Index;
 
   const Index size = m.rows();
   for (int i = 0; i < g_repeat; i++) {
diff --git a/unsupported/test/openglsupport.cpp b/unsupported/test/openglsupport.cpp
index 460830086..eadd7f985 100644
--- a/unsupported/test/openglsupport.cpp
+++ b/unsupported/test/openglsupport.cpp
@@ -318,10 +318,6 @@ EIGEN_DECLARE_TEST(openglsupport)
         
       GLint prg_id = createShader(vtx,frg);
       
-      typedef Vector2d Vector2d;
-      typedef Vector3d Vector3d;
-      typedef Vector4d Vector4d;
-      
       VERIFY_UNIFORM(dv,v2d, Vector2d);
       VERIFY_UNIFORM(dv,v3d, Vector3d);
       VERIFY_UNIFORM(dv,v4d, Vector4d);
diff --git a/unsupported/test/polynomialsolver.cpp b/unsupported/test/polynomialsolver.cpp
index 65efea0cb..50c74f797 100644
--- a/unsupported/test/polynomialsolver.cpp
+++ b/unsupported/test/polynomialsolver.cpp
@@ -30,7 +30,6 @@ struct increment_if_fixed_size
 template<int Deg, typename POLYNOMIAL, typename SOLVER>
 bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
 {
-  typedef typename POLYNOMIAL::Index Index;
   typedef typename POLYNOMIAL::Scalar Scalar;
   typedef typename POLYNOMIAL::RealScalar RealScalar;
 
-- 
cgit v1.2.3


From 4b1ad086b59a8efdc9a4971265868b3cc7a3d290 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 10:07:17 +0200
Subject: Fix shadow warnings in doc-snippets

---
 doc/snippets/DirectionWise_hnormalized.cpp | 3 +--
 doc/snippets/VectorwiseOp_homogeneous.cpp  | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/doc/snippets/DirectionWise_hnormalized.cpp b/doc/snippets/DirectionWise_hnormalized.cpp
index 3410790a8..2451f6e7b 100644
--- a/doc/snippets/DirectionWise_hnormalized.cpp
+++ b/doc/snippets/DirectionWise_hnormalized.cpp
@@ -1,7 +1,6 @@
-typedef Matrix<double,4,Dynamic> Matrix4Xd;
 Matrix4Xd M = Matrix4Xd::Random(4,5);
 Projective3d P(Matrix4d::Random());
 cout << "The matrix M is:" << endl << M << endl << endl;
 cout << "M.colwise().hnormalized():" << endl << M.colwise().hnormalized() << endl << endl;
 cout << "P*M:" << endl << P*M << endl << endl;
-cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl;
\ No newline at end of file
+cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl;
diff --git a/doc/snippets/VectorwiseOp_homogeneous.cpp b/doc/snippets/VectorwiseOp_homogeneous.cpp
index aba4fed0e..67cf5737d 100644
--- a/doc/snippets/VectorwiseOp_homogeneous.cpp
+++ b/doc/snippets/VectorwiseOp_homogeneous.cpp
@@ -1,7 +1,6 @@
-typedef Matrix<double,3,Dynamic> Matrix3Xd;
 Matrix3Xd M = Matrix3Xd::Random(3,5);
 Projective3d P(Matrix4d::Random());
 cout << "The matrix M is:" << endl << M << endl << endl;
 cout << "M.colwise().homogeneous():" << endl << M.colwise().homogeneous() << endl << endl;
 cout << "P * M.colwise().homogeneous():" << endl << P * M.colwise().homogeneous() << endl << endl;
-cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl;
\ No newline at end of file
+cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl;
-- 
cgit v1.2.3


From 42123ff38bb061a25861d3ec0f98c059d07ec4c1 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 11:53:28 +0200
Subject: Make unit test C++03 compatible

---
 unsupported/test/cxx11_tensor_block_access.cpp | 59 +++++++++++++++-----------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 74ce2cc80..40e6227e5 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -367,6 +367,40 @@ static void test_block_io_copy_using_reordered_dimensions() {
   delete[] output_data;
 }
 
+template<typename Scalar, typename StorageIndex, int Dim>
+class EqualityChecker
+{
+    const Scalar* input_data;
+    const DSizes<StorageIndex, Dim> &input_dims, &input_strides, &output_dims, &output_strides;
+    void check_recursive(const Scalar* input, const Scalar* output, int depth=0) const
+    {
+        if(depth==Dim)
+        {
+            VERIFY_IS_EQUAL(*input, *output);
+            return;
+        }
+
+        for(int i=0; i<output_dims[depth]; ++i)
+        {
+            check_recursive(input + i % input_dims[depth] * input_strides[depth], output + i*output_strides[depth], depth+1);
+        }
+    }
+public:
+    EqualityChecker(const Scalar* input_data_,
+            const DSizes<StorageIndex, Dim> &input_dims_, const DSizes<StorageIndex, Dim> &input_strides_,
+            const DSizes<StorageIndex, Dim> &output_dims_, const DSizes<StorageIndex, Dim> &output_strides_)
+        : input_data(input_data_)
+        , input_dims(input_dims_), input_strides(input_strides_)
+        , output_dims(output_dims_), output_strides(output_strides_)
+        {}
+
+    void operator()(const Scalar* output_data) const
+    {
+        check_recursive(input_data, output_data);
+    }
+};
+
+
 template <int Layout>
 static void test_block_io_zero_stride()
 {
@@ -398,30 +432,7 @@ static void test_block_io_zero_stride()
   input_tensor_strides_with_zeros[4] = 0;
 
   // Verify that data was correctly read/written from/into the block.
-  const auto verify_is_equal = [&](const float* output_data) {
-    for (int i = 0; i < output_tensor_dims[0]; ++i) {
-      for (int j = 0; j < output_tensor_dims[1]; ++j) {
-        for (int k = 0; k < output_tensor_dims[2]; ++k) {
-          for (int l = 0; l < output_tensor_dims[3]; ++l) {
-            for (int m = 0; m < output_tensor_dims[4]; ++m) {
-              const Index output_offset =
-                  i * output_tensor_strides[0] + j * output_tensor_strides[1] +
-                  k * output_tensor_strides[2] + l * output_tensor_strides[3] +
-                  m * output_tensor_strides[4];
-              const Index input_offset =
-                  i % input_tensor_dims[0] * input_tensor_strides[0] +
-                  j % input_tensor_dims[1] * input_tensor_strides[1] +
-                  k % input_tensor_dims[2] * input_tensor_strides[2] +
-                  l % input_tensor_dims[3] * input_tensor_strides[3] +
-                  m % input_tensor_dims[4] * input_tensor_strides[4];
-              VERIFY_IS_EQUAL(output_data[output_offset],
-                              input_data[input_offset]);
-            }
-          }
-        }
-      }
-    }
-  };
+  const EqualityChecker<float, Index, 5> verify_is_equal(input_data, input_tensor_dims, input_tensor_strides, output_tensor_dims, output_tensor_strides);
 
   {
     float* output_data = new float[output_tensor_dims.TotalSize()];
-- 
cgit v1.2.3


From b1653d15996b844852e2cefdd4d63e55dbc771f5 Mon Sep 17 00:00:00 2001
From: Christoph Hertzberg <chtz@informatik.uni-bremen.de>
Date: Sat, 25 Aug 2018 12:21:00 +0200
Subject: Fix some trivial C++11 vs C++03 compatibility warnings

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h        | 2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 4 ++--
 unsupported/Eigen/CXX11/src/util/EmulateArray.h         | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index aa500eb70..4cb49e9b8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -62,7 +62,7 @@ struct cond<RowMajor> {
  */
 enum TensorBlockShapeType {
   kUniformAllDims,
-  kSkewedInnerDims,
+  kSkewedInnerDims
 };
 
 struct TensorOpResourceRequirements {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b4a77b022..560e3ec22 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  bool isCopy= false, nByOne = false, oneByN = false;
+  bool isCopy, nByOne, oneByN;
 
   enum {
     IsAligned = true,
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   };
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
+    : isCopy(false), nByOne(false), oneByN(false), m_broadcast(op.broadcast()),m_impl(op.expression(), device)
   {
     // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
     // and store the result in a scalar. Instead one should reshape the scalar into a a N-D
diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
index 32db51592..d5c000e08 100644
--- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h
+++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h
@@ -207,16 +207,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
 }
 
 template<class T, std::size_t N> struct array_size<array<T,N> > {
-  static const size_t value = N;
+  enum { value = N };
 };
 template<class T, std::size_t N> struct array_size<array<T,N>& > {
-  static const size_t value = N;
+  enum { value = N };
 };
 template<class T, std::size_t N> struct array_size<const array<T,N> > {
-  static const size_t value = N;
+  enum { value = N };
 };
 template<class T, std::size_t N> struct array_size<const array<T,N>& > {
-  static const size_t value = N;
+  enum { value = N };
 };
 
 }  // end namespace internal
-- 
cgit v1.2.3


From d5ed64512fc79dff800c90acd73e3e6a08a3d2c3 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 27 Aug 2018 10:38:20 +0200
Subject: bug #1573: workaround gcc 4.7 and 4.8 bug

---
 test/geo_quaternion.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp
index ed801c71b..27219db10 100644
--- a/test/geo_quaternion.cpp
+++ b/test/geo_quaternion.cpp
@@ -290,6 +290,8 @@ template<typename PlainObjectType> void check_const_correctness(const PlainObjec
 
 // Regression for bug 1573
 struct MovableClass {
+  // The following line is a workaround for gcc 4.7 and 4.8 (see bug 1573 comments).
+  static_assert(std::is_nothrow_move_constructible<Quaternionf>::value,"");
   MovableClass() = default;
   MovableClass(const MovableClass&) = default;
   MovableClass(MovableClass&&) noexcept = default;
-- 
cgit v1.2.3


From 57472886764ff71ad45338c6538649f7a8fa3d0e Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 27 Aug 2018 13:07:34 +0200
Subject: Disable a bonus unit-test which is broken with gcc 4.7

---
 test/meta.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/meta.cpp b/test/meta.cpp
index a6a67b85c..ea9607fe7 100644
--- a/test/meta.cpp
+++ b/test/meta.cpp
@@ -102,7 +102,13 @@ EIGEN_DECLARE_TEST(meta)
   }
 
   STATIC_CHECK(( !internal::is_convertible<MyInterface, MyImpl>::value ));
+  #if (!EIGEN_COMP_GNUC_STRICT) || (EIGEN_GNUC_AT_LEAST(4,8))
+  // GCC prior to 4.8 fails to compile this test:
+  // error: cannot allocate an object of abstract type 'MyInterface'
+  // In other words, it does not obey SFINAE.
+  // Nevertheless, we don't really care about supporting abstract types as scalar types!
   STATIC_CHECK(( !internal::is_convertible<MyImpl, MyInterface>::value ));
+  #endif
   STATIC_CHECK((  internal::is_convertible<MyImpl, const MyInterface&>::value ));
   {
     int i;
-- 
cgit v1.2.3