88 files changed, 730 insertions, 807 deletions
diff --git a/Eigen/src/CMakeLists.txt b/Eigen/src/CMakeLists.txt
deleted file mode 100644
index c326f374d..000000000
--- a/Eigen/src/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-file(GLOB Eigen_src_subdirectories "*")
-escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
-foreach(f ${Eigen_src_subdirectories})
-  if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" )
-    add_subdirectory(${f})
-  endif()
-endforeach()
diff --git a/Eigen/src/Cholesky/CMakeLists.txt b/Eigen/src/Cholesky/CMakeLists.txt
deleted file mode 100644
index d01488b41..000000000
--- a/Eigen/src/Cholesky/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Cholesky_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Cholesky_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel
-  )
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 69e176110..fcee7b2e3 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -253,7 +253,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     ComputationInfo info() const
     {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return Success;
+      return m_info;
     }
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -281,6 +281,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     TmpMatrixType m_temporary;
     internal::SignMatrix m_sign;
     bool m_isInitialized;
+    ComputationInfo m_info;
 };
 
 namespace internal {
@@ -298,6 +299,8 @@ template<> struct ldlt_inplace<Lower>
     typedef typename TranspositionType::StorageIndex IndexType;
     eigen_assert(mat.rows()==mat.cols());
     const Index size = mat.rows();
+    bool found_zero_pivot = false;
+    bool ret = true;
 
     if (size <= 1)
     {
@@ -356,9 +359,27 @@ template<> struct ldlt_inplace<Lower>
       // we should only make sure that we do not introduce INF or NaN values.
       // Remark that LAPACK also uses 0 as the cutoff value.
       RealScalar realAkk = numext::real(mat.coeffRef(k,k));
-      if((rs>0) && (abs(realAkk) > RealScalar(0)))
+      bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
+
+      if(k==0 && !pivot_is_valid)
+      {
+        // The entire diagonal is zero, there is nothing more to do
+        // except filling the transpositions, and checking whether the matrix is zero.
+        sign = ZeroSign;
+        for(Index j = 0; j<size; ++j)
+        {
+          transpositions.coeffRef(j) = IndexType(j);
+          ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
+        }
+        return ret;
+      }
+
+      if((rs>0) && pivot_is_valid)
         A21 /= realAkk;
 
+      if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
+      else if(!pivot_is_valid) found_zero_pivot = true;
+
       if (sign == PositiveSemiDef) {
         if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
       } else if (sign == NegativeSemiDef) {
@@ -369,7 +390,7 @@ template<> struct ldlt_inplace<Lower>
       }
     }
 
-    return true;
+    return ret;
   }
 
   // Reference for the algorithm: Davis and Hager, "Multiple Rank
@@ -493,7 +514,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputTyp
   m_temporary.resize(size);
   m_sign = internal::ZeroSign;
 
-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
+  m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
 
   m_isInitialized = true;
   return *this;
@@ -621,7 +642,6 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return res;
 }
 
-#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the Cholesky decomposition with full pivoting without square root of \c *this
   * \sa MatrixBase::ldlt()
@@ -643,7 +663,6 @@ MatrixBase<Derived>::ldlt() const
 {
   return LDLT<PlainObject>(derived());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index bd966656d..ddf4875ab 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -507,7 +507,6 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return matrixL() * matrixL().adjoint().toDenseMatrix();
 }
 
-#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the LLT decomposition of \c *this
   * \sa SelfAdjointView::llt()
@@ -529,7 +528,6 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 {
   return LLT<PlainObject,UpLo>(m_matrix);
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/CholmodSupport/CMakeLists.txt b/Eigen/src/CholmodSupport/CMakeLists.txt
deleted file mode 100644
index 814dfa613..000000000
--- a/Eigen/src/CholmodSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CholmodSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_CholmodSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 7c2e0de16..0d34269fd 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -37,7 +37,7 @@ struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : tra
   * storage layout.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
   *
   * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
   */
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index 62851a0c2..3a66f0e40 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -32,7 +32,7 @@ template<typename ExpressionType> class MatrixWrapper;
   * \tparam Derived is the derived type, e.g., an array or an expression type.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
   *
   * \sa class MatrixBase, \ref TopicClassHierarchy
   */
diff --git a/Eigen/src/Core/CMakeLists.txt b/Eigen/src/Core/CMakeLists.txt
deleted file mode 100644
index 38c3afde9..000000000
--- a/Eigen/src/Core/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-FILE(GLOB Eigen_Core_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel
-  )
-
-ADD_SUBDIRECTORY(products)
-ADD_SUBDIRECTORY(util)
-ADD_SUBDIRECTORY(arch)
-ADD_SUBDIRECTORY(functors)
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 7ba92963c..7a5540593 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -337,6 +337,120 @@ protected:
 // Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
 // Likewise, there is not need to more sophisticated dispatching here.
 
+template<typename Scalar,typename NullaryOp,
+         bool has_nullary = has_nullary_operator<NullaryOp>::value,
+         bool has_unary   = has_unary_operator<NullaryOp>::value,
+         bool has_binary  = has_binary_operator<NullaryOp>::value>
+struct nullary_wrapper
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
+};
+
+// We need the following specialization for vector-only functors assigned to a runtime vector,
+// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd.
+// In this case, i==0 and j is used for the actual iteration.
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op(i+j);
+  }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op.template packetOp<T>(i+j);
+  }
+
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
+
+#if 0 && EIGEN_COMP_MSVC>0
+// Disable this ugly workaround. This is now handled in traits<Ref>::match,
+// but this piece of code might still become handly if some other weird compilation
+// erros pop up again.
+
+// MSVC exhibits a weird compilation error when
+// compiling:
+//    Eigen::MatrixXf A = MatrixXf::Random(3,3);
+//    Ref<const MatrixXf> R = 2.f*A;
+// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.
+// The "problem" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>
+// and at that time has_*ary_operator<T> returns true regardless of T.
+// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.
+// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),
+// and packet() are really instantiated as implemented below:
+
+// This is a simple wrapper around Index to enforce the re-instantiation of
+// has_*ary_operator when needed.
+template<typename T> struct nullary_wrapper_workaround_msvc {
+  nullary_wrapper_workaround_msvc(const T&);
+  operator T()const;
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
+  }
+
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
+  }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
+  }
+};
+#endif // MSVC workaround
+
 template<typename NullaryOp, typename PlainObjectType>
 struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
   : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
@@ -356,41 +470,44 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
   };
 
   EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
-    : m_functor(n.functor()) 
+    : m_functor(n.functor()), m_wrapper()
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
+  template <typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  CoeffReturnType coeff(Index row, Index col) const
+  CoeffReturnType coeff(IndexType row, IndexType col) const
   {
-    return m_functor(row, col);
+    return m_wrapper(m_functor, row, col);
   }
 
+  template <typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  CoeffReturnType coeff(Index index) const
+  CoeffReturnType coeff(IndexType index) const
   {
-    return m_functor(index);
+    return m_wrapper(m_functor,index);
   }
 
-  template<int LoadMode, typename PacketType>
+  template<int LoadMode, typename PacketType, typename IndexType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index row, Index col) const
+  PacketType packet(IndexType row, IndexType col) const
   {
-    return m_functor.template packetOp<Index,PacketType>(row, col);
+    return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
   }
 
-  template<int LoadMode, typename PacketType>
+  template<int LoadMode, typename PacketType, typename IndexType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index index) const
+  PacketType packet(IndexType index) const
   {
-    return m_functor.template packetOp<Index,PacketType>(index);
+    return m_wrapper.template packetOp<PacketType>(m_functor, index);
   }
 
 protected:
   const NullaryOp m_functor;
+  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
 };
 
 // -------------------- CwiseUnaryOp --------------------
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 3c6508cd0..e3f20894d 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -20,7 +20,8 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
     Flags = traits<PlainObjectType>::Flags & RowMajorBit
   };
 };
-}
+
+} // namespace internal
 
 /** \class CwiseNullaryOp
   * \ingroup Core_Module
@@ -37,7 +38,23 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
   * However, if you want to write a function returning such an expression, you
   * will need to use this class.
   *
-  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr()
+  * The functor NullaryOp must expose one of the following method:
+    <table class="manual">
+    <tr            ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
+    <tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
+    <tr            ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
+    </table>
+  * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
+  *
+  * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
+  * C++11 random number generators.
+  *
+  * A nullary expression can also be used to implement custom sophisticated matrix manipulations
+  * that cannot be covered by the existing set of natively supported matrix manipulations.
+  * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
+  * on the behavior of CwiseNullaryOp.
+  *
+  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
   */
 template<typename NullaryOp, typename PlainObjectType>
 class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
@@ -62,30 +79,6 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
 
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
-    {
-      return m_functor(rowId, colId);
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
-    {
-      return m_functor.packetOp(rowId, colId);
-    }
-
-    EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      return m_functor(index);
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
-    {
-      return m_functor.packetOp(index);
-    }
-
     /** \returns the functor representing the nullary operation */
     EIGEN_DEVICE_FUNC
     const NullaryOp& functor() const { return m_functor; }
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index a60e5cb00..0ede9b041 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -34,7 +34,7 @@ static inline void check_DenseIndex_is_signed() {
   * \tparam Derived is the derived type, e.g., a matrix type or an expression.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
   *
   * \sa \blank \ref TopicClassHierarchy
   */
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index c351c6b92..020f939ad 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -26,7 +26,7 @@ namespace Eigen {
   * Typical users do not have to directly deal with this class.
   *
   * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
-  * See \link TopicCustomizingEigen customizing Eigen \endlink for details.
+  * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
   *
   * The \c Derived class has to provide the following two methods describing the memory layout:
   *  \code Index innerStride() const; \endcode
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 9934e5601..bf3044b96 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -459,30 +459,33 @@ struct arg_retval
 /****************************************************************************
 * Implementation of log1p                                                   *
 ****************************************************************************/
-template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
-struct log1p_impl
-{
-  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x)
-  {
+
+namespace std_fallback {
+  // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,
+  // or that there is no suitable std::log1p function available
+  template<typename Scalar>
+  EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
     EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
     typedef typename NumTraits<Scalar>::Real RealScalar;
     EIGEN_USING_STD_MATH(log);
     Scalar x1p = RealScalar(1) + x;
     return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
   }
-};
+}
 
-#if EIGEN_HAS_CXX11_MATH && !defined(__CUDACC__)
 template<typename Scalar>
-struct log1p_impl<Scalar, false> {
+struct log1p_impl {
   static inline Scalar run(const Scalar& x)
   {
     EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    #if EIGEN_HAS_CXX11_MATH
     using std::log1p;
+    #endif
+    using std_fallback::log1p;
     return log1p(x);
   }
 };
-#endif
+
 
 template<typename Scalar>
 struct log1p_retval
@@ -615,16 +618,18 @@ struct random_default_impl<Scalar, false, true>
     typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
     if(y<x)
       return x;
+    // the following difference might overflow on a 32 bits system,
+    // but since y>=x the result converted to an unsigned long is still correct.
     std::size_t range = ScalarX(y)-ScalarX(x);
     std::size_t offset = 0;
     // rejection sampling
-    std::size_t divisor    = (range+RAND_MAX-1)/(range+1);
-    std::size_t multiplier = (range+RAND_MAX-1)/std::size_t(RAND_MAX);
-
+    std::size_t divisor = 1;
+    std::size_t multiplier = 1;
+    if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
+    else               multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
     do {
-      offset = ( (std::size_t(std::rand()) * multiplier) / divisor );
+      offset = (std::size_t(std::rand()) * multiplier) / divisor;
     } while (offset > range);
-
     return Scalar(ScalarX(x) + offset);
   }
 
@@ -785,6 +790,8 @@ template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>&
 template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
 template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
 
+template<typename T> T generic_fast_tanh_float(const T& a_x);
+
 } // end namespace internal
 
 /****************************************************************************
@@ -921,6 +928,14 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
   return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
 }
 
+#ifdef __CUDACC__
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log1p(const float &x) { return ::log1pf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log1p(const double &x) { return ::log1p(x); }
+#endif
+
 template<typename ScalarX,typename ScalarY>
 EIGEN_DEVICE_FUNC
 inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
@@ -1031,6 +1046,16 @@ float abs(const float &x) { return ::fabsf(x); }
 
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 double abs(const double &x) { return ::fabs(x); }
+
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const std::complex<float>& x) {
+  return ::hypotf(real(x), imag(x));
+}
+
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const std::complex<double>& x) {
+  return ::hypot(real(x), imag(x));
+}
 #endif
 
 template<typename T>
@@ -1176,6 +1201,11 @@ T tanh(const T &x) {
   return tanh(x);
 }
 
+#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+#endif
+
 #ifdef __CUDACC__
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tanh(const float &x) { return ::tanhf(x); }
diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h
new file mode 100644
index 000000000..0c77ee003
--- /dev/null
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONSIMPL_H
+#define EIGEN_MATHFUNCTIONSIMPL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+    is accurate up to a couple of ulp in the range [-9, 9], outside of which
+    the tanh(x) = +/-1.
+
+    This implementation works on both scalars and packets.
+*/
+template<typename T>
+T generic_fast_tanh_float(const T& a_x)
+{
+  // Clamp the inputs to the range [-9, 9] since anything outside
+  // this range is +/-1.0f in single-precision.
+  const T plus_9 = pset1<T>(9.f);
+  const T minus_9 = pset1<T>(-9.f);
+  const T x = pmax(minus_9, pmin(plus_9, a_x));
+
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(4.89352518554385e-03f);
+  const T beta_2 = pset1<T>(2.26843463243900e-03f);
+  const T beta_4 = pset1<T>(1.18534705686654e-04f);
+  const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial p.
+  T q = pmadd(x2, beta_6, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pdiv(p, q);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONSIMPL_H
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 502b7935a..90c336d8c 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -106,7 +106,7 @@ public:
   * \endcode
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
   *
   * <i><b>Some notes:</b></i>
   *
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d9d2426ad..334a4d71e 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -41,7 +41,7 @@ namespace Eigen {
   * \endcode
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
   *
   * \sa \blank \ref TopicClassHierarchy
   */
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 42cffbd3b..dd61195bc 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -97,23 +97,6 @@ template<typename T> struct GenericNumTraits
     MulCost = 1
   };
 
-  // Division is messy but important, because it is expensive and throughput
-  // varies significantly. The following numbers are based on min division
-  // throughput on Haswell.
-  template<bool Vectorized>
-  struct Div {
-    enum {
-#ifdef EIGEN_VECTORIZE_AVX
-      AVX = true,
-#else
-      AVX = false,
-#endif
-      Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
-          Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
-    };
-  };
-
-
   typedef T Real;
   typedef typename internal::conditional<
                      IsInteger,
@@ -255,6 +238,9 @@ private:
   static inline std::string quiet_NaN();
 };
 
+// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
+template<> struct NumTraits<void> {};
+
 } // end namespace Eigen
 
 #endif // EIGEN_NUMTRAITS_H
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 64f5eb052..55b4ac057 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -63,7 +63,7 @@ template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct m
   * \brief %Dense storage base class for matrices and arrays.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
   *
   * \sa \ref TopicClassHierarchy
   */
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 955668bef..b8f92a3dc 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -194,7 +194,6 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
 //----------------------------------------
 // Catch "Dense ?= xpr + Product<>" expression to save one temporary
 // FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
-// TODO enable it for "Dense ?= xpr - Product<>" as well.
 
 template<typename OtherXpr, typename Lhs, typename Rhs>
 struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
@@ -203,10 +202,9 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
 };
 
 template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
-struct assignment_from_xpr_plus_product
+struct assignment_from_xpr_op_product
 {
-  typedef CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename ProductType::Scalar>, const OtherXpr, const ProductType> SrcXprType;
-  template<typename InitialFunc>
+  template<typename SrcXprType, typename InitialFunc>
   static EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
   {
@@ -215,21 +213,21 @@ struct assignment_from_xpr_plus_product
   }
 };
 
-template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
-struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
-                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<DstScalar,SrcScalar>, Dense2Dense>
-  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<DstScalar,OtherScalar>, internal::add_assign_op<DstScalar,ProdScalar> >
-{};
-template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
-struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
-                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<DstScalar,SrcScalar>, Dense2Dense>
-  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<DstScalar,OtherScalar>, internal::add_assign_op<DstScalar,ProdScalar> >
-{};
-template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
-struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
-                                           const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<DstScalar,SrcScalar>, Dense2Dense>
-  : assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<DstScalar,OtherScalar>, internal::sub_assign_op<DstScalar,ProdScalar> >
-{};
+#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
+  template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
+  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
+                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
+    : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
+  {}
+
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
+
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
+
 //----------------------------------------
 
 template<typename Lhs, typename Rhs>
@@ -532,8 +530,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
    */
   EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
   {
-    const Index row = RowsAtCompileTime == 1 ? 0 : index;
-    const Index col = RowsAtCompileTime == 1 ? index : 0;
+    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
     return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
   }
 
@@ -551,8 +549,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
   template<int LoadMode, typename PacketType>
   const PacketType packet(Index index) const
   {
-    const Index row = RowsAtCompileTime == 1 ? 0 : index;
-    const Index col = RowsAtCompileTime == 1 ? index : 0;
+    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
     return packet<LoadMode,PacketType>(row,col);
   }
 
diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h
index 02038e9e3..6faf789c7 100644
--- a/Eigen/src/Core/Random.h
+++ b/Eigen/src/Core/Random.h
@@ -16,8 +16,7 @@ namespace internal {
 
 template<typename Scalar> struct scalar_random_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
-  template<typename Index>
-  inline const Scalar operator() (Index, Index = 0) const { return random<Scalar>(); }
+  inline const Scalar operator() () const { return random<Scalar>(); }
 };
 
 template<typename Scalar>
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 17065fdd5..bdf24f52a 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -35,7 +35,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
                       || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
       OuterStrideMatch = Derived::IsVectorAtCompileTime
                       || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
-      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
+      // NOTE, this indirection of evaluator<Derived>::Alignment is needed
+      // to workaround a very strange bug in MSVC related to the instantiation
+      // of has_*ary_operator in evaluator<CwiseNullaryOp>.
+      // This line is surprisingly very sensitive. For instance, simply adding parenthesis
+      // as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
+      DerivedAlignment = int(evaluator<Derived>::Alignment),
+      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
       ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
       MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
     };
diff --git a/Eigen/src/Core/arch/AVX/CMakeLists.txt b/Eigen/src/Core/arch/AVX/CMakeLists.txt
deleted file mode 100644
index bdb71ab99..000000000
--- a/Eigen/src/Core/arch/AVX/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_AVX_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_AVX_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AVX COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index 98d8e029f..d21ec39dd 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -266,52 +266,10 @@ pexp<Packet8f>(const Packet8f& _x) {
 }
 
 // Hyperbolic Tangent function.
-// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
-// fl(tanh(x)) = +/-1.
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
-ptanh<Packet8f>(const Packet8f& _x) {
-  // Clamp the inputs to the range [-9, 9] since anything outside
-  // this range is +/-1.0f in single-precision.
-  _EIGEN_DECLARE_CONST_Packet8f(plus_9, 9.0f);
-  _EIGEN_DECLARE_CONST_Packet8f(minus_9, -9.0f);
-  const Packet8f x = pmax(p8f_minus_9, pmin(p8f_plus_9, _x));
-
-  // The monomial coefficients of the numerator polynomial (odd).
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_1, 4.89352455891786e-03f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_3, 6.37261928875436e-04f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_5, 1.48572235717979e-05f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_7, 5.12229709037114e-08f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_9, -8.60467152213735e-11f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_11, 2.00018790482477e-13f);
-  _EIGEN_DECLARE_CONST_Packet8f(alpha_13, -2.76076847742355e-16f);
-
-  // The monomial coefficients of the denominator polynomial (even).
-  _EIGEN_DECLARE_CONST_Packet8f(beta_0, 4.89352518554385e-03f);
-  _EIGEN_DECLARE_CONST_Packet8f(beta_2, 2.26843463243900e-03f);
-  _EIGEN_DECLARE_CONST_Packet8f(beta_4, 1.18534705686654e-04f);
-  _EIGEN_DECLARE_CONST_Packet8f(beta_6, 1.19825839466702e-06f);
-
-  // Since the polynomials are odd/even, we need x^2.
-  const Packet8f x2 = pmul(x, x);
-
-  // Evaluate the numerator polynomial p.
-  Packet8f p = pmadd(x2, p8f_alpha_13, p8f_alpha_11);
-  p = pmadd(x2, p, p8f_alpha_9);
-  p = pmadd(x2, p, p8f_alpha_7);
-  p = pmadd(x2, p, p8f_alpha_5);
-  p = pmadd(x2, p, p8f_alpha_3);
-  p = pmadd(x2, p, p8f_alpha_1);
-  p = pmul(x, p);
-
-  // Evaluate the denominator polynomial p.
-  Packet8f q = pmadd(x2, p8f_beta_6, p8f_beta_4);
-  q = pmadd(x2, q, p8f_beta_2);
-  q = pmadd(x2, q, p8f_beta_0);
-
-  // Divide the numerator by the denominator.
-  return pdiv(p, q);
+ptanh<Packet8f>(const Packet8f& x) {
+  return internal::generic_fast_tanh_float(x);
 }
 
 template <>
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 4fec14f44..dae0ca5d0 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits
   };
 };
 
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+
 /* Proper support for integers is only provided by AVX2. In the meantime, we'll
    use SSE instructions and packets to deal with integers.
 template<> struct packet_traits<int>    : default_packet_traits
@@ -153,7 +156,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
 
 #ifdef __FMA__
 template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
   // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
   // and gcc stupidly generates a vfmadd132ps instruction,
   // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
@@ -166,7 +169,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
 #endif
 }
 template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
   // see above
   Packet4d res = c;
   __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
diff --git a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt b/Eigen/src/Core/arch/AltiVec/CMakeLists.txt
deleted file mode 100644
index 9f8d2e9c4..000000000
--- a/Eigen/src/Core/arch/AltiVec/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_AltiVec_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/CMakeLists.txt b/Eigen/src/Core/arch/CMakeLists.txt
deleted file mode 100644
index 42b0b486e..000000000
--- a/Eigen/src/Core/arch/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-ADD_SUBDIRECTORY(AltiVec)
-ADD_SUBDIRECTORY(AVX)
-ADD_SUBDIRECTORY(CUDA)
-ADD_SUBDIRECTORY(Default)
-ADD_SUBDIRECTORY(NEON)
-ADD_SUBDIRECTORY(SSE)
-
-
-
diff --git a/Eigen/src/Core/arch/CUDA/CMakeLists.txt b/Eigen/src/Core/arch/CUDA/CMakeLists.txt
deleted file mode 100644
index 7ba28da7c..000000000
--- a/Eigen/src/Core/arch/CUDA/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_CUDA_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_CUDA_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/CUDA COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 4d91420d0..52892db38 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -389,10 +389,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
   return half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+  return Eigen::half(::hlog(a));
+#else
   return half(::logf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
-  return half(::log1pf(float(a)));
+  return half(numext::log1p(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
   return half(::log10f(float(a)));
@@ -503,7 +507,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
   return Eigen::half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+  return Eigen::half(::hlog(a));
+#else
   return Eigen::half(::logf(float(a)));
+#endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
   return Eigen::half(::sqrtf(float(a)));
diff --git a/Eigen/src/Core/arch/Default/CMakeLists.txt b/Eigen/src/Core/arch/Default/CMakeLists.txt
deleted file mode 100644
index 339c091d1..000000000
--- a/Eigen/src/Core/arch/Default/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_Default_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/NEON/CMakeLists.txt b/Eigen/src/Core/arch/NEON/CMakeLists.txt
deleted file mode 100644
index fd4d4af50..000000000
--- a/Eigen/src/Core/arch/NEON/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_NEON_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/SSE/CMakeLists.txt b/Eigen/src/Core/arch/SSE/CMakeLists.txt
deleted file mode 100644
index 46ea7cc62..000000000
--- a/Eigen/src/Core/arch/SSE/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_SSE_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel
-)
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index 28f103eeb..ac2fd8103 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -517,52 +517,10 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
 }
 
 // Hyperbolic Tangent function.
-// Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-// is accurate up to a couple of ulp in the range [-9, 9], outside of which the
-// fl(tanh(x)) = +/-1.
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
-ptanh<Packet4f>(const Packet4f& _x) {
-  // Clamp the inputs to the range [-9, 9] since anything outside
-  // this range is +/-1.0f in single-precision.
-  _EIGEN_DECLARE_CONST_Packet4f(plus_9, 9.0f);
-  _EIGEN_DECLARE_CONST_Packet4f(minus_9, -9.0f);
-  const Packet4f x = pmax(p4f_minus_9, pmin(p4f_plus_9, _x));
-
-  // The monomial coefficients of the numerator polynomial (odd).
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-03f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-04f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-05f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-08f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
-  _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
-
-  // The monomial coefficients of the denominator polynomial (even).
-  _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-03f);
-  _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-03f);
-  _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-04f);
-  _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-06f);
-
-  // Since the polynomials are odd/even, we need x^2.
-  const Packet4f x2 = pmul(x, x);
-
-  // Evaluate the numerator polynomial p.
-  Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
-  p = pmadd(x2, p, p4f_alpha_9);
-  p = pmadd(x2, p, p4f_alpha_7);
-  p = pmadd(x2, p, p4f_alpha_5);
-  p = pmadd(x2, p, p4f_alpha_3);
-  p = pmadd(x2, p, p4f_alpha_1);
-  p = pmul(x, p);
-
-  // Evaluate the denominator polynomial p.
-  Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
-  q = pmadd(x2, q, p4f_beta_2);
-  q = pmadd(x2, q, p4f_beta_0);
-
-  // Divide the numerator by the denominator.
-  return pdiv(p, q);
+ptanh<Packet4f>(const Packet4f& x) {
+  return internal::generic_fast_tanh_float(x);
 }
 
 } // end namespace internal
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 70839d68d..baad692e3 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4,
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
 template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
 
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+
 #if EIGEN_COMP_MSVC==1500
 // Workaround MSVC 9 internal compiler error.
 // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
@@ -813,6 +818,16 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
 #endif
 }
 
+// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+  return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+  return ::fma(a,b,c);
+}
+#endif
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/arch/ZVector/CMakeLists.txt b/Eigen/src/Core/arch/ZVector/CMakeLists.txt
deleted file mode 100644
index 5eb0957eb..000000000
--- a/Eigen/src/Core/arch/ZVector/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_arch_ZVector_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_arch_ZVector_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/ZVector COMPONENT Devel
-)
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index dc3690444..d82ffed02 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -287,7 +287,7 @@ struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
   {
     Cost = 3 * NumTraits<Scalar>::AddCost +
            2 * NumTraits<Scalar>::MulCost +
-           2 * NumTraits<Scalar>::template Div<false>::Cost,
+           2 * scalar_div_cost<Scalar,false>::value,
     PacketAccess = false
   };
 };
@@ -375,7 +375,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
   typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
   enum {
     PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
-    Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
+    Cost = scalar_div_cost<result_type,PacketAccess>::value
   };
 };
 
diff --git a/Eigen/src/Core/functors/CMakeLists.txt b/Eigen/src/Core/functors/CMakeLists.txt
deleted file mode 100644
index f4b99a9c3..000000000
--- a/Eigen/src/Core/functors/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_Functor_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_Functor_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/functors COMPONENT Devel
-  )
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index eaa582f23..a2154d3b5 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -18,10 +18,9 @@ template<typename Scalar>
 struct scalar_constant_op {
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
-  template<typename Index, typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp(Index, Index = 0) const { return internal::pset1<PacketType>(m_other); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
+  template<typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
   const Scalar m_other;
 };
 template<typename Scalar>
@@ -31,8 +30,8 @@ struct functor_traits<scalar_constant_op<Scalar> >
 
 template<typename Scalar> struct scalar_identity_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_identity_op<Scalar> >
@@ -56,15 +55,15 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
     m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
     m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
   { 
     m_base = padd(m_base, pset1<Packet>(m_step));
     return m_low+Scalar(i)*m_step; 
   }
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
 
   const Scalar m_low;
   const Scalar m_step;
@@ -82,11 +81,11 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
     m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
     m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
   { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
 
   const Scalar m_low;
@@ -103,15 +102,15 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
     m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
   {}
 
-  template<typename Index>
+  template<typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Scalar operator() (Index i) const {
+  const Scalar operator() (IndexType i) const {
     return m_low + (m_length*Scalar(i))/m_divisor;
   }
 
-  template<typename Index>
+  template<typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Packet packetOp(Index i) const {
+  const Packet packetOp(IndexType i) const {
     return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
                                                      pset1<Packet>(m_divisor))); }
 
@@ -143,29 +142,11 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
     : impl((num_steps==1 ? high : low),high,num_steps)
   {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
 
-  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
-  // there row==0 and col is used for the actual iteration.
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const 
-  {
-    eigen_assert(col==0 || row==0);
-    return impl(col + row);
-  }
-
-  template<typename Index, typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
-
-  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
-  // there row==0 and col is used for the actual iteration.
-  template<typename Index, typename Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
-  {
-    eigen_assert(col==0 || row==0);
-    return impl.packetOp(col + row);
-  }
+  template<typename Packet,typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
 
   // This proxy object handles the actual required temporaries, the different
   // implementations (random vs. sequential access) as well as the
@@ -175,11 +156,11 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
   const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
 };
 
-// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
-// to indicate whether a functor allows linear access, just always answering 'yes' except for
-// scalar_identity_op.
-template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
-template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
+// Linear access is automatically determined from the operator() prototypes available for the given functor.
+// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
+// and linear access is not possible. In all other cases, linear access is enabled.
+// Users should not have to deal with this struture.
+template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
 
 } // end namespace internal
 
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index e2f3d869f..2009f8e57 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
      // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
      : (14 * NumTraits<Scalar>::AddCost +
         6 * NumTraits<Scalar>::MulCost +
-        NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
 #else
     Cost =
     (sizeof(Scalar) == 4
@@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
      // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
      : (23 * NumTraits<Scalar>::AddCost +
         12 * NumTraits<Scalar>::MulCost +
-        NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
 #endif
   };
 };
@@ -498,138 +498,32 @@ struct functor_traits<scalar_atan_op<Scalar> >
 template <typename Scalar>
 struct scalar_tanh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const {
-    /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
-          Doesn't do anything fancy, just a 13/6-degree rational interpolant
-       which
-          is accurate up to a couple of ulp in the range [-9, 9], outside of
-       which
-          the fl(tanh(x)) = +/-1. */
-
-    // Clamp the inputs to the range [-9, 9] since anything outside
-    // this range is +/-1.0f in single-precision.
-    const Scalar plus_9 = static_cast<Scalar>(9.0);
-    const Scalar minus_9 = static_cast<Scalar>(-9.0);
-    const Scalar x = numext::maxi(minus_9, numext::mini(plus_9, a));
-    // Scalarhe monomial coefficients of the numerator polynomial (odd).
-    const Scalar alpha_1 = static_cast<Scalar>(4.89352455891786e-03);
-    const Scalar alpha_3 = static_cast<Scalar>(6.37261928875436e-04);
-    const Scalar alpha_5 = static_cast<Scalar>(1.48572235717979e-05);
-    const Scalar alpha_7 = static_cast<Scalar>(5.12229709037114e-08);
-    const Scalar alpha_9 = static_cast<Scalar>(-8.60467152213735e-11);
-    const Scalar alpha_11 = static_cast<Scalar>(2.00018790482477e-13);
-    const Scalar alpha_13 = static_cast<Scalar>(-2.76076847742355e-16);
-    // Scalarhe monomial coefficients of the denominator polynomial (even).
-    const Scalar beta_0 = static_cast<Scalar>(4.89352518554385e-03);
-    const Scalar beta_2 = static_cast<Scalar>(2.26843463243900e-03);
-    const Scalar beta_4 = static_cast<Scalar>(1.18534705686654e-04);
-    const Scalar beta_6 = static_cast<Scalar>(1.19825839466702e-06);
-    // Since the polynomials are odd/even, we need x^2.
-    const Scalar x2 = x * x;
-    // Evaluate the numerator polynomial p.
-    Scalar p = x2 * alpha_13 + alpha_11;
-    p = x2 * p + alpha_9;
-    p = x2 * p + alpha_7;
-    p = x2 * p + alpha_5;
-    p = x2 * p + alpha_3;
-    p = x2 * p + alpha_1;
-    p = x * p;
-    // Evaluate the denominator polynomial p.
-    Scalar q = x2 * beta_6 + beta_4;
-    q = x2 * q + beta_2;
-    q = x2 * q + beta_0;
-    // Divide the numerator by the denominator.
-    return p / q;
-  }
+  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
   template <typename Packet>
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& _x) const {
-    /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
-        Doesn't do anything fancy, just a 13/6-degree rational interpolant which
-        is accurate up to a couple of ulp in the range [-9, 9], outside of which
-       the
-        fl(tanh(x)) = +/-1. */
-
-    // Clamp the inputs to the range [-9, 9] since anything outside
-    // this range is +/-1.0f in single-precision.
-    const Packet plus_9 = pset1<Packet>(9.0);
-    const Packet minus_9 = pset1<Packet>(-9.0);
-    const Packet x = pmax(minus_9, pmin(plus_9, _x));
-
-    // The monomial coefficients of the numerator polynomial (odd).
-    const Packet alpha_1 = pset1<Packet>(4.89352455891786e-03);
-    const Packet alpha_3 = pset1<Packet>(6.37261928875436e-04);
-    const Packet alpha_5 = pset1<Packet>(1.48572235717979e-05);
-    const Packet alpha_7 = pset1<Packet>(5.12229709037114e-08);
-    const Packet alpha_9 = pset1<Packet>(-8.60467152213735e-11);
-    const Packet alpha_11 = pset1<Packet>(2.00018790482477e-13);
-    const Packet alpha_13 = pset1<Packet>(-2.76076847742355e-16);
-
-    // The monomial coefficients of the denominator polynomial (even).
-    const Packet beta_0 = pset1<Packet>(4.89352518554385e-03);
-    const Packet beta_2 = pset1<Packet>(2.26843463243900e-03);
-    const Packet beta_4 = pset1<Packet>(1.18534705686654e-04);
-    const Packet beta_6 = pset1<Packet>(1.19825839466702e-06);
-
-    // Since the polynomials are odd/even, we need x^2.
-    const Packet x2 = pmul(x, x);
-
-    // Evaluate the numerator polynomial p.
-    Packet p = pmadd(x2, alpha_13, alpha_11);
-    p = pmadd(x2, p, alpha_9);
-    p = pmadd(x2, p, alpha_7);
-    p = pmadd(x2, p, alpha_5);
-    p = pmadd(x2, p, alpha_3);
-    p = pmadd(x2, p, alpha_1);
-    p = pmul(x, p);
-
-    // Evaluate the denominator polynomial p.
-    Packet q = pmadd(x2, beta_6, beta_4);
-    q = pmadd(x2, q, beta_2);
-    q = pmadd(x2, q, beta_0);
-
-    // Divide the numerator by the denominator.
-    return pdiv(p, q);
-  }
-};
-template <>
-struct scalar_tanh_op<std::complex<double> > {
-  EIGEN_DEVICE_FUNC inline const std::complex<double> operator()(
-      const std::complex<double>& a) const {
-    return numext::tanh(a);
-  }
-};
-template <>
-struct scalar_tanh_op<std::complex<float> > {
-  EIGEN_DEVICE_FUNC inline const std::complex<float> operator()(
-      const std::complex<float>& a) const {
-    return numext::tanh(a);
-  }
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
 };
+
 template <typename Scalar>
 struct functor_traits<scalar_tanh_op<Scalar> > {
   enum {
     PacketAccess = packet_traits<Scalar>::HasTanh,
-    Cost = (PacketAccess && (!is_same<Scalar, std::complex<float> >::value) &&
-                    (!is_same<Scalar, std::complex<double> >::value)
+    Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
 // The following numbers are based on the AVX implementation,
 #ifdef EIGEN_VECTORIZE_FMA
                 // Haswell can issue 2 add/mul/madd per cycle.
                 // 9 pmadd, 2 pmul, 1 div, 2 other
                 ? (2 * NumTraits<Scalar>::AddCost +
                    6 * NumTraits<Scalar>::MulCost +
-                   NumTraits<Scalar>::template Div<
-                       packet_traits<Scalar>::HasDiv>::Cost)
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
 #else
                 ? (11 * NumTraits<Scalar>::AddCost +
                    11 * NumTraits<Scalar>::MulCost +
-                   NumTraits<Scalar>::template Div<
-                       packet_traits<Scalar>::HasDiv>::Cost)
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
 #endif
                 // This number assumes a naive implementation of tanh
                 : (6 * NumTraits<Scalar>::AddCost +
                    3 * NumTraits<Scalar>::MulCost +
-                   2 * NumTraits<Scalar>::template Div<
-                           packet_traits<Scalar>::HasDiv>::Cost +
+                   2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
                    functor_traits<scalar_exp_op<Scalar> >::Cost))
   };
 };
diff --git a/Eigen/src/Core/products/CMakeLists.txt b/Eigen/src/Core/products/CMakeLists.txt
deleted file mode 100644
index 21fc94ae3..000000000
--- a/Eigen/src/Core/products/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_Product_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Core_Product_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel
-  )
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 4a5cf3fb6..3c1a7fc40 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
     alignmentPattern = AllAligned;
   }
 
-  const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
-  const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
+  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
 
   Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
   for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
     alignmentPattern = AllAligned;
   }
 
-  const Index offset1 = (FirstAligned && alignmentStep==1?3:1);
-  const Index offset3 = (FirstAligned && alignmentStep==1?1:3);
+  const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
 
   Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
   for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 8b3b44a58..6e6ee119b 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -44,16 +44,29 @@ template<bool Conjugate> struct conj_if;
 
 template<> struct conj_if<true> {
   template<typename T>
-  inline T operator()(const T& x) { return numext::conj(x); }
+  inline T operator()(const T& x) const { return numext::conj(x); }
   template<typename T>
-  inline T pconj(const T& x) { return internal::pconj(x); }
+  inline T pconj(const T& x) const { return internal::pconj(x); }
 };
 
 template<> struct conj_if<false> {
   template<typename T>
-  inline const T& operator()(const T& x) { return x; }
+  inline const T& operator()(const T& x) const { return x; }
   template<typename T>
-  inline const T& pconj(const T& x) { return x; }
+  inline const T& pconj(const T& x) const { return x; }
+};
+
+// Generic implementation for custom complex types.
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
+struct conj_helper
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
+
+  EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
+  { return padd(c, pmul(x,y)); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
+  { return conj_if<ConjLhs>()(x) *  conj_if<ConjRhs>()(y); }
 };
 
 template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
@@ -315,6 +328,11 @@ struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const Cwi
   static inline Scalar extractScalarFactor(const XprType& x)
   { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
 };
+template<typename Scalar, typename Plain1, typename Plain2>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
+                                                            const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
+ : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
+{};
 
 // pop opposite
 template<typename Scalar, typename NestedXpr>
diff --git a/Eigen/src/Core/util/CMakeLists.txt b/Eigen/src/Core/util/CMakeLists.txt
deleted file mode 100644
index a1e2e521f..000000000
--- a/Eigen/src/Core/util/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Core_util_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_Core_util_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel
-  )
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index dd44c7cbc..b13e5da25 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -56,7 +56,11 @@
   #pragma diag_suppress code_is_unreachable
   // Disable the "dynamic initialization in unreachable code" message
   #pragma diag_suppress initialization_not_reachable
-  // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
+  // Disable the "invalid error number" message that we get with older versions of nvcc
+  #pragma diag_suppress 1222
+  // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler)
+  #pragma diag_suppress 2527
+  #pragma diag_suppress 2529
   #pragma diag_suppress 2651
   #pragma diag_suppress 2653
   #pragma diag_suppress 2668
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index a7c4ec9a6..a9db2f4c7 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -28,9 +28,9 @@
   #define EIGEN_COMP_GNUC 0
 #endif
 
-/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__)
+/// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang
 #if defined(__clang__)
-  #define EIGEN_COMP_CLANG 1
+  #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
 #else
   #define EIGEN_COMP_CLANG 0
 #endif
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 4b35761e0..d4460bb77 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -22,6 +22,16 @@
 
 namespace Eigen {
 
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
+
+/**
+ * \brief The Index type as used for the API.
+ * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+ * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex.
+ */
+
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
+
 namespace internal {
 
 /** \internal
@@ -358,17 +368,46 @@ struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
 };
 #endif
 
+struct meta_yes { char a[1]; };
+struct meta_no  { char a[2]; };
+
 // Check whether T::ReturnType does exist
 template <typename T>
 struct has_ReturnType
 {
-  typedef char yes[1];
-  typedef char no[2];
+  template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
+  template <typename C> static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
+};
+
+template<typename T> const T& return_ref();
+
+template <typename T, typename IndexType=Index>
+struct has_nullary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
+  static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+
+template <typename T, typename IndexType=Index>
+struct has_unary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
+  static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
 
-  template <typename C> static yes& testFunctor(C const *, typename C::ReturnType const * = 0);
-  static no& testFunctor(...);
+template <typename T, typename IndexType=Index>
+struct has_binary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
+  static meta_no testFunctor(...);
 
-  static const bool value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(yes);
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
 };
 
 /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h
index 5d1bbeef6..86b60f52f 100644
--- a/Eigen/src/Core/util/ReenableStupidWarnings.h
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -15,7 +15,7 @@
   #if defined __NVCC__
 //    Don't reenable the diagnostic messages, as it turns out these messages need
 //    to be disabled at the point of the template instantiation (i.e the user code)
-//    otherwise they'll be triggeredby nvcc.
+//    otherwise they'll be triggered by nvcc.
 //    #pragma diag_default code_is_unreachable
 //    #pragma diag_default initialization_not_reachable
 //    #pragma diag_default 2651
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index a98ba6e86..fa60008ef 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -24,16 +24,6 @@
 
 namespace Eigen {
 
-typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
-
-/**
- * \brief The Index type as used for the API.
- * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
- * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex.
- */
-
-typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
-
 namespace internal {
 
 template<typename IndexDest, typename IndexSrc>
@@ -674,6 +664,20 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
   return false;
 }
 
+// Internal helper defining the cost of a scalar division for the type T.
+// The default heuristic can be specialized for each scalar type and architecture.
+template<typename T,bool Vectorized=false,typename EnaleIf = void>
+struct scalar_div_cost {
+  enum { value = 8*NumTraits<T>::MulCost };
+};
+
+
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+
+
 #ifdef EIGEN_DEBUG_ASSIGN
 std::string demangle_traversal(int t)
 {
@@ -717,7 +721,7 @@ std::string demangle_flags(int f)
   * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations.
   *
   * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3.
-  * You can let Eigen knows that by defining:
+  * You can let %Eigen knows that by defining:
     \code
     template<typename BinaryOp>
     struct ScalarBinaryOpTraits<U1,U2,BinaryOp> { typedef U3 ReturnType;  };
@@ -735,6 +739,14 @@ std::string demangle_flags(int f)
     struct ScalarBinaryOpTraits<U1,U2,internal::scalar_sum_op<U1,U2> > { typedef U1 ReturnType; };
     \endcode
   *
+  * By default, the following generic combinations are supported:
+  <table class="manual">
+  <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
+  <tr            ><td>\c T </td><td>\c T </td><td>\c * </td><td>\c T </td><td></td></tr>
+  <tr class="alt"><td>\c NumTraits<T>::Real </td><td>\c T </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  <tr            ><td>\c T </td><td>\c NumTraits<T>::Real </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  </table>
+  *
   * \sa CwiseBinaryOp
   */
 template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
@@ -751,6 +763,17 @@ struct ScalarBinaryOpTraits<T,T,BinaryOp>
   typedef T ReturnType;
 };
 
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+  typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+  typedef T ReturnType;
+};
+
 // For Matrix * Permutation
 template<typename T, typename BinaryOp>
 struct ScalarBinaryOpTraits<T,void,BinaryOp>
@@ -772,18 +795,6 @@ struct ScalarBinaryOpTraits<void,void,BinaryOp>
   typedef void ReturnType;
 };
 
-template<typename T, typename BinaryOp>
-struct ScalarBinaryOpTraits<T,std::complex<T>,BinaryOp>
-{
-  typedef std::complex<T> ReturnType;
-};
-
-template<typename T, typename BinaryOp>
-struct ScalarBinaryOpTraits<std::complex<T>, T,BinaryOp>
-{
-  typedef std::complex<T> ReturnType;
-};
-
 // We require Lhs and Rhs to have "compatible" scalar types.
 // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
 // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
diff --git a/Eigen/src/Eigenvalues/CMakeLists.txt b/Eigen/src/Eigenvalues/CMakeLists.txt
deleted file mode 100644
index 193e02685..000000000
--- a/Eigen/src/Eigenvalues/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_EIGENVALUES_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_EIGENVALUES_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigenvalues COMPONENT Devel
-  )
diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
index 6eeeb174e..36a91dffc 100644
--- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
+++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -1,8 +1,9 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2016 Tobias Wood <tobias@spinicist.org.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -89,7 +90,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       */
     typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> VectorType;
 
-    /** \brief Type for vector of complex scalar values eigenvalues as returned by betas().
+    /** \brief Type for vector of complex scalar values eigenvalues as returned by alphas().
       *
       * This is a column vector with entries of type #ComplexScalar.
       * The length of the vector is the size of #MatrixType.
@@ -114,7 +115,14 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       *
       * \sa compute() for an example.
       */
-    GeneralizedEigenSolver() : m_eivec(), m_alphas(), m_betas(), m_isInitialized(false), m_realQZ(), m_matS(), m_tmp() {}
+    GeneralizedEigenSolver()
+      : m_eivec(),
+        m_alphas(),
+        m_betas(),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
+        m_realQZ()
+    {}
 
     /** \brief Default constructor with memory preallocation
       *
@@ -126,10 +134,9 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       : m_eivec(size, size),
         m_alphas(size),
         m_betas(size),
-        m_isInitialized(false),
-        m_eigenvectorsOk(false),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
         m_realQZ(size),
-        m_matS(size, size),
         m_tmp(size)
     {}
 
@@ -149,10 +156,9 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       : m_eivec(A.rows(), A.cols()),
         m_alphas(A.cols()),
         m_betas(A.cols()),
-        m_isInitialized(false),
-        m_eigenvectorsOk(false),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
         m_realQZ(A.cols()),
-        m_matS(A.rows(), A.cols()),
         m_tmp(A.cols())
     {
       compute(A, B, computeEigenvectors);
@@ -160,22 +166,20 @@ template<typename _MatrixType> class GeneralizedEigenSolver
 
     /* \brief Returns the computed generalized eigenvectors.
       *
-      * \returns  %Matrix whose columns are the (possibly complex) eigenvectors.
+      * \returns  %Matrix whose columns are the (possibly complex) right eigenvectors.
+      * i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues.
       *
       * \pre Either the constructor 
       * GeneralizedEigenSolver(const MatrixType&,const MatrixType&, bool) or the member function
       * compute(const MatrixType&, const MatrixType& bool) has been called before, and
       * \p computeEigenvectors was set to true (the default).
       *
-      * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding
-      * to eigenvalue number \f$ k \f$ as returned by eigenvalues().  The
-      * eigenvectors are normalized to have (Euclidean) norm equal to one. The
-      * matrix returned by this function is the matrix \f$ V \f$ in the
-      * generalized eigendecomposition \f$ A = B V D V^{-1} \f$, if it exists.
-      *
       * \sa eigenvalues()
       */
-//    EigenvectorsType eigenvectors() const;
+    EigenvectorsType eigenvectors() const {
+      eigen_assert(m_vectorsOkay && "Eigenvectors for GeneralizedEigenSolver were not calculated.");
+      return m_eivec;
+    }
 
     /** \brief Returns an expression of the computed generalized eigenvalues.
       *
@@ -197,7 +201,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       */
     EigenvalueType eigenvalues() const
     {
-      eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized.");
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
       return EigenvalueType(m_alphas,m_betas);
     }
 
@@ -208,7 +212,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       * \sa betas(), eigenvalues() */
     ComplexVectorType alphas() const
     {
-      eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized.");
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
       return m_alphas;
     }
 
@@ -219,7 +223,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       * \sa alphas(), eigenvalues() */
     VectorType betas() const
     {
-      eigen_assert(m_isInitialized && "GeneralizedEigenSolver is not initialized.");
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
       return m_betas;
     }
 
@@ -250,7 +254,7 @@ template<typename _MatrixType> class GeneralizedEigenSolver
 
     ComputationInfo info() const
     {
-      eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+      eigen_assert(m_valuesOkay && "EigenSolver is not initialized.");
       return m_realQZ.info();
     }
 
@@ -270,29 +274,14 @@ template<typename _MatrixType> class GeneralizedEigenSolver
       EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
     }
     
-    MatrixType m_eivec;
+    EigenvectorsType m_eivec;
     ComplexVectorType m_alphas;
     VectorType m_betas;
-    bool m_isInitialized;
-    bool m_eigenvectorsOk;
+    bool m_valuesOkay, m_vectorsOkay;
     RealQZ<MatrixType> m_realQZ;
-    MatrixType m_matS;
-
-    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;
-    ColumnVectorType m_tmp;
+    ComplexVectorType m_tmp;
 };
 
-//template<typename MatrixType>
-//typename GeneralizedEigenSolver<MatrixType>::EigenvectorsType GeneralizedEigenSolver<MatrixType>::eigenvectors() const
-//{
-//  eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
-//  eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
-//  Index n = m_eivec.cols();
-//  EigenvectorsType matV(n,n);
-//  // TODO
-//  return matV;
-//}
-
 template<typename MatrixType>
 GeneralizedEigenSolver<MatrixType>&
 GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
@@ -302,28 +291,70 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
   using std::sqrt;
   using std::abs;
   eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
-
+  Index size = A.cols();
+  m_valuesOkay = false;
+  m_vectorsOkay = false;
   // Reduce to generalized real Schur form:
   // A = Q S Z and B = Q T Z
   m_realQZ.compute(A, B, computeEigenvectors);
-
   if (m_realQZ.info() == Success)
   {
-    m_matS = m_realQZ.matrixS();
-    const MatrixType &matT = m_realQZ.matrixT();
+    // Resize storage
+    m_alphas.resize(size);
+    m_betas.resize(size);
     if (computeEigenvectors)
-      m_eivec = m_realQZ.matrixZ().transpose();
-  
-    // Compute eigenvalues from matS
-    m_alphas.resize(A.cols());
-    m_betas.resize(A.cols());
+    {
+      m_eivec.resize(size,size);
+      m_tmp.resize(size);
+    }
+
+    // Aliases:
+    Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
+    ComplexVectorType &cv = m_tmp;
+    const MatrixType &mZ = m_realQZ.matrixZ();
+    const MatrixType &mS = m_realQZ.matrixS();
+    const MatrixType &mT = m_realQZ.matrixT();
+
     Index i = 0;
-    while (i < A.cols())
+    while (i < size)
     {
-      if (i == A.cols() - 1 || m_matS.coeff(i+1, i) == Scalar(0))
+      if (i == size - 1 || mS.coeff(i+1, i) == Scalar(0))
       {
-        m_alphas.coeffRef(i) = m_matS.coeff(i, i);
-        m_betas.coeffRef(i)  = matT.coeff(i,i);
+        // Real eigenvalue
+        m_alphas.coeffRef(i) = mS.diagonal().coeff(i);
+        m_betas.coeffRef(i)  = mT.diagonal().coeff(i);
+        if (computeEigenvectors)
+        {
+          v.setConstant(Scalar(0.0));
+          v.coeffRef(i) = Scalar(1.0);
+          // For singular eigenvalues do nothing more
+          if(abs(m_betas.coeffRef(i)) >= (std::numeric_limits<RealScalar>::min)())
+          {
+            // Non-singular eigenvalue
+            const Scalar alpha = real(m_alphas.coeffRef(i));
+            const Scalar beta = m_betas.coeffRef(i);
+            for (Index j = i-1; j >= 0; j--)
+            {
+              const Index st = j+1;
+              const Index sz = i-j;
+              if (j > 0 && mS.coeff(j, j-1) != Scalar(0))
+              {
+                // 2x2 block
+                Matrix<Scalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( v.segment(st,sz) );
+                Matrix<Scalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);
+                v.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);
+                j--;
+              }
+              else
+              {
+                v.coeffRef(j) = -v.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum() / (beta*mS.coeffRef(j,j) - alpha*mT.coeffRef(j,j));
+              }
+            }
+          }
+          m_eivec.col(i).real().noalias() = mZ.transpose() * v;
+          m_eivec.col(i).real().normalize();
+          m_eivec.col(i).imag().setConstant(0);
+        }
         ++i;
       }
       else
@@ -333,27 +364,53 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
 
         // T =  [a 0]
         //      [0 b]
-        RealScalar a = matT.diagonal().coeff(i),
-                   b = matT.diagonal().coeff(i+1);
+        RealScalar a = mT.diagonal().coeff(i),
+                   b = mT.diagonal().coeff(i+1);
+        const RealScalar beta = m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*b;
+
         // ^^ NOTE: using diagonal()(i) instead of coeff(i,i) workarounds a MSVC bug.
-        Matrix<RealScalar,2,2> S2 = m_matS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();
+        Matrix<RealScalar,2,2> S2 = mS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();
 
         Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1));
         Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1)));
-        m_alphas.coeffRef(i)   = ComplexScalar(S2.coeff(1,1) + p,  z);
-        m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z);
-
-        m_betas.coeffRef(i)   =
-        m_betas.coeffRef(i+1) = a*b;
-        
+        const ComplexScalar alpha = ComplexScalar(S2.coeff(1,1) + p, (beta > 0) ? z : -z);
+        m_alphas.coeffRef(i)   = conj(alpha);
+        m_alphas.coeffRef(i+1) = alpha;
+
+        if (computeEigenvectors) {
+          // Compute eigenvector in position (i+1) and then position (i) is just the conjugate
+          cv.setZero();
+          cv.coeffRef(i+1) = Scalar(1.0);
+          // here, the "static_cast" workaound expression template issues.
+          cv.coeffRef(i) = -(static_cast<Scalar>(beta*mS.coeffRef(i,i+1)) - alpha*mT.coeffRef(i,i+1))
+                          / (static_cast<Scalar>(beta*mS.coeffRef(i,i))   - alpha*mT.coeffRef(i,i));
+          for (Index j = i-1; j >= 0; j--)
+          {
+            const Index st = j+1;
+            const Index sz = i+1-j;
+            if (j > 0 && mS.coeff(j, j-1) != Scalar(0))
+            {
+              // 2x2 block
+              Matrix<ComplexScalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( cv.segment(st,sz) );
+              Matrix<ComplexScalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);
+              cv.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);
+              j--;
+            } else {
+              cv.coeffRef(j) =  cv.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum()
+                              / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
+            }
+          }
+          m_eivec.col(i+1).noalias() = (mZ.transpose() * cv);
+          m_eivec.col(i+1).normalize();
+          m_eivec.col(i) = m_eivec.col(i+1).conjugate();
+        }
         i += 2;
       }
     }
-  }
-
-  m_isInitialized = true;
-  m_eigenvectorsOk = false;//computeEigenvectors;
 
+    m_valuesOkay = true;
+    m_vectorsOkay = computeEigenvectors;
+  }
   return *this;
 }
 
diff --git a/Eigen/src/Eigenvalues/RealSchur.h b/Eigen/src/Eigenvalues/RealSchur.h
index 9d9063004..d6a339f07 100644
--- a/Eigen/src/Eigenvalues/RealSchur.h
+++ b/Eigen/src/Eigenvalues/RealSchur.h
@@ -236,7 +236,7 @@ template<typename _MatrixType> class RealSchur
     typedef Matrix<Scalar,3,1> Vector3s;
 
     Scalar computeNormOfT();
-    Index findSmallSubdiagEntry(Index iu, const Scalar& maxDiagEntry);
+    Index findSmallSubdiagEntry(Index iu);
     void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
     void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
     void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
@@ -293,18 +293,14 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
 
   if(norm!=0)
   {
-    Scalar maxDiagEntry = m_matT.cwiseAbs().diagonal().maxCoeff();
-
     while (iu >= 0)
     {
-      Index il = findSmallSubdiagEntry(iu,maxDiagEntry);
+      Index il = findSmallSubdiagEntry(iu);
 
       // Check for convergence
       if (il == iu) // One root found
       {
         m_matT.coeffRef(iu,iu) = m_matT.coeff(iu,iu) + exshift;
-        // keep track of the largest diagonal coefficient
-        maxDiagEntry = numext::maxi<Scalar>(maxDiagEntry,abs(m_matT.coeffRef(iu,iu)));
         if (iu > 0)
           m_matT.coeffRef(iu, iu-1) = Scalar(0);
         iu--;
@@ -313,8 +309,6 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
       else if (il == iu-1) // Two roots found
       {
         splitOffTwoRows(iu, computeU, exshift);
-        // keep track of the largest diagonal coefficient
-        maxDiagEntry = numext::maxi<Scalar>(maxDiagEntry,numext::maxi(abs(m_matT.coeff(iu,iu)), abs(m_matT.coeff(iu-1,iu-1))));
         iu -= 2;
         iter = 0;
       }
@@ -329,8 +323,6 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
         Index im;
         initFrancisQRStep(il, iu, shiftInfo, im, firstHouseholderVector);
         performFrancisQRStep(il, im, iu, computeU, firstHouseholderVector, workspace);
-        // keep track of the largest diagonal coefficient
-        maxDiagEntry = numext::maxi(maxDiagEntry,m_matT.cwiseAbs().diagonal().segment(im,iu-im).maxCoeff());
       }
     }
   }
@@ -360,13 +352,14 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
 
 /** \internal Look for single small sub-diagonal element and returns its index */
 template<typename MatrixType>
-inline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& maxDiagEntry)
+inline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu)
 {
   using std::abs;
   Index res = iu;
   while (res > 0)
   {
-    if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * maxDiagEntry)
+    Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));
+    if (abs(m_matT.coeff(res,res-1)) <= NumTraits<Scalar>::epsilon() * s)
       break;
     res--;
   }
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 8258fadbf..a9f56c4f5 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -501,7 +501,7 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag
         subdiag[i] = 0;
 
     // find the largest unreduced block
-    while (end>0 && subdiag[end-1]==0)
+    while (end>0 && subdiag[end-1]==RealScalar(0))
     {
       end--;
     }
@@ -569,8 +569,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
     EIGEN_USING_STD_MATH(atan2)
     EIGEN_USING_STD_MATH(cos)
     EIGEN_USING_STD_MATH(sin)
-    const Scalar s_inv3 = Scalar(1.0)/Scalar(3.0);
-    const Scalar s_sqrt3 = sqrt(Scalar(3.0));
+    const Scalar s_inv3 = Scalar(1)/Scalar(3);
+    const Scalar s_sqrt3 = sqrt(Scalar(3));
 
     // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0.  The
     // eigenvalues are the roots to this equation, all guaranteed to be
@@ -815,14 +815,14 @@ static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index sta
 //   RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));
   // This explain the following, somewhat more complicated, version:
   RealScalar mu = diag[end];
-  if(td==0)
+  if(td==RealScalar(0))
     mu -= abs(e);
   else
   {
     RealScalar e2 = numext::abs2(subdiag[end-1]);
     RealScalar h = numext::hypot(td,e);
-    if(e2==0)  mu -= (e / (td + (td>0 ? 1 : -1))) * (e / h);
-    else       mu -= e2 / (td + (td>0 ? h : -h));
+    if(e2==RealScalar(0)) mu -= (e / (td + (td>RealScalar(0) ? RealScalar(1) : RealScalar(-1)))) * (e / h);
+    else                  mu -= e2 / (td + (td>RealScalar(0) ? h : -h));
   }
   
   RealScalar x = diag[start] - mu;
diff --git a/Eigen/src/Geometry/CMakeLists.txt b/Eigen/src/Geometry/CMakeLists.txt
deleted file mode 100644
index f8f728b84..000000000
--- a/Eigen/src/Geometry/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-FILE(GLOB Eigen_Geometry_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Geometry_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry COMPONENT Devel
-  )
-
-ADD_SUBDIRECTORY(arch)
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 073f4dcd1..db5fd07c3 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -193,7 +193,7 @@ template<int Mode> struct transform_make_affine;
   * preprocessor token EIGEN_QT_SUPPORT is defined.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TRANSFORM_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TRANSFORM_PLUGIN.
   *
   * \sa class Matrix, class Quaternion
   */
@@ -1073,7 +1073,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
   }
 }
 
-/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being
+/** decomposes the linear part of the transformation as a product scaling x rotation, the scaling being
   * not necessarily positive.
   *
   * If either pointer is zero, the corresponding computation is skipped.
diff --git a/Eigen/src/Geometry/arch/CMakeLists.txt b/Eigen/src/Geometry/arch/CMakeLists.txt
deleted file mode 100644
index 1267a79c7..000000000
--- a/Eigen/src/Geometry/arch/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Geometry_arch_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Geometry_arch_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry/arch COMPONENT Devel
-  )
diff --git a/Eigen/src/Householder/CMakeLists.txt b/Eigen/src/Householder/CMakeLists.txt
deleted file mode 100644
index ce4937db0..000000000
--- a/Eigen/src/Householder/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Householder_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Householder_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Householder COMPONENT Devel
-  )
diff --git a/Eigen/src/IterativeLinearSolvers/CMakeLists.txt b/Eigen/src/IterativeLinearSolvers/CMakeLists.txt
deleted file mode 100644
index 59ccc0072..000000000
--- a/Eigen/src/IterativeLinearSolvers/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_IterativeLinearSolvers_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_IterativeLinearSolvers_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/IterativeLinearSolvers COMPONENT Devel
-  )
diff --git a/Eigen/src/Jacobi/CMakeLists.txt b/Eigen/src/Jacobi/CMakeLists.txt
deleted file mode 100644
index 490dac626..000000000
--- a/Eigen/src/Jacobi/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Jacobi_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_Jacobi_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Jacobi COMPONENT Devel
-  )
diff --git a/Eigen/src/LU/CMakeLists.txt b/Eigen/src/LU/CMakeLists.txt
deleted file mode 100644
index e0d8d78c1..000000000
--- a/Eigen/src/LU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-FILE(GLOB Eigen_LU_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_LU_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel
-  )
-
-ADD_SUBDIRECTORY(arch)
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 1632d3ac3..2b30fc146 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -436,7 +436,7 @@ template<typename _MatrixType> class FullPivLU
     Index m_nonzero_pivots;
     RealScalar m_l1_norm;
     RealScalar m_maxpivot, m_prescribedThreshold;
-    char m_det_pq;
+    signed char m_det_pq;
     bool m_isInitialized, m_usePrescribedThreshold;
 };
 
@@ -879,14 +879,12 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
   *
   * \sa class FullPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::fullPivLu() const
 {
   return FullPivLU<PlainObject>(eval());
 }
-#endif
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 87ac6a281..d43961887 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -284,7 +284,7 @@ template<typename _MatrixType> class PartialPivLU
     PermutationType m_p;
     TranspositionType m_rowsTranspositions;
     RealScalar m_l1_norm;
-    char m_det_p;
+    signed char m_det_p;
     bool m_isInitialized;
 };
 
@@ -584,14 +584,12 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
   *
   * \sa class PartialPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::partialPivLu() const
 {
   return PartialPivLU<PlainObject>(eval());
 }
-#endif
 
 /** \lu_module
   *
@@ -601,14 +599,12 @@ MatrixBase<Derived>::partialPivLu() const
   *
   * \sa class PartialPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::lu() const
 {
   return PartialPivLU<PlainObject>(eval());
 }
-#endif
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/LU/arch/CMakeLists.txt b/Eigen/src/LU/arch/CMakeLists.txt
deleted file mode 100644
index f6b7ed9ec..000000000
--- a/Eigen/src/LU/arch/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_LU_arch_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_LU_arch_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel
-  )
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index e1470c664..ebb64a62b 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -153,10 +153,12 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
     iC = _mm_mul_ps(rd,iC);
     iD = _mm_mul_ps(rd,iD);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
-    result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
-    result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
-    result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+    Index res_stride = result.outerStride();
+    float* res = result.data();
+    pstoret<float, Packet4f, ResultAlignment>(res+0,            _mm_shuffle_ps(iA,iB,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+res_stride,   _mm_shuffle_ps(iA,iB,0x22));
+    pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
   }
 
 };
@@ -316,14 +318,16 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
     iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
     iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));     // iA# / det
-    result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
-    result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));     // iB# / det
-    result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
-    result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));     // iC# / det
-    result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
-    result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));     // iD# / det
-    result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+    Index res_stride = result.outerStride();
+    double* res = result.data();
+    pstoret<double, Packet2d, ResultAlignment>(res+0,             _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride,    _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2,             _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2,  _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
   }
 };
 
diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt
deleted file mode 100644
index 2bad31416..000000000
--- a/Eigen/src/MetisSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_MetisSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_MetisSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/OrderingMethods/CMakeLists.txt b/Eigen/src/OrderingMethods/CMakeLists.txt
deleted file mode 100644
index 9f4bb2758..000000000
--- a/Eigen/src/OrderingMethods/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_OrderingMethods_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_OrderingMethods_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/OrderingMethods COMPONENT Devel
-  )
diff --git a/Eigen/src/PaStiXSupport/CMakeLists.txt b/Eigen/src/PaStiXSupport/CMakeLists.txt
deleted file mode 100644
index 28c657e9b..000000000
--- a/Eigen/src/PaStiXSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_PastixSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_PastixSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PaStiXSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/PardisoSupport/CMakeLists.txt b/Eigen/src/PardisoSupport/CMakeLists.txt
deleted file mode 100644
index a097ab401..000000000
--- a/Eigen/src/PardisoSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_PardisoSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_PardisoSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PardisoSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/QR/CMakeLists.txt b/Eigen/src/QR/CMakeLists.txt
deleted file mode 100644
index 96f43d7f5..000000000
--- a/Eigen/src/QR/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_QR_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_QR_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/QR COMPONENT Devel
-  )
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index ee6989897..9650781d6 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -163,9 +163,6 @@ template<typename _MatrixType> class ColPivHouseholderQR
       *
       * \returns a solution.
       *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
       * \note_about_checking_solutions
       *
       * \note_about_arbitrary_choice_of_solution
@@ -640,7 +637,6 @@ typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHousehol
   return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
 }
 
-#ifndef __CUDACC__
 /** \return the column-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class ColPivHouseholderQR
@@ -651,7 +647,6 @@ MatrixBase<Derived>::colPivHouseholderQr() const
 {
   return ColPivHouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
index f299d3c00..41e4ecdfd 100644
--- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -547,7 +547,6 @@ CompleteOrthogonalDecomposition<MatrixType>::householderQ() const {
   return m_cpqr.householderQ();
 }
 
-#ifndef __CUDACC__
 /** \return the complete orthogonal decomposition of \c *this.
   *
   * \sa class CompleteOrthogonalDecomposition
@@ -557,7 +556,6 @@ const CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::completeOrthogonalDecomposition() const {
   return CompleteOrthogonalDecomposition<PlainObject>(eval());
 }
-#endif  // __CUDACC__
 
 }  // end namespace Eigen
 
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index 4c85a4693..e0e15100d 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -164,9 +164,6 @@ template<typename _MatrixType> class FullPivHouseholderQR
       * \returns the exact or least-square solution if the rank is greater or equal to the number of columns of A,
       * and an arbitrary solution otherwise.
       *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
       * \note_about_checking_solutions
       *
       * \note_about_arbitrary_choice_of_solution
@@ -663,7 +660,6 @@ inline typename FullPivHouseholderQR<MatrixType>::MatrixQReturnType FullPivHouse
   return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions);
 }
 
-#ifndef __CUDACC__
 /** \return the full-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class FullPivHouseholderQR
@@ -674,7 +670,6 @@ MatrixBase<Derived>::fullPivHouseholderQr() const
 {
   return FullPivHouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 2e64fa7fe..3513d995c 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -128,9 +128,6 @@ template<typename _MatrixType> class HouseholderQR
       *
       * \returns a solution.
       *
-      * \note The case where b is a matrix is not yet implemented. Also, this
-      *       code is space inefficient.
-      *
       * \note_about_checking_solutions
       *
       * \note_about_arbitrary_choice_of_solution
@@ -396,7 +393,6 @@ void HouseholderQR<MatrixType>::computeInPlace()
   m_isInitialized = true;
 }
 
-#ifndef __CUDACC__
 /** \return the Householder QR decomposition of \c *this.
   *
   * \sa class HouseholderQR
@@ -407,7 +403,6 @@ MatrixBase<Derived>::householderQr() const
 {
   return HouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/SPQRSupport/CMakeLists.txt b/Eigen/src/SPQRSupport/CMakeLists.txt
deleted file mode 100644
index 4968beaf2..000000000
--- a/Eigen/src/SPQRSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SPQRSupport_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SPQRSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SPQRSupport/ COMPONENT Devel
-  )
diff --git a/Eigen/src/SVD/CMakeLists.txt b/Eigen/src/SVD/CMakeLists.txt
deleted file mode 100644
index 55efc44b1..000000000
--- a/Eigen/src/SVD/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SVD_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SVD_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SVD COMPONENT Devel
-  )
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 605c1a2a6..78dfd1d59 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -783,7 +783,6 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
   return *this;
 }
 
-#ifndef __CUDACC__
 /** \svd_module
   *
   * \return the singular value decomposition of \c *this computed by two-sided
@@ -797,7 +796,6 @@ MatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const
 {
   return JacobiSVD<PlainObject>(*this, computationOptions);
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/SparseCholesky/CMakeLists.txt b/Eigen/src/SparseCholesky/CMakeLists.txt
deleted file mode 100644
index 375a59d7a..000000000
--- a/Eigen/src/SparseCholesky/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseCholesky_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SparseCholesky_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseCholesky COMPONENT Devel
-  )
diff --git a/Eigen/src/SparseCore/CMakeLists.txt b/Eigen/src/SparseCore/CMakeLists.txt
deleted file mode 100644
index d860452a6..000000000
--- a/Eigen/src/SparseCore/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseCore_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_SparseCore_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseCore COMPONENT Devel
-  )
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index 15854a73b..55ad91f46 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -106,6 +106,25 @@ class SparseCompressedBase
     /** \returns whether \c *this is in compressed form. */
     inline bool isCompressed() const { return innerNonZeroPtr()==0; }
 
+    /** \returns a read-only view of the stored coefficients as a 1D array expression.
+      *
+      * \warning this method is for \b compressed \b storage \b only, and it will trigger an assertion otherwise.
+      *
+      * \sa valuePtr(), isCompressed() */
+    const Map<const Array<Scalar,Dynamic,1> > coeffs() const { eigen_assert(isCompressed()); return Array<Scalar,Dynamic,1>::Map(valuePtr(),nonZeros()); }
+
+    /** \returns a read-write view of the stored coefficients as a 1D array expression
+      *
+      * \warning this method is for \b compressed \b storage \b only, and it will trigger an assertion otherwise.
+      *
+      * Here is an example:
+      * \include SparseMatrix_coeffs.cpp
+      * and the output is:
+      * \include SparseMatrix_coeffs.out
+      *
+      * \sa valuePtr(), isCompressed() */
+    Map<Array<Scalar,Dynamic,1> > coeffs() { eigen_assert(isCompressed()); return Array<Scalar,Dynamic,1>::Map(valuePtr(),nonZeros()); }
+
   protected:
     /** Default constructor. Do nothing. */
     SparseCompressedBase() {}
diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h
index 531fea399..64ca5fc44 100644
--- a/Eigen/src/SparseCore/SparseMatrix.h
+++ b/Eigen/src/SparseCore/SparseMatrix.h
@@ -35,7 +35,7 @@ namespace Eigen {
   * \tparam _Index the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN.
   */
 
 namespace internal {
diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h
index 45f64e7f2..96b1b0504 100644
--- a/Eigen/src/SparseCore/SparseMatrixBase.h
+++ b/Eigen/src/SparseCore/SparseMatrixBase.h
@@ -21,7 +21,7 @@ namespace Eigen {
   * \tparam Derived is the derived type, e.g. a sparse matrix type, or an expression, etc.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN.
   */
 template<typename Derived> class SparseMatrixBase
   : public EigenBase<Derived>
diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h
index a48520c0c..d31d9babf 100644
--- a/Eigen/src/SparseCore/SparseSelfAdjointView.h
+++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h
@@ -250,11 +250,11 @@ template<int Mode, typename SparseLhsType, typename DenseRhsType, typename Dense
 inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
 {
   EIGEN_ONLY_USED_FOR_DEBUG(alpha);
-  // TODO use alpha
-  eigen_assert(alpha==AlphaType(1) && "alpha != 1 is not implemented yet, sorry");
   
-  typedef evaluator<SparseLhsType> LhsEval;
-  typedef typename evaluator<SparseLhsType>::InnerIterator LhsIterator;
+  typedef typename internal::nested_eval<SparseLhsType,DenseRhsType::MaxColsAtCompileTime>::type SparseLhsTypeNested;
+  typedef typename internal::remove_all<SparseLhsTypeNested>::type SparseLhsTypeNestedCleaned;
+  typedef evaluator<SparseLhsTypeNestedCleaned> LhsEval;
+  typedef typename LhsEval::InnerIterator LhsIterator;
   typedef typename SparseLhsType::Scalar LhsScalar;
   
   enum {
@@ -266,39 +266,53 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
     ProcessSecondHalf = !ProcessFirstHalf
   };
   
-  LhsEval lhsEval(lhs);
-  
-  for (Index j=0; j<lhs.outerSize(); ++j)
+  SparseLhsTypeNested lhs_nested(lhs);
+  LhsEval lhsEval(lhs_nested);
+
+  // work on one column at once
+  for (Index k=0; k<rhs.cols(); ++k)
   {
-    LhsIterator i(lhsEval,j);
-    if (ProcessSecondHalf)
+    for (Index j=0; j<lhs.outerSize(); ++j)
     {
-      while (i && i.index()<j) ++i;
-      if(i && i.index()==j)
+      LhsIterator i(lhsEval,j);
+      // handle diagonal coeff
+      if (ProcessSecondHalf)
       {
-        res.row(j) += i.value() * rhs.row(j);
-        ++i;
+        while (i && i.index()<j) ++i;
+        if(i && i.index()==j)
+        {
+          res(j,k) += alpha * i.value() * rhs(j,k);
+          ++i;
+        }
       }
+
+      // premultiplied rhs for scatters
+      typename ScalarBinaryOpTraits<AlphaType, typename DenseRhsType::Scalar>::ReturnType rhs_j(alpha*rhs(j,k));
+      // accumulator for partial scalar product
+      typename DenseResType::Scalar res_j(0);
+      for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
+      {
+        LhsScalar lhs_ij = i.value();
+        if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);
+        res_j += lhs_ij * rhs(i.index(),k);
+        res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;
+      }
+      res(j,k) += alpha * res_j;
+
+      // handle diagonal coeff
+      if (ProcessFirstHalf && i && (i.index()==j))
+        res(j,k) += alpha * i.value() * rhs(j,k);
     }
-    for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
-    {
-      Index a = LhsIsRowMajor ? j : i.index();
-      Index b = LhsIsRowMajor ? i.index() : j;
-      LhsScalar v = i.value();
-      res.row(a) += (v) * rhs.row(b);
-      res.row(b) += numext::conj(v) * rhs.row(a);
-    }
-    if (ProcessFirstHalf && i && (i.index()==j))
-      res.row(j) += i.value() * rhs.row(j);
   }
 }
 
 
 template<typename LhsView, typename Rhs, int ProductType>
 struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>
+: generic_product_impl_base<LhsView, Rhs, generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType> >
 {
   template<typename Dest>
-  static void evalTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs)
+  static void scaleAndAddTo(Dest& dst, const LhsView& lhsView, const Rhs& rhs, const typename Dest::Scalar& alpha)
   {
     typedef typename LhsView::_MatrixTypeNested Lhs;
     typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
@@ -306,16 +320,16 @@ struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, Pr
     LhsNested lhsNested(lhsView.matrix());
     RhsNested rhsNested(rhs);
     
-    dst.setZero();
-    internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, typename Dest::Scalar(1));
+    internal::sparse_selfadjoint_time_dense_product<LhsView::Mode>(lhsNested, rhsNested, dst, alpha);
   }
 };
 
 template<typename Lhs, typename RhsView, int ProductType>
 struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType>
+: generic_product_impl_base<Lhs, RhsView, generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, ProductType> >
 {
   template<typename Dest>
-  static void evalTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView)
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const RhsView& rhsView, const typename Dest::Scalar& alpha)
   {
     typedef typename RhsView::_MatrixTypeNested Rhs;
     typedef typename nested_eval<Lhs,Dynamic>::type LhsNested;
@@ -323,10 +337,9 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr
     LhsNested lhsNested(lhs);
     RhsNested rhsNested(rhsView.matrix());
     
-    dst.setZero();
-    // transpoe everything
+    // transpose everything
     Transpose<Dest> dstT(dst);
-    internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, typename Dest::Scalar(1));
+    internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
   }
 };
 
diff --git a/Eigen/src/SparseCore/SparseVector.h b/Eigen/src/SparseCore/SparseVector.h
index 167a9886c..00ee6ec89 100644
--- a/Eigen/src/SparseCore/SparseVector.h
+++ b/Eigen/src/SparseCore/SparseVector.h
@@ -22,7 +22,7 @@ namespace Eigen {
   * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme.
   *
   * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEVECTOR_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_SPARSEVECTOR_PLUGIN.
   */
 
 namespace internal {
diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt
deleted file mode 100644
index 69729ee89..000000000
--- a/Eigen/src/SparseLU/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseLU_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SparseLU_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel
-  )
diff --git a/Eigen/src/SparseQR/CMakeLists.txt b/Eigen/src/SparseQR/CMakeLists.txt
deleted file mode 100644
index f9ddf2bdb..000000000
--- a/Eigen/src/SparseQR/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseQR_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SparseQR_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseQR/ COMPONENT Devel
-  )
diff --git a/Eigen/src/StlSupport/CMakeLists.txt b/Eigen/src/StlSupport/CMakeLists.txt
deleted file mode 100644
index 0f094f637..000000000
--- a/Eigen/src/StlSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_StlSupport_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_StlSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/StlSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/SuperLUSupport/CMakeLists.txt b/Eigen/src/SuperLUSupport/CMakeLists.txt
deleted file mode 100644
index b28ebe583..000000000
--- a/Eigen/src/SuperLUSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SuperLUSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_SuperLUSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SuperLUSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/UmfPackSupport/CMakeLists.txt b/Eigen/src/UmfPackSupport/CMakeLists.txt
deleted file mode 100644
index a57de0020..000000000
--- a/Eigen/src/UmfPackSupport/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_UmfPackSupport_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_UmfPackSupport_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/UmfPackSupport COMPONENT Devel
-  )
diff --git a/Eigen/src/misc/CMakeLists.txt b/Eigen/src/misc/CMakeLists.txt
deleted file mode 100644
index a58ffb745..000000000
--- a/Eigen/src/misc/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_misc_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_misc_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/misc COMPONENT Devel
-  )
diff --git a/Eigen/src/plugins/CMakeLists.txt b/Eigen/src/plugins/CMakeLists.txt
deleted file mode 100644
index 1a1d3ffbd..000000000
--- a/Eigen/src/plugins/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_plugins_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_plugins_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/plugins COMPONENT Devel
-  )