Merged eigen/eigen into default

author: Konstantinos Margaritis <markos@freevec.org> 2017-08-24 12:24:01 +0300
committer: Konstantinos Margaritis <markos@freevec.org> 2017-08-24 12:24:01 +0300
commit: 1affe3d8dfa93ed10aea59d272263e78dda6769e (patch)
tree: ff8e62a5214a761cc5c1df4fa36a18d79979115d /Eigen/src/Core
parent: 4ce5ec5197b57d3060e8ac51c07f03198d5bf927 (diff)
parent: 21633e585b61564159d9cfbfbbad9006b8a09d64 (diff)
30 files changed, 341 insertions, 189 deletions
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 144608ec2..b1923da0f 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -861,6 +861,42 @@ template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
 { return Derived::Unit(3); }
 
+/** \brief Set the coefficients of \c *this to the i-th unit (basis) vector
+  *
+  * \param i index of the unique coefficient to be set to 1; must satisfy 0 <= i < size()
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setUnit(Index i)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  eigen_assert(i>=0 && i<size());
+  derived().setZero();
+  derived().coeffRef(i) = Scalar(1);
+  return derived();
+}
+
+/** \brief Resizes to the given \a newSize, and writes the i-th unit (basis) vector into *this.
+  *
+  * \param newSize the new size of the vector
+  * \param i index of the unique coefficient to be set to 1; must satisfy 0 <= i < newSize
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setUnit(Index newSize, Index i)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  eigen_assert(i>=0 && i<newSize);
+  derived().resize(newSize);
+  return setUnit(i);
+}
+
 } // end namespace Eigen
 
 #endif // EIGEN_CWISE_NULLARY_OP_H
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index b206b0a7a..483277fe6 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -18,18 +18,33 @@ enum {
   Small = 3
 };
 
+// Define the threshold value to fall back from the generic matrix-matrix product
+// implementation (heavy) to the lightweight coeff-based product implementation.
+// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+// in products/GeneralMatrixMatrix.h for more details.
+// TODO This threshold should also be used in the compile-time selector below.
+#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
+// This default value has been obtained on a Haswell architecture.
+#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
+#endif
+
 namespace internal {
 
 template<int Rows, int Cols, int Depth> struct product_type_selector;
 
 template<int Size, int MaxSize> struct product_size_category
 {
-  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
-                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
-         value = is_large  ? Large
-               : Size == 1 ? 1
-                           : Small
+  enum {
+    #ifndef EIGEN_CUDA_ARCH
+    is_large = MaxSize == Dynamic ||
+               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+    #else
+    is_large = 0,
+    #endif
+    value = is_large  ? Large
+          : Size == 1 ? 1
+                      : Small
   };
 };
 
@@ -379,8 +394,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
   *
   * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
   */
-#ifndef __CUDACC__
-
 template<typename Derived>
 template<typename OtherDerived>
 inline const Product<Derived, OtherDerived>
@@ -412,8 +425,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
   return Product<Derived, OtherDerived>(derived(), other.derived());
 }
 
-#endif // __CUDACC__
-
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
   *
   * The returned product will behave like any other expressions: the coefficients of the product will be
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index d19d5bbd2..30878eda6 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -299,7 +299,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
 /** \internal tries to do cache prefetching of \a addr */
 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
 {
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
 #if defined(__LP64__)
   // 64-bit pointer operand constraint for inlined asm
   asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
@@ -526,7 +526,7 @@ inline void palign(PacketType& first, const PacketType& second)
 ***************************************************************************/
 
 // Eigen+CUDA does not support complexes.
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
 
 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index 06d196702..7ca6a9280 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
 {
   typedef traits<PlainObjectType> TraitsBase;
   enum {
+    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
+                             ? PlainObjectType::ColsAtCompileTime
+                             : PlainObjectType::RowsAtCompileTime,
+
     InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                              ? int(PlainObjectType::InnerStrideAtCompileTime)
                              : int(StrideType::InnerStrideAtCompileTime),
     OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
-                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
+                                ? Dynamic
+                                : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
                              : int(StrideType::OuterStrideAtCompileTime),
     Alignment = int(MapOptions)&int(AlignedMask),
     Flags0 = TraitsBase::Flags & (~NestByRefBit),
@@ -108,9 +114,10 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
     inline Index outerStride() const
     {
       return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
-           : IsVectorAtCompileTime ? this->size()
-           : int(Flags)&RowMajorBit ? this->cols()
-           : this->rows();
+           : internal::traits<Map>::OuterStrideAtCompileTime != Dynamic ? internal::traits<Map>::OuterStrideAtCompileTime
+           : IsVectorAtCompileTime ? (this->size() * innerStride())
+           : int(Flags)&RowMajorBit ? (this->cols() * innerStride())
+           : (this->rows() * innerStride());
     }
 
     /** Constructor in the fixed-size case.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 75f34aa91..5ba5293a0 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -96,7 +96,7 @@ struct real_default_impl<Scalar,true>
 
 template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
 
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
 template<typename T>
 struct real_impl<std::complex<T> >
 {
@@ -144,7 +144,7 @@ struct imag_default_impl<Scalar,true>
 
 template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
 
-#ifdef __CUDA_ARCH__
+#ifdef EIGEN_CUDA_ARCH
 template<typename T>
 struct imag_impl<std::complex<T> >
 {
@@ -778,7 +778,7 @@ EIGEN_DEVICE_FUNC
 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
 isfinite_impl(const T& x)
 {
-  #ifdef __CUDA_ARCH__
+  #ifdef EIGEN_CUDA_ARCH
     return (::isfinite)(x);
   #elif EIGEN_USE_STD_FPCLASSIFY
     using std::isfinite;
@@ -793,7 +793,7 @@ EIGEN_DEVICE_FUNC
 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
 isinf_impl(const T& x)
 {
-  #ifdef __CUDA_ARCH__
+  #ifdef EIGEN_CUDA_ARCH
     return (::isinf)(x);
   #elif EIGEN_USE_STD_FPCLASSIFY
     using std::isinf;
@@ -808,7 +808,7 @@ EIGEN_DEVICE_FUNC
 typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
 isnan_impl(const T& x)
 {
-  #ifdef __CUDA_ARCH__
+  #ifdef EIGEN_CUDA_ARCH
     return (::isnan)(x);
   #elif EIGEN_USE_STD_FPCLASSIFY
     using std::isnan;
@@ -874,7 +874,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x);
 
 namespace numext {
 
-#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(__SYCL_DEVICE_ONLY__)
 template<typename T>
 EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
@@ -1088,7 +1088,7 @@ EIGEN_ALWAYS_INLINE float   log1p(float x) { return cl::sycl::log1p(x); }
 EIGEN_ALWAYS_INLINE double  log1p(double x) { return cl::sycl::log1p(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float log1p(const float &x) { return ::log1pf(x); }
 
@@ -1146,7 +1146,7 @@ EIGEN_ALWAYS_INLINE float   floor(float x) { return cl::sycl::floor(x); }
 EIGEN_ALWAYS_INLINE double  floor(double x) { return cl::sycl::floor(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float floor(const float &x) { return ::floorf(x); }
 
@@ -1167,7 +1167,7 @@ EIGEN_ALWAYS_INLINE float   ceil(float x) { return cl::sycl::ceil(x); }
 EIGEN_ALWAYS_INLINE double  ceil(double x) { return cl::sycl::ceil(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float ceil(const float &x) { return ::ceilf(x); }
 
@@ -1225,7 +1225,7 @@ EIGEN_ALWAYS_INLINE double  log(double x) { return cl::sycl::log(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float log(const float &x) { return ::logf(x); }
 
@@ -1253,7 +1253,7 @@ EIGEN_ALWAYS_INLINE float   abs(float x) { return cl::sycl::fabs(x); }
 EIGEN_ALWAYS_INLINE double  abs(double x) { return cl::sycl::fabs(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float abs(const float &x) { return ::fabsf(x); }
 
@@ -1283,7 +1283,7 @@ EIGEN_ALWAYS_INLINE float   exp(float x) { return cl::sycl::exp(x); }
 EIGEN_ALWAYS_INLINE double  exp(double x) { return cl::sycl::exp(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float exp(const float &x) { return ::expf(x); }
 
@@ -1303,7 +1303,7 @@ EIGEN_ALWAYS_INLINE float   expm1(float x) { return cl::sycl::expm1(x); }
 EIGEN_ALWAYS_INLINE double  expm1(double x) { return cl::sycl::expm1(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float expm1(const float &x) { return ::expm1f(x); }
 
@@ -1323,7 +1323,7 @@ EIGEN_ALWAYS_INLINE float   cos(float x) { return cl::sycl::cos(x); }
 EIGEN_ALWAYS_INLINE double  cos(double x) { return cl::sycl::cos(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cos(const float &x) { return ::cosf(x); }
 
@@ -1343,7 +1343,7 @@ EIGEN_ALWAYS_INLINE float   sin(float x) { return cl::sycl::sin(x); }
 EIGEN_ALWAYS_INLINE double  sin(double x) { return cl::sycl::sin(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sin(const float &x) { return ::sinf(x); }
 
@@ -1363,7 +1363,7 @@ EIGEN_ALWAYS_INLINE float   tan(float x) { return cl::sycl::tan(x); }
 EIGEN_ALWAYS_INLINE double  tan(double x) { return cl::sycl::tan(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tan(const float &x) { return ::tanf(x); }
 
@@ -1378,13 +1378,14 @@ T acos(const T &x) {
   return acos(x);
 }
 
-
+#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T acosh(const T &x) {
   EIGEN_USING_STD_MATH(acosh);
   return acosh(x);
 }
+#endif
 
 #if defined(__SYCL_DEVICE_ONLY__)
 EIGEN_ALWAYS_INLINE float   acos(float x) { return cl::sycl::acos(x); }
@@ -1393,7 +1394,7 @@ EIGEN_ALWAYS_INLINE float   acosh(float x) { return cl::sycl::acosh(x); }
 EIGEN_ALWAYS_INLINE double  acosh(double x) { return cl::sycl::acosh(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float acos(const float &x) { return ::acosf(x); }
 
@@ -1408,12 +1409,14 @@ T asin(const T &x) {
   return asin(x);
 }
 
+#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T asinh(const T &x) {
   EIGEN_USING_STD_MATH(asinh);
   return asinh(x);
 }
+#endif
 
 #if defined(__SYCL_DEVICE_ONLY__)
 EIGEN_ALWAYS_INLINE float   asin(float x) { return cl::sycl::asin(x); }
@@ -1422,7 +1425,7 @@ EIGEN_ALWAYS_INLINE float   asinh(float x) { return cl::sycl::asinh(x); }
 EIGEN_ALWAYS_INLINE double  asinh(double x) { return cl::sycl::asinh(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float asin(const float &x) { return ::asinf(x); }
 
@@ -1437,12 +1440,14 @@ T atan(const T &x) {
   return atan(x);
 }
 
+#if EIGEN_HAS_CXX11_MATH
 template<typename T>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T atanh(const T &x) {
   EIGEN_USING_STD_MATH(atanh);
   return atanh(x);
 }
+#endif
 
 #if defined(__SYCL_DEVICE_ONLY__)
 EIGEN_ALWAYS_INLINE float   atan(float x) { return cl::sycl::atan(x); }
@@ -1451,7 +1456,7 @@ EIGEN_ALWAYS_INLINE float   atanh(float x) { return cl::sycl::atanh(x); }
 EIGEN_ALWAYS_INLINE double  atanh(double x) { return cl::sycl::atanh(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float atan(const float &x) { return ::atanf(x); }
 
@@ -1472,7 +1477,7 @@ EIGEN_ALWAYS_INLINE float   cosh(float x) { return cl::sycl::cosh(x); }
 EIGEN_ALWAYS_INLINE double  cosh(double x) { return cl::sycl::cosh(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float cosh(const float &x) { return ::coshf(x); }
 
@@ -1492,7 +1497,7 @@ EIGEN_ALWAYS_INLINE float   sinh(float x) { return cl::sycl::sinh(x); }
 EIGEN_ALWAYS_INLINE double  sinh(double x) { return cl::sycl::sinh(x); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sinh(const float &x) { return ::sinhf(x); }
 
@@ -1510,12 +1515,12 @@ T tanh(const T &x) {
 #if defined(__SYCL_DEVICE_ONLY__)
 EIGEN_ALWAYS_INLINE float   tanh(float x) { return cl::sycl::tanh(x); }
 EIGEN_ALWAYS_INLINE double  tanh(double x) { return cl::sycl::tanh(x); }
-#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+#elif (!defined(EIGEN_CUDACC)) && EIGEN_FAST_MATH
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tanh(float x) { return internal::generic_fast_tanh_float(x); }
 #endif
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float tanh(const float &x) { return ::tanhf(x); }
 
@@ -1535,7 +1540,7 @@ EIGEN_ALWAYS_INLINE float   fmod(float x, float y) { return cl::sycl::fmod(x, y)
 EIGEN_ALWAYS_INLINE double  fmod(double x, double y) { return cl::sycl::fmod(x, y); }
 #endif // defined(__SYCL_DEVICE_ONLY__)
 
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
 template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float fmod(const float& a, const float& b) {
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 200e57741..11435903b 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const MatrixBase<OtherDerived>& other);
 
-#ifdef __CUDACC__
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
-    const Product<Derived,OtherDerived,LazyProduct>
-    operator*(const MatrixBase<OtherDerived> &other) const
-    { return this->lazyProduct(other); }
-#else
-
-    template<typename OtherDerived>
     const Product<Derived,OtherDerived>
     operator*(const MatrixBase<OtherDerived> &other) const;
 
-#endif
-
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     const Product<Derived,OtherDerived,LazyProduct>
@@ -277,6 +268,8 @@ template<typename Derived> class MatrixBase
     Derived& setIdentity();
     EIGEN_DEVICE_FUNC
     Derived& setIdentity(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC Derived& setUnit(Index i);
+    EIGEN_DEVICE_FUNC Derived& setUnit(Index newSize, Index i);
 
     bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
     bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@@ -305,7 +298,7 @@ template<typename Derived> class MatrixBase
     EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
     { return cwiseNotEqual(other).any(); }
 
-    NoAlias<Derived,Eigen::MatrixBase > noalias();
+    NoAlias<Derived,Eigen::MatrixBase > EIGEN_DEVICE_FUNC noalias();
 
     // TODO forceAlignedAccess is temporarily disabled
     // Need to find a nicer workaround.
@@ -437,8 +430,10 @@ template<typename Derived> class MatrixBase
 ///////// Jacobi module /////////
 
     template<typename OtherScalar>
+    EIGEN_DEVICE_FUNC
     void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
     template<typename OtherScalar>
+    EIGEN_DEVICE_FUNC
     void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
 
 ///////// SparseCore module /////////
diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h
index 33908010b..41fae5096 100644
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -33,6 +33,7 @@ class NoAlias
   public:
     typedef typename ExpressionType::Scalar Scalar;
     
+    EIGEN_DEVICE_FUNC
     explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
     
     template<typename OtherDerived>
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 77f4f6066..1dc7e223a 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
       * \a data pointers.
       *
+      * Here is an example using strides:
+      * \include Matrix_Map_stride.cpp
+      * Output: \verbinclude Matrix_Map_stride.out
+      *
       * \see class Map
       */
     //@{
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index c42725dbd..86966abdb 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -851,7 +851,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
     return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
   }
   
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
   template<int LoadMode,typename PacketType>
   EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
   {
@@ -895,7 +895,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
     return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
   }
   
-#ifndef __CUDACC__
+#ifndef EIGEN_CUDACC
   template<int LoadMode,typename PacketType>
   EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
   {
diff --git a/Eigen/src/Core/arch/CUDA/Complex.h b/Eigen/src/Core/arch/CUDA/Complex.h
index ca0aaed32..57d1201f4 100644
--- a/Eigen/src/Core/arch/CUDA/Complex.h
+++ b/Eigen/src/Core/arch/CUDA/Complex.h
@@ -16,7 +16,7 @@ namespace Eigen {
 
 namespace internal {
 
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
 
 // Many std::complex methods such as operator+, operator-, operator* and
 // operator/ are not constexpr. Due to this, clang does not treat them as device
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index e4e639fcd..8cedd65ad 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -140,7 +140,7 @@ struct half : public half_impl::half_base {
 
 namespace half_impl {
 
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
 
 // Intrinsics for native fp16 support. Note that on current hardware,
 // these are no faster than fp32 arithmetic (you need to use the half2
@@ -281,7 +281,7 @@ union FP32 {
 };
 
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
   return __float2half(ff);
 
 #elif defined(EIGEN_HAS_FP16_C)
@@ -336,7 +336,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
 }
 
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
   return __half2float(h);
 
 #elif defined(EIGEN_HAS_FP16_C)
@@ -370,7 +370,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
   return (a.x & 0x7fff) == 0x7c00;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return __hisnan(a);
 #else
   return (a.x & 0x7fff) > 0x7c00;
@@ -386,7 +386,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
   return result;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
   return half(hexp(a));
 #else
    return half(::expf(float(a)));
@@ -396,7 +396,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) {
   return half(numext::expm1(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return half(::hlog(a));
 #else
   return half(::logf(float(a)));
@@ -409,7 +409,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
   return half(::log10f(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
   return half(hsqrt(a));
 #else
     return half(::sqrtf(float(a)));
@@ -431,14 +431,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
   return half(::tanhf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
   return half(hfloor(a));
 #else
   return half(::floorf(float(a)));
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
   return half(hceil(a));
 #else
   return half(::ceilf(float(a)));
@@ -446,7 +446,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
 }
 
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return __hlt(b, a) ? b : a;
 #else
   const float f1 = static_cast<float>(a);
@@ -455,7 +455,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return __hlt(a, b) ? b : a;
 #else
   const float f1 = static_cast<float>(a);
@@ -576,7 +576,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
   return Eigen::half(::expf(float(a)));
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
   return Eigen::half(::hlog(a));
 #else
   return Eigen::half(::logf(float(a)));
@@ -610,14 +610,14 @@ struct hash<Eigen::half> {
 
 
 // Add the missing shfl_xor intrinsic
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
   return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
 }
 #endif
 
 // ldg() has an overload for __half, but we also need one for Eigen::half.
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
   return Eigen::half_impl::raw_uint16_to_half(
       __ldg(reinterpret_cast<const unsigned short*>(ptr)));
@@ -625,7 +625,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
 #endif
 
 
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 namespace Eigen {
 namespace numext {
 
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h
index 987a5291c..ff6256ce0 100644
--- a/Eigen/src/Core/arch/CUDA/MathFunctions.h
+++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -17,7 +17,7 @@ namespace internal {
 // Make sure this is only available when targeting a GPU: we don't want to
 // introduce conflicts between these packet_traits definitions and the ones
 // we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 float4 plog<float4>(const float4& a)
 {
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index 8c46af09b..97a8abe59 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -17,7 +17,7 @@ namespace internal {
 // Make sure this is only available when targeting a GPU: we don't want to
 // introduce conflicts between these packet_traits definitions and the ones
 // we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
 template<> struct is_arithmetic<float4>  { enum { value = true }; };
 template<> struct is_arithmetic<double2> { enum { value = true }; };
 
@@ -196,7 +196,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
 
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
   return __ldg((const float4*)from);
 #else
   return make_float4(from[0], from[1], from[2], from[3]);
@@ -204,7 +204,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const fl
 }
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
   return __ldg((const double2*)from);
 #else
   return make_double2(from[0], from[1]);
@@ -213,7 +213,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const
 
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
   return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
 #else
   return make_float4(from[0], from[1], from[2], from[3]);
@@ -221,7 +221,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const
 }
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
   return make_double2(__ldg(from+0), __ldg(from+1));
 #else
   return make_double2(from[0], from[1]);
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index f4ae3c3c5..ba6a7f920 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -15,7 +15,7 @@ namespace Eigen {
 namespace internal {
 
 // Most of the following operations require arch >= 3.0
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
 
 template<> struct is_arithmetic<half2> { enum { value = true }; };
 
@@ -69,7 +69,7 @@ template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half*
 
 template<>
  __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
-#if __CUDA_ARCH__ >= 350
+#if EIGEN_CUDA_ARCH >= 350
    return __ldg((const half2*)from);
 #else
   return __halves2half2(*(from+0), *(from+1));
@@ -78,7 +78,7 @@ template<>
 
 template<>
 __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
-#if __CUDA_ARCH__ >= 350
+#if EIGEN_CUDA_ARCH >= 350
    return __halves2half2(__ldg(from+0), __ldg(from+1));
 #else
   return __halves2half2(*(from+0), *(from+1));
@@ -116,7 +116,7 @@ ptranspose(PacketBlock<half2,2>& kernel) {
 }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __halves2half2(a, __hadd(a, __float2half(1.0f)));
 #else
   float f = __half2float(a) + 1.0f;
@@ -125,7 +125,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half&
 }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hadd2(a, b);
 #else
   float a1 = __low2float(a);
@@ -139,7 +139,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, cons
 }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hsub2(a, b);
 #else
   float a1 = __low2float(a);
@@ -153,7 +153,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, cons
 }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hneg2(a);
 #else
   float a1 = __low2float(a);
@@ -165,7 +165,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
 template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hmul2(a, b);
 #else
   float a1 = __low2float(a);
@@ -179,7 +179,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, cons
 }
 
 template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
    return __hfma2(a, b, c);
 #else
   float a1 = __low2float(a);
@@ -225,7 +225,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, cons
 }
 
 template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hadd(__low2half(a), __high2half(a));
 #else
   float a1 = __low2float(a);
@@ -235,7 +235,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2&
 }
 
 template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   __half first = __low2half(a);
   __half second = __high2half(a);
   return __hgt(first, second) ? first : second;
@@ -247,7 +247,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const ha
 }
 
 template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   __half first = __low2half(a);
   __half second = __high2half(a);
   return __hlt(first, second) ? first : second;
@@ -259,7 +259,7 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const ha
 }
 
 template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
-#if __CUDA_ARCH__ >= 530
+#if EIGEN_CUDA_ARCH >= 530
   return __hmul(__low2half(a), __high2half(a));
 #else
   float a1 = __low2float(a);
@@ -284,7 +284,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) {
   return __floats2half2_rn(r1, r2);
 }
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
 
 template<>  __device__ EIGEN_STRONG_INLINE
 half2 plog<half2>(const half2& a) {
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index aa5fbce8e..30f870c3d 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -19,7 +19,7 @@ struct scalar_cast_op<float, Eigen::half> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
   typedef Eigen::half result_type;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
-    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
       return __float2half(a);
     #else
       return Eigen::half(a);
@@ -37,7 +37,7 @@ struct scalar_cast_op<int, Eigen::half> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
   typedef Eigen::half result_type;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
-    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
       return __float2half(static_cast<float>(a));
     #else
       return Eigen::half(static_cast<float>(a));
@@ -55,7 +55,7 @@ struct scalar_cast_op<Eigen::half, float> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
   typedef float result_type;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
-    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
       return __half2float(a);
     #else
       return static_cast<float>(a);
@@ -69,7 +69,7 @@ struct functor_traits<scalar_cast_op<Eigen::half, float> >
 
 
 
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
 
 template <>
 struct type_casting_traits<Eigen::half, float> {
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 4153b877c..1077d8eb0 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -144,7 +144,7 @@ template<typename Scalar> struct swap_assign_op {
   EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
   {
-#ifdef __CUDACC__
+#ifdef EIGEN_CUDACC
     // FIXME is there some kind of cuda::swap?
     Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
 #else
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 6440e1d09..ed4d3182b 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -427,7 +427,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   template<typename Dst>
   static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+    // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
+    // to determine the following heuristic.
+    // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
+    // unless it has been specialized by the user or for a given architecture.
+    // Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
+    // I'm not sure it is still required.
+    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
       lazyproduct::evalTo(dst, lhs, rhs);
     else
     {
@@ -439,7 +445,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   template<typename Dst>
   static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
       lazyproduct::addTo(dst, lhs, rhs);
     else
       scaleAndAddTo(dst,lhs, rhs, Scalar(1));
@@ -448,7 +454,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   template<typename Dst>
   static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
-    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+    if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
       lazyproduct::subTo(dst, lhs, rhs);
     else
       scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index 41e18ff07..9176a1382 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
    BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
    char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
    EIGTYPE beta(1); \
-   BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
+   BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
   } \
 };
 
@@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
   } \
 };
 
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk)
+#else
 EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
 EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)
+#endif
 
 // TODO hanlde complex cases
 // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index 7a3bdbf20..b0f6b0d5b 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -46,7 +46,7 @@ namespace internal {
 
 // gemm specialization
 
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
 template< \
   typename Index, \
   int LhsStorageOrder, bool ConjugateLhs, \
@@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
     ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
   } else b = _rhs; \
 \
-  BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 }};
 
-GEMM_SPECIALIZATION(double,   d,  double, d)
-GEMM_SPECIALIZATION(float,    f,  float,  s)
-GEMM_SPECIALIZATION(dcomplex, cd, double, z)
-GEMM_SPECIALIZATION(scomplex, cf, float,  c)
+#ifdef EIGEN_USE_MKL
+GEMM_SPECIALIZATION(double,   d,  double, dgemm)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  cgemm)
+#else
+GEMM_SPECIALIZATION(double,   d,  double, dgemm_)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm_)
+GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
+GEMM_SPECIALIZATION(scomplex, cf, float,  cgemm_)
+#endif
 
 } // end namespase internal
 
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index e3a5d5892..6e36c2b3c 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
 EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
 EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
 
-#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
 template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
 struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
 { \
@@ -113,14 +113,21 @@ static void run( \
     x_ptr=x_tmp.data(); \
     incx=1; \
   } else x_ptr=rhs; \
-  BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
 
-EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, d)
-EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  s)
-EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
-EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
+#else
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  cgemv_)
+#endif
 
 } // end namespase internal
 
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index a45238d69..9a5318507 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -40,7 +40,7 @@ namespace internal {
 
 /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
 
-#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
     } else b = _rhs; \
 \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
   } \
 };
 
 
-#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
     } \
 \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
   } \
 };
 
-EIGEN_BLAS_SYMM_L(double, double, d, d)
-EIGEN_BLAS_SYMM_L(float, float, f, s)
-EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
+#endif
 
 /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
 
-#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
     } else b = _lhs; \
 \
-    BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
 \
   } \
 };
 
 
-#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
     } \
 \
-    BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
   } \
 };
 
-EIGEN_BLAS_SYMM_R(double, double, d, d)
-EIGEN_BLAS_SYMM_R(float, float, f, s)
-EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
+#endif
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index 38f23accf..1238345e3 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
     x_tmp=map_x.conjugate(); \
     x_ptr=x_tmp.data(); \
   } else x_ptr=_rhs; \
-  BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
 }\
 };
 
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
+#else
 EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
 EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float,  chemv_)
+#endif
 
 } // end namespace internal
 
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 6ec5a8a0b..539b6c0c6 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
 
-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    // To work around an "error: member reference base type 'Matrix<...>
+    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
+    // not a structure or union" compilation error in nvcc (tested V8.0.61),
+    // create a dummy internal::constructor_without_unaligned_array_assert
+    // object to pass to the Matrix constructor.
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
     triangularBuffer.setZero();
     if((Mode&ZeroDiag)==ZeroDiag)
       triangularBuffer.diagonal().setZero();
@@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
     ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
 
-    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
     triangularBuffer.setZero();
     if((Mode&ZeroDiag)==ZeroDiag)
       triangularBuffer.diagonal().setZero();
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index aecded6bb..a25197ab0 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
 EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
 
 // implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, int Mode, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
    } \
    /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
 /* call ?trmm*/ \
-   BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
 \
 /* Add op(a_triangular)*b into res*/ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
   } \
 };
 
-EIGEN_BLAS_TRMM_L(double, double, d, d)
-EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_L(float, float, f, s)
-EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm)
+EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
+#endif
 
 // implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
 template <typename Index, int Mode, \
           int LhsStorageOrder, bool ConjugateLhs, \
           int RhsStorageOrder, bool ConjugateRhs> \
@@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
    } \
    /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
 /* call ?trmm*/ \
-   BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
 \
 /* Add op(a_triangular)*b into res*/ \
    Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
   } \
 };
 
-EIGEN_BLAS_TRMM_R(double, double, d, d)
-EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_R(float, float, f, s)
-EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm)
+EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
+#endif
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 07bf26ce5..3d47a2b94 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
 EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
 
 // implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
   enum { \
@@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
    diag = IsUnitDiag ? 'U' : 'N'; \
 \
 /* call ?TRMV*/ \
-   BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
 \
 /* Add op(a_tr)rhs into res*/ \
-   BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
 /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
    if (size<(std::max)(rows,cols)) { \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
        m = convert_index<BlasIndex>(size); \
        n = convert_index<BlasIndex>(cols-size); \
      } \
-     BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
    } \
   } \
 };
 
-EIGEN_BLAS_TRMV_CM(double,   double, d,  d)
-EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_CM(float,    float,  f,  s)
-EIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_CM(double,   double, d,  d,)
+EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_CM(float,    float,  f,  s,)
+EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8,  cf, c,)
+#else
+EIGEN_BLAS_TRMV_CM(double,   double, d,  d, _)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
+EIGEN_BLAS_TRMV_CM(float,    float,  f,  s, _)
+EIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c, _)
+#endif
 
 // implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
 template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
 struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
   enum { \
@@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
    diag = IsUnitDiag ? 'U' : 'N'; \
 \
 /* call ?TRMV*/ \
-   BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
 \
 /* Add op(a_tr)rhs into res*/ \
-   BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
 /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
    if (size<(std::max)(rows,cols)) { \
      if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
        m = convert_index<BlasIndex>(size); \
        n = convert_index<BlasIndex>(cols-size); \
      } \
-     BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
    } \
   } \
 };
 
-EIGEN_BLAS_TRMV_RM(double,   double, d,  d)
-EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_RM(float,    float,  f,  s)
-EIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_RM(double,   double, d,  d,)
+EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,)
+EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8,  cf, c,)
+#else
+EIGEN_BLAS_TRMV_RM(double,   double, d,  d,_)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
+EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,_)
+EIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c,_)
+#endif
 
 } // end namespase internal
 
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index 88c0fb794..f0775116a 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -38,7 +38,7 @@ namespace Eigen {
 namespace internal {
 
 // implements LeftSide op(triangular)^-1 * general
-#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
 template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
 struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
 { \
@@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
    } \
    if (IsUnitDiag) diag='U'; \
 /* call ?trsm*/ \
-   BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
  } \
 };
 
-EIGEN_BLAS_TRSM_L(double,   double, d)
-EIGEN_BLAS_TRSM_L(dcomplex, double, z)
-EIGEN_BLAS_TRSM_L(float,    float,  s)
-EIGEN_BLAS_TRSM_L(scomplex, float,  c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_L(double,   double, dtrsm)
+EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_L(float,    float,  strsm)
+EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_L(double,   double, dtrsm_)
+EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_L(float,    float,  strsm_)
+EIGEN_BLAS_TRSM_L(scomplex, float,  ctrsm_)
+#endif
 
 // implements RightSide general * op(triangular)^-1
-#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
 template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
 struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
 { \
@@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
    } \
    if (IsUnitDiag) diag='U'; \
 /* call ?trsm*/ \
-   BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
    /*std::cout << "TRMS_L specialization!\n";*/ \
  } \
 };
 
-EIGEN_BLAS_TRSM_R(double,   double, d)
-EIGEN_BLAS_TRSM_R(dcomplex, double, z)
-EIGEN_BLAS_TRSM_R(float,    float,  s)
-EIGEN_BLAS_TRSM_R(scomplex, float,  c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_R(double,   double, dtrsm)
+EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_R(float,    float,  strsm)
+EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8,  ctrsm)
+#else
+EIGEN_BLAS_TRSM_R(double,   double, dtrsm_)
+EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_R(float,    float,  strsm_)
+EIGEN_BLAS_TRSM_R(scomplex, float,  ctrsm_)
+#endif
 
 } // end namespace internal
 
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index b91d1d1af..8ef0f3594 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -55,6 +55,7 @@
 #endif
 
 #if defined __NVCC__
+  #pragma diag_suppress boolean_controlling_expr_is_constant
   // Disable the "statement is unreachable" message
   #pragma diag_suppress code_is_unreachable
   // Disable the "dynamic initialization in unreachable code" message
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 26b59669e..b7d6ecc76 100755
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -49,10 +49,11 @@
   #define EIGEN_USE_LAPACKE
 #endif
 
-#if defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
   #define EIGEN_USE_MKL
 #endif
 
+
 #if defined EIGEN_USE_MKL
 #   include <mkl.h> 
 /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@@ -108,6 +109,10 @@
 #endif
 #endif
 
+#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
+#include "../../misc/blas.h"
+#endif
+
 namespace Eigen {
 
 typedef std::complex<double> dcomplex;
@@ -121,8 +126,5 @@ typedef int BlasIndex;
 
 } // end namespace Eigen
 
-#if defined(EIGEN_USE_BLAS)
-#include "../../misc/blas.h"
-#endif
 
 #endif // EIGEN_MKL_SUPPORT_H
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 755646795..b63ea2697 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -413,7 +413,7 @@
 // Does the compiler support variadic templates?
 #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
 #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
-  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
     // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
     //    this prevents nvcc from crashing when compiling Eigen on Tegra X1
 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
@@ -427,9 +427,9 @@
 // Does the compiler fully support const expressions? (as in c++14)
 #ifndef EIGEN_HAS_CONSTEXPR
 
-#if defined(__CUDACC__)
+#if defined(EIGEN_CUDACC)
 // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
   #define EIGEN_HAS_CONSTEXPR 1
 #endif
 #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@@ -669,7 +669,7 @@ namespace Eigen {
  * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
  * vectorized and non-vectorized code.
  */
-#if (defined __CUDACC__)
+#if (defined EIGEN_CUDACC)
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
@@ -837,7 +837,8 @@ namespace Eigen {
 // just an empty macro !
 #define EIGEN_EMPTY
 
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 ||  defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
+  // For strict MSVC older than 1900, or whenever EIGEN_CUDACC_VER is non-zero (first observed with MSVC 1900 && CUDA 8), using the base operator is sufficient (cf. Bugs 1000, 1324)
   #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
     using Base::operator =;
 #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -990,7 +991,7 @@ namespace Eigen {
 #  define EIGEN_TRY try
 #  define EIGEN_CATCH(X) catch (X)
 #else
-#  ifdef __CUDA_ARCH__
+#  ifdef EIGEN_CUDA_ARCH
 #    define EIGEN_THROW_X(X) asm("trap;")
 #    define EIGEN_THROW asm("trap;")
 #  else
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 8de605500..0fa818008 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -11,7 +11,7 @@
 #ifndef EIGEN_META_H
 #define EIGEN_META_H
 
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 #include <cfloat>
 #include <math_constants.h>
 #endif
@@ -169,7 +169,7 @@ template<bool Condition, typename T=void> struct enable_if;
 template<typename T> struct enable_if<true,T>
 { typedef T type; };
 
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 #if !defined(__FLT_EPSILON__)
 #define __FLT_EPSILON__ FLT_EPSILON
 #define __DBL_EPSILON__ DBL_EPSILON
@@ -523,13 +523,13 @@ template<typename T, typename U> struct scalar_product_traits
 
 namespace numext {
   
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 template<typename T> EIGEN_DEVICE_FUNC   void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
 #else
 template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
 #endif
 
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 using internal::device::numeric_limits;
 #else
 using std::numeric_limits;
author	Konstantinos Margaritis <markos@freevec.org>	2017-08-24 12:24:01 +0300
committer	Konstantinos Margaritis <markos@freevec.org>	2017-08-24 12:24:01 +0300
commit	1affe3d8dfa93ed10aea59d272263e78dda6769e (patch)
tree	ff8e62a5214a761cc5c1df4fa36a18d79979115d /Eigen/src/Core
parent	4ce5ec5197b57d3060e8ac51c07f03198d5bf927 (diff)
parent	21633e585b61564159d9cfbfbbad9006b8a09d64 (diff)