-rw-r--r--  CMakeLists.txt  7
-rw-r--r--  Eigen/Core  37
-rw-r--r--  Eigen/src/CholmodSupport/CholmodSupport.h  76
-rw-r--r--  Eigen/src/Core/ArrayWrapper.h  100
-rw-r--r--  Eigen/src/Core/AssignEvaluator.h  38
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h  2
-rw-r--r--  Eigen/src/Core/CwiseNullaryOp.h  63
-rw-r--r--  Eigen/src/Core/DenseBase.h  6
-rw-r--r--  Eigen/src/Core/DenseCoeffsBase.h  2
-rw-r--r--  Eigen/src/Core/DiagonalMatrix.h  8
-rw-r--r--  Eigen/src/Core/GenericPacketMath.h  28
-rw-r--r--  Eigen/src/Core/MatrixBase.h  4
-rw-r--r--  Eigen/src/Core/PlainObjectBase.h  8
-rw-r--r--  Eigen/src/Core/ProductEvaluators.h  16
-rw-r--r--  Eigen/src/Core/Solve.h  16
-rw-r--r--  Eigen/src/Core/Transpose.h  5
-rw-r--r--  Eigen/src/Core/TriangularMatrix.h  34
-rw-r--r--  Eigen/src/Core/VectorwiseOp.h  4
-rw-r--r--  Eigen/src/Core/arch/AVX/Complex.h  20
-rw-r--r--  Eigen/src/Core/arch/AVX/PacketMath.h  20
-rw-r--r--  Eigen/src/Core/arch/CUDA/PacketMathHalf.h  2
-rw-r--r--  Eigen/src/Core/arch/SSE/Complex.h  20
-rwxr-xr-x  Eigen/src/Core/arch/SSE/PacketMath.h  38
-rw-r--r--  Eigen/src/Core/functors/BinaryFunctors.h  2
-rw-r--r--  Eigen/src/Core/functors/NullaryFunctors.h  149
-rw-r--r--  Eigen/src/Core/functors/UnaryFunctors.h  2
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h  50
-rw-r--r--  Eigen/src/Core/products/Parallelizer.h  19
-rw-r--r--  Eigen/src/Core/util/Macros.h  9
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h  3
-rw-r--r--  Eigen/src/Core/util/XprHelper.h  2
-rw-r--r--  Eigen/src/Geometry/AlignedBox.h  80
-rw-r--r--  Eigen/src/Geometry/AngleAxis.h  56
-rw-r--r--  Eigen/src/Geometry/EulerAngles.h  8
-rw-r--r--  Eigen/src/Geometry/Homogeneous.h  95
-rw-r--r--  Eigen/src/Geometry/Hyperplane.h  55
-rw-r--r--  Eigen/src/Geometry/OrthoMethods.h  9
-rw-r--r--  Eigen/src/Geometry/ParametrizedLine.h  54
-rw-r--r--  Eigen/src/Geometry/Quaternion.h  164
-rw-r--r--  Eigen/src/Geometry/Rotation2D.h  48
-rw-r--r--  Eigen/src/Geometry/RotationBase.h  40
-rw-r--r--  Eigen/src/Geometry/Transform.h  175
-rw-r--r--  Eigen/src/Geometry/Translation.h  50
-rw-r--r--  Eigen/src/IterativeLinearSolvers/SolveWithGuess.h  6
-rw-r--r--  Eigen/src/LU/InverseImpl.h  6
-rw-r--r--  Eigen/src/SparseCore/SparseAssign.h  17
-rw-r--r--  Eigen/src/SparseCore/SparseProduct.h  5
-rw-r--r--  bench/tensors/README  8
-rw-r--r--  bench/tensors/tensor_benchmarks_sycl.cc  37
-rw-r--r--  cmake/EigenTesting.cmake  137
-rw-r--r--  cmake/FindComputeCpp.cmake  228
-rw-r--r--  doc/snippets/DenseBase_LinSpacedInt.cpp  8
-rw-r--r--  test/nullary.cpp  117
-rw-r--r--  test/packetmath.cpp  26
-rw-r--r--  test/product_extra.cpp  45
-rw-r--r--  unsupported/Eigen/CXX11/Tensor  6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h  5
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h  7
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h  122
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h  58
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h  58
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h  14
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h  50
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h  17
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMap.h  19
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h  21
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h  50
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h  77
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h  109
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h  213
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h  201
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h  154
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h  111
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h  99
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h  158
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h  69
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h  234
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h  41
-rw-r--r--  unsupported/Eigen/SpecialFunctions  2
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h  56
-rw-r--r--  unsupported/test/CMakeLists.txt  6
-rw-r--r--  unsupported/test/cxx11_tensor_broadcast_sycl.cpp  79
-rw-r--r--  unsupported/test/cxx11_tensor_contract_cuda.cu  2
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu  4
-rw-r--r--  unsupported/test/cxx11_tensor_device_sycl.cpp  39
-rw-r--r--  unsupported/test/cxx11_tensor_forced_eval_sycl.cpp  68
-rw-r--r--  unsupported/test/cxx11_tensor_image_patch.cpp  24
-rw-r--r--  unsupported/test/cxx11_tensor_scan_cuda.cu  2
-rw-r--r--  unsupported/test/cxx11_tensor_simple.cpp  7
-rw-r--r--  unsupported/test/cxx11_tensor_sycl.cpp  162
91 files changed, 3730 insertions, 882 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20208bd23..4f5cc7376 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -437,6 +437,13 @@ else()
add_subdirectory(lapack EXCLUDE_FROM_ALL)
endif()
+# add SYCL
+option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
+if(EIGEN_TEST_SYCL)
+ set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
+ include(FindComputeCpp)
+endif()
+
add_subdirectory(unsupported)
add_subdirectory(demos EXCLUDE_FROM_ALL)
diff --git a/Eigen/Core b/Eigen/Core
index f5eec0e39..f5d1126cc 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,9 +14,19 @@
// first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h"
-// Handle NVCC/CUDA
-#ifdef __CUDACC__
- // Do not try asserts on CUDA!
+/// This will no longer be needed after the next release of ComputeCpp CE
+#ifdef EIGEN_USE_SYCL
+#undef min
+#undef max
+#undef isnan
+#undef isinf
+#undef isfinite
+#include <SYCL/sycl.hpp>
+#endif
+
+// Handle NVCC/CUDA/SYCL
+#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
+ // Do not try asserts on CUDA and SYCL!
#ifndef EIGEN_NO_DEBUG
#define EIGEN_NO_DEBUG
#endif
@@ -25,17 +35,24 @@
#undef EIGEN_INTERNAL_DEBUGGING
#endif
- // Do not try to vectorize on CUDA!
- #ifndef EIGEN_DONT_VECTORIZE
- #define EIGEN_DONT_VECTORIZE
- #endif
-
#ifdef EIGEN_EXCEPTIONS
#undef EIGEN_EXCEPTIONS
#endif
// All functions callable from CUDA code must be qualified with __device__
- #define EIGEN_DEVICE_FUNC __host__ __device__
+ #ifdef __CUDACC__
+ // Do not try to vectorize on CUDA!
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+
+ #define EIGEN_DEVICE_FUNC __host__ __device__
+ // We need math_functions.hpp to ensure that the EIGEN_USING_STD_MATH macro
+ // works properly on the device side
+ #include <math_functions.hpp>
+ #else
+ #define EIGEN_DEVICE_FUNC
+ #endif
#else
#define EIGEN_DEVICE_FUNC
@@ -51,7 +68,7 @@
#define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
#endif
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS)
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
#define EIGEN_EXCEPTIONS
#endif
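
The net effect of the reworked guards above can be condensed as follows; this is an illustrative sketch of the resulting preprocessor logic, not part of the patch. Asserts and exceptions are disabled for both CUDA and SYCL device compilation, while the vectorization opt-out and the __host__ __device__ qualifiers remain CUDA-only.

// Condensed view of the guard logic (illustration only).
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
  #define EIGEN_NO_DEBUG                        // no asserts in device code
  #ifdef __CUDACC__
    #define EIGEN_DONT_VECTORIZE                // CUDA only: no vectorization
    #define EIGEN_DEVICE_FUNC __host__ __device__
    #include <math_functions.hpp>               // for EIGEN_USING_STD_MATH
  #else
    #define EIGEN_DEVICE_FUNC                   // SYCL: no CUDA qualifiers
  #endif
#else
  #define EIGEN_DEVICE_FUNC
#endif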
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h
index b8020a92c..4a4aa214c 100644
--- a/Eigen/src/CholmodSupport/CholmodSupport.h
+++ b/Eigen/src/CholmodSupport/CholmodSupport.h
@@ -14,34 +14,40 @@ namespace Eigen {
namespace internal {
-template<typename Scalar, typename CholmodType>
-void cholmod_configure_matrix(CholmodType& mat)
-{
- if (internal::is_same<Scalar,float>::value)
- {
- mat.xtype = CHOLMOD_REAL;
- mat.dtype = CHOLMOD_SINGLE;
- }
- else if (internal::is_same<Scalar,double>::value)
- {
+template<typename Scalar> struct cholmod_configure_matrix;
+
+template<> struct cholmod_configure_matrix<double> {
+ template<typename CholmodType>
+ static void run(CholmodType& mat) {
mat.xtype = CHOLMOD_REAL;
mat.dtype = CHOLMOD_DOUBLE;
}
- else if (internal::is_same<Scalar,std::complex<float> >::value)
- {
- mat.xtype = CHOLMOD_COMPLEX;
- mat.dtype = CHOLMOD_SINGLE;
- }
- else if (internal::is_same<Scalar,std::complex<double> >::value)
- {
+};
+
+template<> struct cholmod_configure_matrix<std::complex<double> > {
+ template<typename CholmodType>
+ static void run(CholmodType& mat) {
mat.xtype = CHOLMOD_COMPLEX;
mat.dtype = CHOLMOD_DOUBLE;
}
- else
- {
- eigen_assert(false && "Scalar type not supported by CHOLMOD");
- }
-}
+};
+
+// Other scalar types are not yet supported by Cholmod
+// template<> struct cholmod_configure_matrix<float> {
+// template<typename CholmodType>
+// static void run(CholmodType& mat) {
+// mat.xtype = CHOLMOD_REAL;
+// mat.dtype = CHOLMOD_SINGLE;
+// }
+// };
+//
+// template<> struct cholmod_configure_matrix<std::complex<float> > {
+// template<typename CholmodType>
+// static void run(CholmodType& mat) {
+// mat.xtype = CHOLMOD_COMPLEX;
+// mat.dtype = CHOLMOD_SINGLE;
+// }
+// };
} // namespace internal
@@ -53,7 +59,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
{
cholmod_sparse res;
res.nzmax = mat.nonZeros();
- res.nrow = mat.rows();;
+ res.nrow = mat.rows();
res.ncol = mat.cols();
res.p = mat.outerIndexPtr();
res.i = mat.innerIndexPtr();
@@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat)
}
// setup res.xtype
- internal::cholmod_configure_matrix<_Scalar>(res);
+ internal::cholmod_configure_matrix<_Scalar>::run(res);
res.stype = 0;
@@ -131,7 +137,7 @@ cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
res.x = (void*)(mat.derived().data());
res.z = 0;
- internal::cholmod_configure_matrix<Scalar>(res);
+ internal::cholmod_configure_matrix<Scalar>::run(res);
return res;
}
@@ -180,14 +186,16 @@ class CholmodBase : public SparseSolverBase<Derived>
CholmodBase()
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
- m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+ EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+ m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
cholmod_start(&m_cholmod);
}
explicit CholmodBase(const MatrixType& matrix)
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
{
- m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0);
+ EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
+ m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
cholmod_start(&m_cholmod);
compute(matrix);
}
@@ -254,7 +262,7 @@ class CholmodBase : public SparseSolverBase<Derived>
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
-
+
// If the factorization failed, minor is the column at which it did. On success minor == n.
this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
m_factorizationIsOk = true;
@@ -324,7 +332,7 @@ class CholmodBase : public SparseSolverBase<Derived>
*/
Derived& setShift(const RealScalar& offset)
{
- m_shiftOffset[0] = offset;
+ m_shiftOffset[0] = double(offset);
return derived();
}
@@ -386,7 +394,7 @@ class CholmodBase : public SparseSolverBase<Derived>
protected:
mutable cholmod_common m_cholmod;
cholmod_factor* m_cholmodFactor;
- RealScalar m_shiftOffset[2];
+ double m_shiftOffset[2];
mutable ComputationInfo m_info;
int m_factorizationIsOk;
int m_analysisIsOk;
@@ -410,6 +418,8 @@ class CholmodBase : public SparseSolverBase<Derived>
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
+ * \warning Only double precision real and complex scalar types are supported by Cholmod.
+ *
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -459,6 +469,8 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
+ * \warning Only double precision real and complex scalar types are supported by Cholmod.
+ *
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -506,6 +518,8 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
+ * \warning Only double precision real and complex scalar types are supported by Cholmod.
+ *
* \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>
@@ -555,6 +569,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper
*
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
*
+ * \warning Only double precision real and complex scalar types are supported by Cholmod.
+ *
* \sa \ref TutorialSparseSolverConcept
*/
template<typename _MatrixType, int _UpLo = Lower>
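
With the EIGEN_STATIC_ASSERT added to the CholmodBase constructors, an unsupported scalar type is now rejected at compile time rather than through the old runtime eigen_assert. A minimal sketch of what this means for user code, assuming Cholmod and the CholmodSupport module are available:

#include <Eigen/SparseCore>
#include <Eigen/CholmodSupport>

int main()
{
  Eigen::SparseMatrix<double> A(10,10);
  A.setIdentity();

  // double is supported: compiles and factorizes as before.
  Eigen::CholmodSimplicialLLT<Eigen::SparseMatrix<double> > llt(A);

  // A single-precision solver would now fail to compile with
  // CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY instead of asserting at runtime:
  // Eigen::CholmodSimplicialLLT<Eigen::SparseMatrix<float> > lltf; // error

  return llt.info() == Eigen::Success ? 0 : 1;
}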
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index 6013d4d85..a04521a16 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -54,6 +54,8 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+
EIGEN_DEVICE_FUNC
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -72,65 +74,17 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index rowId, Index colId) const
- {
- return m_expression.coeff(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId)
- {
- return m_expression.coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index index) const
- {
- return m_expression.coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- return m_expression.coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
- template<int LoadMode>
- inline const PacketScalar packet(Index rowId, Index colId) const
- {
- return m_expression.template packet<LoadMode>(rowId, colId);
- }
-
- template<int LoadMode>
- inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(rowId, colId, val);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index index) const
- {
- return m_expression.template packet<LoadMode>(index);
- }
-
- template<int LoadMode>
- inline void writePacket(Index index, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(index, val);
- }
-
template<typename Dest>
EIGEN_DEVICE_FUNC
inline void evalTo(Dest& dst) const { dst = m_expression; }
@@ -197,6 +151,8 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+ using Base::coeffRef;
+
EIGEN_DEVICE_FUNC
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -215,65 +171,17 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
inline const Scalar* data() const { return m_expression.data(); }
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index rowId, Index colId) const
- {
- return m_expression.coeff(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index rowId, Index colId)
- {
- return m_expression.coeffRef(rowId, colId);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return m_expression.derived().coeffRef(rowId, colId);
}
EIGEN_DEVICE_FUNC
- inline CoeffReturnType coeff(Index index) const
- {
- return m_expression.coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- inline Scalar& coeffRef(Index index)
- {
- return m_expression.coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const
{
return m_expression.coeffRef(index);
}
- template<int LoadMode>
- inline const PacketScalar packet(Index rowId, Index colId) const
- {
- return m_expression.template packet<LoadMode>(rowId, colId);
- }
-
- template<int LoadMode>
- inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(rowId, colId, val);
- }
-
- template<int LoadMode>
- inline const PacketScalar packet(Index index) const
- {
- return m_expression.template packet<LoadMode>(index);
- }
-
- template<int LoadMode>
- inline void writePacket(Index index, const PacketScalar& val)
- {
- m_expression.template writePacket<LoadMode>(index, val);
- }
-
EIGEN_DEVICE_FUNC
const typename internal::remove_all<NestedExpressionType>::type&
nestedExpression() const
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b7cc7c0e9..ffe1dd0ca 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -554,7 +554,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
- alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+ alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
}
}
};
@@ -697,15 +697,21 @@ protected:
***************************************************************************/
template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
- DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
+
+ // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
+ // we need to resize the destination after the source evaluator has been created.
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
+ DstEvaluatorType dstEvaluator(dst);
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
@@ -714,7 +720,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
}
template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -796,11 +802,6 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
) && int(Dst::SizeAtCompileTime) != 1
};
- Index dstRows = NeedToTranspose ? src.cols() : src.rows();
- Index dstCols = NeedToTranspose ? src.rows() : src.cols();
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
- dst.resize(dstRows, dstCols);
-
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
@@ -823,15 +824,11 @@ template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
- Index dstRows = src.rows();
- Index dstCols = src.cols();
- if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
- dst.resize(dstRows, dstCols);
-
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
-
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
@@ -853,8 +850,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
@@ -873,6 +868,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);
}
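
The motivating case for moving the resize after the creation of the source evaluator is the one spelled out in the NOTE above; a minimal sketch of the expression this reordering makes well defined:

#include <Eigen/Dense>

int main()
{
  // A is rectangular, so A*A.transpose() has different dimensions than A
  // and the destination must be resized. Resizing A before the source
  // evaluator exists would corrupt the aliased right-hand side; with this
  // patch the evaluator is built first and A is resized afterwards.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3,5);
  A = (A * A.transpose()) / 2.0;   // A ends up 3x3
  return 0;
}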
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 00c079bd8..1d14af652 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1302,7 +1302,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
}
protected:
- const ArgTypeNested m_arg;
+ typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
const MemberOp m_functor;
};
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 25c3ef3d7..dd498f758 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -215,42 +215,29 @@ DenseBase<Derived>::Constant(const Scalar& value)
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
}
-/**
- * \brief Sets a linearly spaced vector.
- *
- * The function generates 'size' equally spaced values in the closed interval [low,high].
- * This particular version of LinSpaced() uses sequential access, i.e. vector access is
- * assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
- * and yields faster code than the random access version.
- *
- * When size is set to 1, a vector of length 1 containing 'high' is returned.
- *
- * \only_for_vectors
- *
- * Example: \include DenseBase_LinSpaced_seq.cpp
- * Output: \verbinclude DenseBase_LinSpaced_seq.out
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
*
- * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp
+ * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
*/
template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,size));
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
-/**
- * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&)
- * Special version for fixed size types which does not require the size parameter.
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
+ *
+ * \sa LinSpaced(Scalar,Scalar)
*/
template<typename Derived>
-EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,Derived::SizeAtCompileTime));
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/**
@@ -264,14 +251,24 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
- * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp
+ * For integer scalar types, an even spacing is possible if and only if the length of the range,
+ * i.e., \c high-low, is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
+ * number of values \c high-low+1 (meaning each value can be repeated the same number of times).
+ * If neither of these two conditions is satisfied, then \c high is lowered to the largest value
+ * satisfying one of these constraints.
+ * Here are some examples:
+ *
+ * Example: \include DenseBase_LinSpacedInt.cpp
+ * Output: \verbinclude DenseBase_LinSpacedInt.out
+ *
+ * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,size));
+ return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
}
/**
@@ -284,7 +281,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
- return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar,true>(low,high,Derived::SizeAtCompileTime));
+ return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
@@ -377,24 +374,30 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
* Example: \include DenseBase_setLinSpaced.cpp
* Output: \verbinclude DenseBase_setLinSpaced.out
*
- * \sa CwiseNullaryOp
+ * For integer scalar types, do not miss the explanations on the definition
+ * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+ *
+ * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
- return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar,false>(low,high,newSize));
+ return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
}
/**
* \brief Sets a linearly spaced vector.
*
- * The function fills *this with equally spaced values in the closed interval [low,high].
+ * The function fills \c *this with equally spaced values in the closed interval [low,high].
* When size is set to 1, a vector of length 1 containing 'high' is returned.
*
* \only_for_vectors
*
- * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
+ * For integer scalar types, do not miss the explanations on the definition
+ * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+ *
+ * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
@@ -752,7 +755,7 @@ struct setIdentity_impl<Derived, true>
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
{
m.setZero();
- const Index size = (std::min)(m.rows(), m.cols());
+ const Index size = numext::mini(m.rows(), m.cols());
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
return m;
}
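
The even-spacing rule for integer scalars can be illustrated with a short sketch in the spirit of the DenseBase_LinSpacedInt.cpp snippet referenced above; the expected outputs in the comments follow from the semantics documented here:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  using Eigen::VectorXi;
  // high-low = 8 is a multiple of size-1 = 4: exact spacing of 2.
  std::cout << VectorXi::LinSpaced(5,2,10).transpose() << "\n"; // 2 4 6 8 10
  // Neither condition holds: high is lowered to the largest reachable value.
  std::cout << VectorXi::LinSpaced(4,0,10).transpose() << "\n"; // 0 3 6 9
  return 0;
}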
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index c110bbf11..bd74e8a13 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -260,10 +260,10 @@ template<typename Derived> class DenseBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
- /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
- typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,false>,PlainObject> SequentialLinSpacedReturnType;
+ /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
- typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
+ typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 423ab167d..c4af48ab6 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -624,7 +624,7 @@ struct first_aligned_impl<Alignment, Derived, false>
{
static inline Index run(const Derived& m)
{
- return internal::first_aligned<Alignment>(&m.const_cast_derived().coeffRef(0,0), m.size());
+ return internal::first_aligned<Alignment>(m.data(), m.size());
}
};
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 92b2eee71..ecfdce8ef 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -290,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
template<typename Derived>
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
{
- using std::abs;
if(cols() != rows()) return false;
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
- RealScalar absOnDiagonal = abs(coeff(j,j));
+ RealScalar absOnDiagonal = numext::abs(coeff(j,j));
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
}
for(Index j = 0; j < cols(); ++j)
@@ -321,6 +320,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.setZero();
dst.diagonal() = src.diagonal();
}
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index afd806928..27033a2dd 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -558,6 +558,34 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
return ifPacket.select[0] ? thenPacket : elsePacket;
}
+/** \internal \returns \a a with the first coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ // Default implementation based on pblend.
+ // It must be specialized for higher performance.
+ Selector<unpacket_traits<Packet>::size> mask;
+ mask.select[0] = true;
+ // This for loop should be optimized away by the compiler.
+ for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+ mask.select[i] = false;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+
+/** \internal \returns \a a with the last coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+ // Default implementation based on pblend.
+ // It must be specialized for higher performance.
+ Selector<unpacket_traits<Packet>::size> mask;
+ // This for loop should be optimized away by the compiler.
+ for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+ mask.select[i] = false;
+ mask.select[unpacket_traits<Packet>::size-1] = true;
+ return pblend(mask, pset1<Packet>(b), a);
+}
+
} // end namespace internal
} // end namespace Eigen
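
In scalar terms the contract of the two new primitives is: for a packet a = [a0, ..., aN-1], pinsertfirst(a, b) yields [b, a1, ..., aN-1] and pinsertlast(a, b) yields [a0, ..., aN-2, b]. A small sketch exercising them through the internal packet API (the lane count N depends on the enabled instruction set):

#include <Eigen/Core>
#include <iostream>

int main()
{
  using namespace Eigen::internal;
  typedef packet_traits<float>::type Packet;   // e.g. Packet4f with SSE
  const int N = unpacket_traits<Packet>::size;

  float in[16] = {1,2,3,4,5,6,7,8};            // enough lanes for any ISA
  Packet a = ploadu<Packet>(in);
  Packet r = pinsertlast(a, 42.f);             // last lane replaced by 42

  float out[16];
  pstoreu(out, r);
  std::cout << out[N-1] << "\n";               // prints 42
  return 0;
}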
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d56df8249..f7cf04cde 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -399,12 +399,14 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline PlainObject unitOrthogonal(void) const;
+ EIGEN_DEVICE_FUNC
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
inline HomogeneousReturnType homogeneous() const;
enum {
@@ -414,7 +416,7 @@ template<typename Derived> class MatrixBase
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
-
+ EIGEN_DEVICE_FUNC
inline const HNormalizedReturnType hnormalized() const;
////////// Householder module ///////////
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 55b4ac057..2dcd929e6 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -916,8 +916,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(rows,cols);
- const Index common_rows = (std::min)(rows, _this.rows());
- const Index common_cols = (std::min)(cols, _this.cols());
+ const Index common_rows = numext::mini(rows, _this.rows());
+ const Index common_cols = numext::mini(cols, _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
@@ -950,8 +950,8 @@ struct conservative_resize_like_impl
{
// The storage order does not allow us to use reallocation.
typename Derived::PlainObject tmp(other);
- const Index common_rows = (std::min)(tmp.rows(), _this.rows());
- const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+ const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+ const Index common_cols = numext::mini(tmp.cols(), _this.cols());
tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
_this.derived().swap(tmp);
}
diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h
index 63faca822..c9e2e1a07 100644
--- a/Eigen/src/Core/ProductEvaluators.h
+++ b/Eigen/src/Core/ProductEvaluators.h
@@ -140,6 +140,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
@@ -154,6 +158,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
@@ -168,6 +176,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
@@ -567,8 +579,8 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
}
protected:
- const LhsNested m_lhs;
- const RhsNested m_rhs;
+ typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+ typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h
index 8fc69c4b8..960a58597 100644
--- a/Eigen/src/Core/Solve.h
+++ b/Eigen/src/Core/Solve.h
@@ -139,7 +139,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
- // FIXME shall we resize dst here?
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec()._solve_impl(src.rhs(), dst);
}
};
@@ -151,6 +155,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
}
};
@@ -163,6 +172,11 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
}
};
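
These three specializations now resize the destination locally, which compensates for the removal of the generic resize from call_assignment_no_alias in AssignEvaluator.h above and resolves the old "shall we resize dst here?" FIXME. A brief sketch of an assignment going through this path:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);

  Eigen::VectorXd x;               // empty: sized by the assignment
  x = A.partialPivLu().solve(b);   // x.size() == 4 afterwards
  return 0;
}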
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index bc232526a..79b767bcc 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -78,6 +78,11 @@ template<typename MatrixType> class Transpose
typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix; }
+ /** \internal */
+ void resize(Index nrows, Index ncols) {
+ m_matrix.resize(ncols,nrows);
+ }
+
protected:
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
};
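
This internal resize() lets a Transpose expression act as a resizable destination now that call_dense_assignment_loop resizes the destination itself: a transposing assignment forwards the request to the nested expression with the dimensions swapped. A small sketch of an assignment that exercises this path:

#include <Eigen/Dense>

int main()
{
  Eigen::RowVectorXd r(5);
  r.setLinSpaced(5, 0.0, 1.0);

  Eigen::VectorXd v;   // empty column vector
  v = r;               // transposing vector assignment: v is resized to 5
  return v.size() == 5 ? 0 : 1;
}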
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e9606ec33..641c20417 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -641,21 +641,20 @@ MatrixBase<Derived>::triangularView() const
template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
{
- using std::abs;
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
{
- Index maxi = (std::min)(j, rows()-1);
+ Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i <= maxi; ++i)
{
- RealScalar absValue = abs(coeff(i,j));
+ RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
}
}
RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i)
- if(abs(coeff(i, j)) > threshold) return false;
+ if(numext::abs(coeff(i, j)) > threshold) return false;
return true;
}
@@ -667,20 +666,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
{
- using std::abs;
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i)
{
- RealScalar absValue = abs(coeff(i,j));
+ RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
}
RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j)
{
- Index maxi = (std::min)(j, rows()-1);
+ Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i < maxi; ++i)
- if(abs(coeff(i, j)) > threshold) return false;
+ if(numext::abs(coeff(i, j)) > threshold) return false;
}
return true;
}
@@ -777,15 +775,18 @@ public:
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
typedef evaluator<DstXprType> DstEvaluatorType;
typedef evaluator<SrcXprType> SrcEvaluatorType;
- DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
+
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+ DstEvaluatorType dstEvaluator(dst);
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
@@ -802,7 +803,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
@@ -893,7 +894,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
for(Index j = 0; j < kernel.cols(); ++j)
{
- Index maxi = (std::min)(j, kernel.rows());
+ Index maxi = numext::mini(j, kernel.rows());
Index i = 0;
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
{
@@ -938,6 +939,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.setZero();
dst._assignProduct(src, 1);
}
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index dd382e990..4fe267e9f 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -602,7 +602,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
return m_matrix / extendedTo(other.derived());
}
- /** \returns an expression where each column of row of the referenced matrix are normalized.
+  /** \returns an expression where each column (or row) of the referenced matrix is normalized.
* The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize()
*/
@@ -625,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/////////// Geometry module ///////////
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+ EIGEN_DEVICE_FUNC
HomogeneousReturnType homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType;
@@ -654,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType;
+ EIGEN_DEVICE_FUNC
const HNormalizedReturnType hnormalized() const;
protected:
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index b16e0ddd4..99439c8aa 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -456,6 +456,26 @@ ptranspose(PacketBlock<Packet2cd,2>& kernel) {
kernel.packet[0].v = tmp;
}
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
+{
+ return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
+{
+ return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 05b15b852..e60ef307b 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -609,6 +609,26 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
}
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+ return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+ return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index d670f3718..ae54225f8 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// Most of the following operations require arch >= 3.0
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDACC__) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
template<> struct is_arithmetic<half2> { enum { value = true }; };
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index fd7f4d740..5607fe0ab 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -476,6 +476,26 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, co
return Packet2cf(_mm_castpd_ps(result));
}
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+ return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+ return pset1<Packet1cd>(b);
+}
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index baad692e3..6f31cf12b 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -818,6 +818,44 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
#endif
}
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+ return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+ return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+ const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+ return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+ return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+ const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+ return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index d82ffed02..96747bac7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -266,7 +266,7 @@ struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
{
- using std::sqrt;
+ EIGEN_USING_STD_MATH(sqrt)
Scalar p, qp;
if(_x>_y)
{
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index a2154d3b5..0000ea1f1 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -37,87 +37,78 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
-template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
+template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
-// linear access for packet ops:
-// 1) initialization
-// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
-// base += [size*step, ..., size*step]
-//
-// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
-// in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
- m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
- m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
+ m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+ m_interPacket(plset<Packet>(0)),
+ m_flip(std::abs(high)<std::abs(low))
+ {}
template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
- {
- m_base = padd(m_base, pset1<Packet>(m_step));
- return m_low+Scalar(i)*m_step;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+ if(m_flip)
+ return (i==0)? m_low : (m_high - (m_size1-i)*m_step);
+ else
+ return (i==m_size1)? m_high : (m_low + i*m_step);
}
template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
-
- const Scalar m_low;
- const Scalar m_step;
- const Packet m_packetStep;
- mutable Packet m_base;
-};
-
-// random access for packet ops:
-// 1) each step
-// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
-template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
-{
- linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
- m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
-
- template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
-
- template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
- { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
+ {
+ // Principle:
+ // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+ if(m_flip)
+ {
+ Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket);
+ Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
+ if(i==0)
+ res = pinsertfirst(res, m_low);
+ return res;
+ }
+ else
+ {
+ Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket);
+ Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
+ if(i==m_size1-unpacket_traits<Packet>::size+1)
+ res = pinsertlast(res, m_high);
+ return res;
+ }
+ }
const Scalar m_low;
+ const Scalar m_high;
+ const Index m_size1;
const Scalar m_step;
- const Packet m_lowPacket;
- const Packet m_stepPacket;
const Packet m_interPacket;
+ const bool m_flip;
};
template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
- m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
+ m_low(low),
+ m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
+ m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
+ m_use_divisor((high-low+1)<num_steps)
{}
template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Scalar operator() (IndexType i) const {
- return m_low + (m_length*Scalar(i))/m_divisor;
+ const Scalar operator() (IndexType i) const
+ {
+ if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
+ else return m_low + convert_index<Scalar>(i)*m_multiplier;
}
- template<typename IndexType>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Packet packetOp(IndexType i) const {
- return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
- pset1<Packet>(m_divisor))); }
-
const Scalar m_low;
- const Scalar m_length;
- const Scalar m_divisor;
- const Packet m_interPacket;
+ const Scalar m_multiplier;
+ const Scalar m_divisor;
+ const bool m_use_divisor;
};
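The integer specialization admits a similarly compact scalar model. A sketch under the same caveat (helper name hypothetical): when the range holds at least num_steps distinct values, the sequence advances by a fixed integer multiplier; when there are more steps than values, each value is instead repeated via an integer division, keeping the sequence monotonic without skipping values.

    // Hypothetical standalone model of the integer path.
    long linspaced_int(long low, long high, long num_steps, long i) {
      const long multiplier = (high - low) / (num_steps <= 1 ? 1 : num_steps - 1);
      const bool use_divisor = (high - low + 1) < num_steps;  // more steps than values
      if (use_divisor) {
        const long divisor = (num_steps + high - low) / (high - low + 1);
        return low + i / divisor;   // e.g. low=0, high=2, num_steps=7 -> 0,0,0,1,1,1,2
      }
      // May stop short of high when (high-low) is not divisible by (num_steps-1).
      return low + i * multiplier;  // e.g. low=0, high=10, num_steps=5 -> 0,2,4,6,8
    }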
// ----- Linspace functor ----------------------------------------------------------------
@@ -125,18 +116,18 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
// Forward declaration (we default to random access which does not really give
// us a speed gain when using packet access but it allows using the functor in
// nested expressions).
-template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
-template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
+template <typename Scalar, typename PacketType> struct linspaced_op;
+template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
{
enum
{
Cost = 1,
- PacketAccess = packet_traits<Scalar>::HasSetLinear
- && ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
+ PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
+ /*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled
IsRepeatable = true
};
};
-template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
+template <typename Scalar, typename PacketType> struct linspaced_op
{
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
: impl((num_steps==1 ? high : low),high,num_steps)
@@ -148,20 +139,42 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
template<typename Packet,typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
- // This proxy object handles the actual required temporaries, the different
- // implementations (random vs. sequential access) as well as the
- // correct piping to size 2/4 packet operations.
- // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
- // we have to by-pass RandomAccess for integer types. See bug 698.
- const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
+ // This proxy object handles the actual required temporaries and the different
+ // implementations (integer vs. floating point).
+ const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
};
// Linear access is automatically determined from the operator() prototypes available for the given functor.
// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
// and linear access is not possible. In all other cases, linear access is enabled.
-// Users should not have to deal with this struture.
+// Users should not have to deal with this structure.
template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
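To make the rule concrete, here are two hypothetical user-defined nullary functors (illustrative only): the first exposes an operator()(i,j), so it is treated as needing independent row/column indices and loses linear access; the second exposes a single-index operator() and keeps it.

    struct checkerboard_op {  // coefficient depends on (i,j): linear access disabled
      double operator()(Eigen::Index i, Eigen::Index j) const { return double((i + j) % 2); }
    };
    struct ramp_op {          // coefficient depends on one index: linear access enabled
      double operator()(Eigen::Index i) const { return 0.5 * double(i); }
    };
    // Usage through the generic nullary-expression entry point:
    //   Eigen::MatrixXd::NullaryExpr(4, 4, checkerboard_op());
    //   Eigen::VectorXd::NullaryExpr(8, ramp_op());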
+// For unreliable compilers, let's specialize the has_*ary_operator
+// helpers so that at least built-in nullary functors work fine.
+#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
+
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+#endif
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2009f8e57..2e6a00ffd 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -321,7 +321,7 @@ struct functor_traits<scalar_log1p_op<Scalar> > {
*/
template<typename Scalar> struct scalar_log10_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
};
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index b1465c3b5..61df3be57 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
@@ -24,7 +24,7 @@ template<
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
-
+
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
@@ -54,7 +54,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
{
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
-
+
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -85,13 +85,13 @@ static void run(Index rows, Index cols, Index depth,
// this is the parallel version!
Index tid = omp_get_thread_num();
Index threads = omp_get_num_threads();
-
+
LhsScalar* blockA = blocking.blockA();
eigen_internal_assert(blockA!=0);
-
+
std::size_t sizeB = kc*nc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
-
+
// For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
for(Index k=0; k<depth; k+=kc)
{
@@ -114,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
// Notify the other threads that the part A'_i is ready to go.
info[tid].sync = k;
-
+
// Computes C_i += A' * B' per A'_i
for(Index shift=0; shift<threads; ++shift)
{
@@ -161,7 +161,7 @@ static void run(Index rows, Index cols, Index depth,
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
-
+
const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
// For each horizontal panel of the rhs, and corresponding panel of the lhs...
@@ -172,24 +172,24 @@ static void run(Index rows, Index cols, Index depth,
for(Index k2=0; k2<depth; k2+=kc)
{
const Index actual_kc = (std::min)(k2+kc,depth)-k2;
-
+
// OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
// => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
// Note that this panel will be read as many times as the number of blocks in the rhs's
// horizontal panel which is, in practice, a very low number.
pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
-
+
// For each kc x nc block of the rhs's horizontal panel...
for(Index j2=0; j2<cols; j2+=nc)
{
const Index actual_nc = (std::min)(j2+nc,cols)-j2;
-
+
// We pack the rhs's block into a sequential chunk of memory (L2 caching)
// Note that this block will be read a very high number of times, which is equal to the number of
// micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
if((!pack_rhs_once) || i2==0)
pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
-
+
// Everything is packed, we can now call the panel * block kernel:
gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
}
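The packing comments above describe a classic three-level cache-blocking scheme; condensed into a sketch with the packing routines and the gebp micro-kernel stubbed out (loop structure and the pack_rhs_once condition follow the code):

    void blocked_gemm_sketch(long rows, long cols, long depth,
                             long mc, long kc, long nc) {
      const bool pack_rhs_once = (mc != rows) && (kc == depth) && (nc == cols);
      for (long i2 = 0; i2 < rows; i2 += mc)
        for (long k2 = 0; k2 < depth; k2 += kc) {
          /* pack_lhs(blockA, lhs(i2,k2), ...);  packed once, reused for every
             rhs block of this panel from L2/L3 */
          for (long j2 = 0; j2 < cols; j2 += nc) {
            if (!pack_rhs_once || i2 == 0) { /* pack_rhs(blockB, rhs(k2,j2), ...); */ }
            /* gebp(res(i2,j2), blockA, blockB, ...);  panel * block kernel */
          }
        }
    }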
@@ -229,7 +229,7 @@ struct gemm_functor
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
m_actualAlpha, m_blocking, info);
}
-
+
typedef typename Gemm::Traits Traits;
protected:
@@ -313,7 +313,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
}
-
+
void initParallel(Index, Index, Index, Index)
{}
@@ -359,14 +359,14 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
m_sizeA = this->m_mc * this->m_kc;
m_sizeB = this->m_kc * this->m_nc;
}
-
+
void initParallel(Index rows, Index cols, Index depth, Index num_threads)
{
this->m_mc = Transpose ? cols : rows;
this->m_nc = Transpose ? rows : cols;
this->m_kc = depth;
-
- eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+
+ eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
Index m = this->m_mc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
m_sizeA = this->m_mc * this->m_kc;
@@ -401,7 +401,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} // end namespace internal
namespace internal {
-
+
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
@@ -409,21 +409,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
-
+
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
-
+
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
-
+
enum {
MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
};
-
+
typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
-
+
template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
@@ -453,7 +453,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
}
-
+
template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
{
@@ -481,7 +481,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
- (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
+ (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
}
};
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index e0bfcc356..2a31e4cbe 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
-namespace Eigen {
+namespace Eigen {
namespace internal {
@@ -83,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
};
template<bool Condition, typename Functor, typename Index>
-void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redesigned anyway.
+ EIGEN_UNUSED_VARIABLE(depth);
EIGEN_UNUSED_VARIABLE(transpose);
func(0,rows, 0,cols);
#else
@@ -106,6 +107,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// FIXME this has to be fine tuned
Index size = transpose ? rows : cols;
Index pb_max_threads = std::max<Index>(1,size / 32);
+ // compute the maximal number of threads from the total amount of work:
+ double work = static_cast<double>(rows) * static_cast<double>(cols) *
+ static_cast<double>(depth);
+ double kMinTaskSize = 50000; // Heuristic.
+ pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+
// compute the number of threads we are going to use
Index threads = std::min<Index>(nbThreads(), pb_max_threads);
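Pulled out of context, the amended heuristic amounts to the following free function (a sketch using the patch's constants; in the transposed case rows and cols are swapped before the size bound):

    #include <algorithm>
    // Thread-count bound: limited both by output width and by total work, so
    // small-but-deep products no longer spawn threads with tiny tasks.
    long max_gemm_threads(long rows, long cols, long depth, long nb_threads) {
      long pb_max_threads = std::max<long>(1, cols / 32);  // size-based bound
      const double work = double(rows) * double(cols) * double(depth);
      const double kMinTaskSize = 50000;  // heuristic from the patch
      pb_max_threads = std::max<long>(
          1, std::min<long>(pb_max_threads, long(work / kMinTaskSize)));
      return std::min<long>(nb_threads, pb_max_threads);
    }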
@@ -120,19 +127,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
if(transpose)
std::swap(rows,cols);
-
+
ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
-
+
#pragma omp parallel num_threads(threads)
{
Index i = omp_get_thread_num();
// Note that the actual number of threads might be lower than the requested number.
Index actual_threads = omp_get_num_threads();
-
+
Index blockCols = (cols / actual_threads) & ~Index(0x3);
Index blockRows = (rows / actual_threads);
blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
-
+
Index r0 = i*blockRows;
Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 108504333..cfdbca5dd 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -356,6 +356,13 @@
#define EIGEN_MAX_CPP_VER 99
#endif
+#if EIGEN_MAX_CPP_VER>=11 && defined(__cplusplus) && (__cplusplus >= 201103L)
+#define EIGEN_HAS_CXX11 1
+#else
+#define EIGEN_HAS_CXX11 0
+#endif
+
+
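As a usage illustration (hypothetical snippet, not part of the patch), the new macro lets C++11-only constructs be guarded in one place instead of repeating the __cplusplus test:

    #if EIGEN_HAS_CXX11
    #include <type_traits>
    // alias templates are a C++11 feature, hence the guard
    template<typename T> using remove_cv_t = typename std::remove_cv<T>::type;
    #endif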
// Do we support r-value references?
#ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
@@ -804,7 +811,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 4fd8891c6..983361a45 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -100,7 +100,8 @@
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
THIS_TYPE_IS_NOT_SUPPORTED,
STORAGE_KIND_MUST_MATCH,
- STORAGE_INDEX_MUST_MATCH
+ STORAGE_INDEX_MUST_MATCH,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
};
};
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 088a65240..7cfa2c49f 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -191,7 +191,7 @@ struct find_best_packet
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
template<int ArrayBytes, int AlignmentBytes,
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
- bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
+ bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
struct compute_default_alignment_helper
{
enum { value = 0 };
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index d20d17492..066eae4f9 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -62,57 +62,57 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Default constructor initializing a null box. */
- inline AlignedBox()
+ EIGEN_DEVICE_FUNC inline AlignedBox()
{ if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); }
/** Constructs a null box with \a _dim the dimension of the ambient space. */
- inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
+ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
{ setEmpty(); }
/** Constructs a box with extremities \a _min and \a _max.
* \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
template<typename OtherVectorType1, typename OtherVectorType2>
- inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
+ EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
/** Constructs a box containing a single point \a p. */
template<typename Derived>
- inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
+ EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
{ }
- ~AlignedBox() {}
+ EIGEN_DEVICE_FUNC ~AlignedBox() {}
/** \returns the dimension in which the box holds */
- inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
+ EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
/** \deprecated use isEmpty() */
- inline bool isNull() const { return isEmpty(); }
+ EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }
/** \deprecated use setEmpty() */
- inline void setNull() { setEmpty(); }
+ EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }
/** \returns true if the box is empty.
* \sa setEmpty */
- inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
+ EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
/** Makes \c *this an empty box.
* \sa isEmpty */
- inline void setEmpty()
+ EIGEN_DEVICE_FUNC inline void setEmpty()
{
m_min.setConstant( ScalarTraits::highest() );
m_max.setConstant( ScalarTraits::lowest() );
}
/** \returns the minimal corner */
- inline const VectorType& (min)() const { return m_min; }
+ EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }
/** \returns a non const reference to the minimal corner */
- inline VectorType& (min)() { return m_min; }
+ EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }
/** \returns the maximal corner */
- inline const VectorType& (max)() const { return m_max; }
+ EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }
/** \returns a non const reference to the maximal corner */
- inline VectorType& (max)() { return m_max; }
+ EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }
/** \returns the center of the box */
- inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
+ EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
center() const
{ return (m_min+m_max)/RealScalar(2); }
@@ -120,18 +120,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* Note that this function does not get the same
* result for integral or floating scalar types: see
*/
- inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
+ EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
{ return m_max - m_min; }
/** \returns the volume of the bounding box */
- inline Scalar volume() const
+ EIGEN_DEVICE_FUNC inline Scalar volume() const
{ return sizes().prod(); }
/** \returns an expression for the bounding box diagonal vector
* if the length of the diagonal is needed: diagonal().norm()
* will provide it.
*/
- inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
+ EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
{ return sizes(); }
/** \returns the vertex of the bounding box at the corner defined by
@@ -143,7 +143,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* For 3D bounding boxes, the following names are added:
* BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.
*/
- inline VectorType corner(CornerType corner) const
+ EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const
{
EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);
@@ -161,7 +161,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns a random point inside the bounding box sampled with
* a uniform distribution */
- inline VectorType sample() const
+ EIGEN_DEVICE_FUNC inline VectorType sample() const
{
VectorType r(dim());
for(Index d=0; d<dim(); ++d)
@@ -179,25 +179,25 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** \returns true if the point \a p is inside the box \c *this. */
template<typename Derived>
- inline bool contains(const MatrixBase<Derived>& p) const
+ EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const
{
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
}
/** \returns true if the box \a b is entirely inside the box \c *this. */
- inline bool contains(const AlignedBox& b) const
+ EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const
{ return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
/** \returns true if the box \a b is intersecting the box \c *this.
* \sa intersection, clamp */
- inline bool intersects(const AlignedBox& b) const
+ EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const
{ return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
/** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
* \sa extend(const AlignedBox&) */
template<typename Derived>
- inline AlignedBox& extend(const MatrixBase<Derived>& p)
+ EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)
{
typename internal::nested_eval<Derived,2>::type p_n(p.derived());
m_min = m_min.cwiseMin(p_n);
@@ -207,7 +207,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
* \sa merged, extend(const MatrixBase&) */
- inline AlignedBox& extend(const AlignedBox& b)
+ EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)
{
m_min = m_min.cwiseMin(b.m_min);
m_max = m_max.cwiseMax(b.m_max);
@@ -217,7 +217,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Clamps \c *this by the box \a b and returns a reference to \c *this.
* \note If the boxes don't intersect, the resulting box is empty.
* \sa intersection(), intersects() */
- inline AlignedBox& clamp(const AlignedBox& b)
+ EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)
{
m_min = m_min.cwiseMax(b.m_min);
m_max = m_max.cwiseMin(b.m_max);
@@ -227,18 +227,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Returns an AlignedBox that is the intersection of \a b and \c *this
* \note If the boxes don't intersect, the resulting box is empty.
* \sa intersects(), clamp, contains() */
- inline AlignedBox intersection(const AlignedBox& b) const
+ EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const
{return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
/** Returns an AlignedBox that is the union of \a b and \c *this.
* \note Merging with an empty box may result in a box bigger than \c *this.
* \sa extend(const AlignedBox&) */
- inline AlignedBox merged(const AlignedBox& b) const
+ EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const
{ return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
/** Translate \c *this by the vector \a t and returns a reference to \c *this. */
template<typename Derived>
- inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
+ EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
{
const typename internal::nested_eval<Derived,2>::type t(a_t.derived());
m_min += t;
@@ -251,28 +251,28 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
*/
template<typename Derived>
- inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
+ EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
/** \returns the squared distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
*/
- inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
+ EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
/** \returns the distance between the point \a p and the box \c *this,
* and zero if \a p is inside the box.
* \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
*/
template<typename Derived>
- inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
- { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(p))); }
+ EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
+ { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }
/** \returns the distance between the boxes \a b and \c *this,
* and zero if the boxes intersect.
* \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
*/
- inline NonInteger exteriorDistance(const AlignedBox& b) const
- { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
+ EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const
+ { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -280,7 +280,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<AlignedBox,
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,
AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
{
return typename internal::cast_return_type<AlignedBox,
@@ -289,7 +289,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
- inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
+ EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
{
m_min = (other.min)().template cast<Scalar>();
m_max = (other.max)().template cast<Scalar>();
@@ -299,7 +299,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
{ return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
protected:
@@ -311,7 +311,7 @@ protected:
template<typename Scalar,int AmbientDim>
template<typename Derived>
-inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
{
typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());
Scalar dist2(0);
@@ -333,7 +333,7 @@ inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const Matri
}
template<typename Scalar,int AmbientDim>
-inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
{
Scalar dist2(0);
Scalar aux;
diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h
index 571062d00..0af3c1b08 100644
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -69,59 +69,61 @@ protected:
public:
/** Default constructor without initialization. */
- AngleAxis() {}
+ EIGEN_DEVICE_FUNC AngleAxis() {}
/** Constructs and initializes the angle-axis rotation from an \a angle in radians
* and an \a axis which \b must \b be \b normalized.
*
* \warning If the \a axis vector is not normalized, then the angle-axis object
* represents an invalid rotation. */
template<typename Derived>
+ EIGEN_DEVICE_FUNC
inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
/** Constructs and initializes the angle-axis rotation from a quaternion \a q.
* This function implicitly normalizes the quaternion \a q.
*/
- template<typename QuatDerived> inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
+ template<typename QuatDerived>
+ EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
/** Constructs and initializes the angle-axis rotation from a 3x3 rotation matrix. */
template<typename Derived>
- inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
+ EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
/** \returns the value of the rotation angle in radians */
- Scalar angle() const { return m_angle; }
+ EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }
/** \returns a read-write reference to the stored angle in radians */
- Scalar& angle() { return m_angle; }
+ EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }
/** \returns the rotation axis */
- const Vector3& axis() const { return m_axis; }
+ EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }
/** \returns a read-write reference to the stored rotation axis.
*
* \warning The rotation axis must remain a \b unit vector.
*/
- Vector3& axis() { return m_axis; }
+ EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }
/** Concatenates two rotations */
- inline QuaternionType operator* (const AngleAxis& other) const
+ EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const
{ return QuaternionType(*this) * QuaternionType(other); }
/** Concatenates two rotations */
- inline QuaternionType operator* (const QuaternionType& other) const
+ EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const
{ return QuaternionType(*this) * other; }
/** Concatenates two rotations */
- friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
+ friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
{ return a * QuaternionType(b); }
/** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
- AngleAxis inverse() const
+ EIGEN_DEVICE_FUNC AngleAxis inverse() const
{ return AngleAxis(-m_angle, m_axis); }
template<class QuatDerived>
- AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
+ EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
template<typename Derived>
- AngleAxis& operator=(const MatrixBase<Derived>& m);
+ EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);
template<typename Derived>
- AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
- Matrix3 toRotationMatrix(void) const;
+ EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
+ EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -129,24 +131,24 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
{ return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
- inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
+ EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
{
m_axis = other.axis().template cast<Scalar>();
m_angle = Scalar(other.angle());
}
- static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
+ EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }
};
@@ -165,10 +167,10 @@ typedef AngleAxis<double> AngleAxisd;
*/
template<typename Scalar>
template<typename QuatDerived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
{
- using std::atan2;
- using std::abs;
+ EIGEN_USING_STD_MATH(atan2)
+ EIGEN_USING_STD_MATH(abs)
Scalar n = q.vec().norm();
if(n<NumTraits<Scalar>::epsilon())
n = q.vec().stableNorm();
@@ -192,7 +194,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
*/
template<typename Scalar>
template<typename Derived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
{
// Since a direct conversion would not really be faster,
// let's use the robust Quaternion implementation:
@@ -204,7 +206,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
**/
template<typename Scalar>
template<typename Derived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
{
return *this = QuaternionType(mat);
}
@@ -213,10 +215,10 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derive
*/
template<typename Scalar>
typename AngleAxis<Scalar>::Matrix3
-AngleAxis<Scalar>::toRotationMatrix(void) const
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const
{
- using std::sin;
- using std::cos;
+ EIGEN_USING_STD_MATH(sin)
+ EIGEN_USING_STD_MATH(cos)
Matrix3 res;
Vector3 sin_axis = sin(m_angle) * m_axis;
Scalar c = cos(m_angle);
diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h
index 4865e58aa..c633268af 100644
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -33,12 +33,12 @@ namespace Eigen {
* \sa class AngleAxis
*/
template<typename Derived>
-inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
+EIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
{
- using std::atan2;
- using std::sin;
- using std::cos;
+ EIGEN_USING_STD_MATH(atan2)
+ EIGEN_USING_STD_MATH(sin)
+ EIGEN_USING_STD_MATH(cos)
/* Implemented from Graphics Gems IV */
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index a23068c8d..5f0da1a9e 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -68,17 +68,17 @@ template<typename MatrixType,int _Direction> class Homogeneous
typedef MatrixBase<Homogeneous> Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
- explicit inline Homogeneous(const MatrixType& matrix)
+ EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)
: m_matrix(matrix)
{}
- inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
- inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
- const NestedExpression& nestedExpression() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }
template<typename Rhs>
- inline const Product<Homogeneous,Rhs>
+ EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>
operator* (const MatrixBase<Rhs>& rhs) const
{
eigen_assert(int(Direction)==Horizontal);
@@ -86,7 +86,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Lhs> friend
- inline const Product<Lhs,Homogeneous>
+ EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>
operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
@@ -94,7 +94,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Scalar, int Dim, int Mode, int Options> friend
- inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
+ EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
{
eigen_assert(int(Direction)==Vertical);
@@ -102,7 +102,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
}
template<typename Func>
- EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
redux(const Func& func) const
{
return func(m_matrix.redux(func), Scalar(1));
@@ -114,7 +114,9 @@ template<typename MatrixType,int _Direction> class Homogeneous
/** \geometry_module \ingroup Geometry_Module
*
- * \return an expression of the equivalent homogeneous vector
+ * \returns a vector expression that is one longer than the vector argument, with the value 1 symbolically appended as the last coefficient.
+ *
+ * This can be used to convert affine coordinates to homogeneous coordinates.
*
* \only_for_vectors
*
@@ -124,7 +126,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
* \sa VectorwiseOp::homogeneous(), class Homogeneous
*/
template<typename Derived>
-inline typename MatrixBase<Derived>::HomogeneousReturnType
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType
MatrixBase<Derived>::homogeneous() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -133,14 +135,16 @@ MatrixBase<Derived>::homogeneous() const
/** \geometry_module \ingroup Geometry_Module
*
- * \returns a matrix expression of homogeneous column (or row) vectors
+ * \returns an expression where the value 1 is symbolically appended as the final coefficient to each column (or row) of the matrix.
+ *
+ * This can be used to convert affine coordinates to homogeneous coordinates.
*
* Example: \include VectorwiseOp_homogeneous.cpp
* Output: \verbinclude VectorwiseOp_homogeneous.out
*
* \sa MatrixBase::homogeneous(), class Homogeneous */
template<typename ExpressionType, int Direction>
-inline Homogeneous<ExpressionType,Direction>
+EIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>
VectorwiseOp<ExpressionType,Direction>::homogeneous() const
{
return HomogeneousReturnType(_expression());
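A short usage sketch of the documented semantics (assuming <Eigen/Geometry> and the standard typedefs): homogeneous() appends a 1, hnormalized() divides the leading coefficients by the last one, so any overall scale cancels.

    Eigen::Vector3d p(1.0, 2.0, 3.0);
    Eigen::Vector4d h = p.homogeneous();                   // (1, 2, 3, 1)
    Eigen::Vector3d q = (2.0 * h).hnormalized();           // scale cancels: q == p
    Eigen::Matrix3Xd pts = Eigen::Matrix3Xd::Random(3, 5);
    Eigen::Matrix4Xd hpts = pts.colwise().homogeneous();   // appends a row of ones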
@@ -148,14 +152,23 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
/** \geometry_module \ingroup Geometry_Module
*
- * \returns an expression of the homogeneous normalized vector of \c *this
+ * \brief homogeneous normalization
+ *
+ * \returns a vector expression of the N-1 first coefficients of \c *this divided by that last coefficient.
+ *
+ * This can be used to convert homogeneous coordinates to affine coordinates.
+ *
+ * It is essentially a shortcut for:
+ * \code
+ this->head(this->size()-1)/this->coeff(this->size()-1);
+ \endcode
*
* Example: \include MatrixBase_hnormalized.cpp
* Output: \verbinclude MatrixBase_hnormalized.out
*
* \sa VectorwiseOp::hnormalized() */
template<typename Derived>
-inline const typename MatrixBase<Derived>::HNormalizedReturnType
+EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType
MatrixBase<Derived>::hnormalized() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -166,14 +179,20 @@ MatrixBase<Derived>::hnormalized() const
/** \geometry_module \ingroup Geometry_Module
*
- * \returns an expression of the homogeneous normalized vector of \c *this
+ * \brief column or row-wise homogeneous normalization
+ *
+ * \returns an expression of the first N-1 coefficients of each column (or row) of \c *this divided by the last coefficient of each column (or row).
+ *
+ * This can be used to convert homogeneous coordinates to affine coordinates.
+ *
+ * It is conceptually equivalent to applying MatrixBase::hnormalized() to each column (or row) of \c *this.
*
* Example: \include DirectionWise_hnormalized.cpp
* Output: \verbinclude DirectionWise_hnormalized.out
*
* \sa MatrixBase::hnormalized() */
template<typename ExpressionType, int Direction>
-inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
+EIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
VectorwiseOp<ExpressionType,Direction>::hnormalized() const
{
return HNormalized_Block(_expression(),0,0,
@@ -197,7 +216,7 @@ template<typename MatrixOrTransformType>
struct take_matrix_for_product
{
typedef MatrixOrTransformType type;
- static const type& run(const type &x) { return x; }
+ EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }
};
template<typename Scalar, int Dim, int Mode,int Options>
@@ -205,7 +224,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >
{
typedef Transform<Scalar, Dim, Mode, Options> TransformType;
typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;
- static type run (const TransformType& x) { return x.affine(); }
+ EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }
};
template<typename Scalar, int Dim, int Options>
@@ -213,7 +232,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >
{
typedef Transform<Scalar, Dim, Projective, Options> TransformType;
typedef typename TransformType::MatrixType type;
- static const type& run (const TransformType& x) { return x.matrix(); }
+ EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }
};
template<typename MatrixType,typename Lhs>
@@ -238,15 +257,15 @@ struct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;
typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;
- homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
+ EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
: m_lhs(take_matrix_for_product<Lhs>::run(lhs)),
m_rhs(rhs)
{}
- inline Index rows() const { return m_lhs.rows(); }
- inline Index cols() const { return m_rhs.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
- template<typename Dest> void evalTo(Dest& dst) const
+ template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = Block<const LhsMatrixTypeNested,
@@ -277,14 +296,14 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
: public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
{
typedef typename remove_all<typename Rhs::Nested>::type RhsNested;
- homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
+ EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{}
- inline Index rows() const { return m_lhs.rows(); }
- inline Index cols() const { return m_rhs.cols(); }
+ EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
- template<typename Dest> void evalTo(Dest& dst) const
+ template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = m_lhs * Block<const RhsNested,
@@ -317,7 +336,7 @@ struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
typedef typename XprType::PlainObject PlainObject;
typedef evaluator<PlainObject> Base;
- explicit unary_evaluator(const XprType& op)
+ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
: Base(), m_temp(op)
{
::new (static_cast<Base*>(this)) Base(m_temp);
@@ -332,8 +351,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
{
typedef Homogeneous<ArgType,Vertical> SrcXprType;
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
dst.row(dst.rows()-1).setOnes();
}
@@ -344,8 +368,13 @@ template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
{
typedef Homogeneous<ArgType,Horizontal> SrcXprType;
- static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+ EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
dst.col(dst.cols()-1).setOnes();
}
@@ -355,7 +384,7 @@ template<typename LhsArg, typename Rhs, int ProductTag>
struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
{
template<typename Dest>
- static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
+ EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
{
homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
}
@@ -396,7 +425,7 @@ template<typename Lhs, typename RhsArg, int ProductTag>
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
{
template<typename Dest>
- static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+ EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
}
@@ -450,7 +479,7 @@ struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsA
{
typedef Transform<Scalar,Dim,Mode,Options> TransformType;
template<typename Dest>
- static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+ EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
}
diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h
index cc89639b6..07f2659b2 100644
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -50,21 +50,21 @@ public:
typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;
/** Default constructor without initialization */
- inline Hyperplane() {}
+ EIGEN_DEVICE_FUNC inline Hyperplane() {}
template<int OtherOptions>
- Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
: m_coeffs(other.coeffs())
{}
/** Constructs a dynamic-size hyperplane with \a _dim the dimension
* of the ambient space */
- inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
+ EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
/** Constructs a plane from its normal \a n and a point \a e on the plane.
* \warning the vector normal is assumed to be normalized.
*/
- inline Hyperplane(const VectorType& n, const VectorType& e)
+ EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)
: m_coeffs(n.size()+1)
{
normal() = n;
@@ -75,7 +75,7 @@ public:
* such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
* \warning the vector normal is assumed to be normalized.
*/
- inline Hyperplane(const VectorType& n, const Scalar& d)
+ EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)
: m_coeffs(n.size()+1)
{
normal() = n;
@@ -85,7 +85,7 @@ public:
/** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
 * is greater than 2, the hyperplane is not uniquely determined, so an arbitrary choice is made.
*/
- static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
+ EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
{
Hyperplane result(p0.size());
result.normal() = (p1 - p0).unitOrthogonal();
@@ -96,7 +96,7 @@ public:
/** Constructs a hyperplane passing through the three points. The dimension of the ambient space
* is required to be exactly 3.
*/
- static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
+ EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
Hyperplane result(p0.size());
@@ -120,19 +120,19 @@ public:
* so an arbitrary choice is made.
*/
// FIXME to be consistent with the rest this could be implemented as a static Through function ??
- explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
+ EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
{
normal() = parametrized.direction().unitOrthogonal();
offset() = -parametrized.origin().dot(normal());
}
- ~Hyperplane() {}
+ EIGEN_DEVICE_FUNC ~Hyperplane() {}
/** \returns the dimension in which the plane holds */
- inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
+ EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
/** normalizes \c *this */
- void normalize(void)
+ EIGEN_DEVICE_FUNC void normalize(void)
{
m_coeffs /= normal().norm();
}
@@ -140,45 +140,45 @@ public:
/** \returns the signed distance between the plane \c *this and a point \a p.
* \sa absDistance()
*/
- inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
+ EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
/** \returns the absolute distance between the plane \c *this and a point \a p.
* \sa signedDistance()
*/
- inline Scalar absDistance(const VectorType& p) const { using std::abs; return abs(signedDistance(p)); }
+ EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }
/** \returns the projection of a point \a p onto the plane \c *this.
*/
- inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
+ EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
/** \returns a constant reference to the unit normal vector of the plane, which corresponds
* to the linear part of the implicit equation.
*/
- inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
+ EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
/** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
* to the linear part of the implicit equation.
*/
- inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
+ EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
/** \returns the distance to the origin, which is also the "constant term" of the implicit equation
* \warning the vector normal is assumed to be normalized.
*/
- inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
+ EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
/** \returns a non-constant reference to the distance to the origin, which is also the constant part
* of the implicit equation */
- inline Scalar& offset() { return m_coeffs(dim()); }
+ EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }
/** \returns a constant reference to the coefficients c_i of the plane equation:
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
*/
- inline const Coefficients& coeffs() const { return m_coeffs; }
+ EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
/** \returns a non-constant reference to the coefficients c_i of the plane equation:
* \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
*/
- inline Coefficients& coeffs() { return m_coeffs; }
+ EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
/** \returns the intersection of *this with \a other.
*
@@ -186,16 +186,15 @@ public:
*
* \note If \a other is approximately parallel to *this, this method will return any point on *this.
*/
- VectorType intersection(const Hyperplane& other) const
+ EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
{
- using std::abs;
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
// since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
// whether the two lines are approximately parallel.
if(internal::isMuchSmallerThan(det, Scalar(1)))
{ // special case where the two lines are approximately parallel. Pick any point on the first line.
- if(abs(coeffs().coeff(1))>abs(coeffs().coeff(0)))
+ if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))
return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
else
return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
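
For the common, non-parallel case, a minimal usage sketch (values illustrative):

    using namespace Eigen;
    typedef Hyperplane<double,2> Line2;
    Line2 l1 = Line2::Through(Vector2d(0,0), Vector2d(1,1));  // the line y = x
    Line2 l2 = Line2::Through(Vector2d(0,1), Vector2d(1,0));  // the line y = 1 - x
    Vector2d p = l1.intersection(l2);                         // (0.5, 0.5)
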
@@ -215,7 +214,7 @@ public:
* or a more generic #Affine transformation. The default is #Affine.
*/
template<typename XprType>
- inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
+ EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
{
if (traits==Affine)
normal() = mat.inverse().transpose() * normal();
@@ -236,7 +235,7 @@ public:
* Other kinds of transformations are not supported.
*/
template<int TrOptions>
- inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
+ EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
TransformTraits traits = Affine)
{
transform(t.linear(), traits);
@@ -250,7 +249,7 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<Hyperplane,
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,
Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
{
return typename internal::cast_return_type<Hyperplane,
@@ -259,7 +258,7 @@ public:
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType,int OtherOptions>
- inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -267,7 +266,7 @@ public:
*
* \sa MatrixBase::isApprox() */
template<int OtherOptions>
- bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
protected:
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index c3648f51f..a035e6310 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -27,7 +27,7 @@ namespace Eigen {
template<typename Derived>
template<typename OtherDerived>
#ifndef EIGEN_PARSED_BY_DOXYGEN
-inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
#else
inline typename MatrixBase<Derived>::PlainObject
#endif
@@ -53,7 +53,7 @@ template< int Arch,typename VectorLhs,typename VectorRhs,
typename Scalar = typename VectorLhs::Scalar,
bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>
struct cross3_impl {
- static inline typename internal::plain_matrix_type<VectorLhs>::type
+ EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs)
{
return typename internal::plain_matrix_type<VectorLhs>::type(
@@ -78,7 +78,7 @@ struct cross3_impl {
*/
template<typename Derived>
template<typename OtherDerived>
-inline typename MatrixBase<Derived>::PlainObject
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
@@ -105,6 +105,7 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
* \sa MatrixBase::cross() */
template<typename ExpressionType, int Direction>
template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
const typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType
VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
{
@@ -221,7 +222,7 @@ struct unitOrthogonal_selector<Derived,2>
* \sa cross()
*/
template<typename Derived>
-typename MatrixBase<Derived>::PlainObject
+EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::unitOrthogonal() const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
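
A short sketch of cross() and unitOrthogonal() together (fixed-size 3D vectors, illustrative values):

    using namespace Eigen;
    Vector3f a = Vector3f::UnitX(), b = Vector3f::UnitY();
    Vector3f c = a.cross(b);          // (0,0,1)
    Vector3f n = a.unitOrthogonal();  // some unit vector with n.dot(a) == 0
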
diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h
index c43dce773..1e985d8cd 100644
--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -41,45 +41,45 @@ public:
typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;
/** Default constructor without initialization */
- inline ParametrizedLine() {}
+ EIGEN_DEVICE_FUNC inline ParametrizedLine() {}
template<int OtherOptions>
- ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
: m_origin(other.origin()), m_direction(other.direction())
{}
/** Constructs a dynamic-size line with \a _dim the dimension
* of the ambient space */
- inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
+ EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
/** Initializes a parametrized line of direction \a direction and origin \a origin.
* \warning the vector direction is assumed to be normalized.
*/
- ParametrizedLine(const VectorType& origin, const VectorType& direction)
+ EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)
: m_origin(origin), m_direction(direction) {}
template <int OtherOptions>
- explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
+ EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
/** Constructs a parametrized line going from \a p0 to \a p1. */
- static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
+ EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
{ return ParametrizedLine(p0, (p1-p0).normalized()); }
- ~ParametrizedLine() {}
+ EIGEN_DEVICE_FUNC ~ParametrizedLine() {}
/** \returns the dimension of the ambient space in which the line is defined */
- inline Index dim() const { return m_direction.size(); }
+ EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }
- const VectorType& origin() const { return m_origin; }
- VectorType& origin() { return m_origin; }
+ EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }
+ EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }
- const VectorType& direction() const { return m_direction; }
- VectorType& direction() { return m_direction; }
+ EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }
+ EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }
/** \returns the squared distance of a point \a p to its projection onto the line \c *this.
* \sa distance()
*/
- RealScalar squaredDistance(const VectorType& p) const
+ EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const
{
VectorType diff = p - origin();
return (diff - direction().dot(diff) * direction()).squaredNorm();
@@ -87,22 +87,22 @@ public:
/** \returns the distance of a point \a p to its projection onto the line \c *this.
* \sa squaredDistance()
*/
- RealScalar distance(const VectorType& p) const { using std::sqrt; return sqrt(squaredDistance(p)); }
+ EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); }
/** \returns the projection of a point \a p onto the line \c *this. */
- VectorType projection(const VectorType& p) const
+ EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const
{ return origin() + direction().dot(p-origin()) * direction(); }
- VectorType pointAt(const Scalar& t) const;
+ EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;
template <int OtherOptions>
- Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+ EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
template <int OtherOptions>
- Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+ EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
template <int OtherOptions>
- VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+ EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -110,7 +110,7 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<ParametrizedLine,
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,
ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
{
return typename internal::cast_return_type<ParametrizedLine,
@@ -119,7 +119,7 @@ public:
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType,int OtherOptions>
- inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
{
m_origin = other.origin().template cast<Scalar>();
m_direction = other.direction().template cast<Scalar>();
@@ -129,7 +129,7 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
protected:
@@ -143,7 +143,7 @@ protected:
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
-inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
+EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
direction() = hyperplane.normal().unitOrthogonal();
@@ -153,7 +153,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H
/** \returns the point at \a t along this line
*/
template <typename _Scalar, int _AmbientDim, int _Options>
-inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
{
return origin() + (direction()*t);
@@ -163,7 +163,7 @@ ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
-inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
/ hyperplane.normal().dot(direction());
@@ -175,7 +175,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPara
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
-inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return intersectionParameter(hyperplane);
}
@@ -184,7 +184,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(con
*/
template <typename _Scalar, int _AmbientDim, int _Options>
template <int OtherOptions>
-inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
{
return pointAt(intersectionParameter(hyperplane));
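
Putting the three intersection flavors together, a minimal sketch (the plane z = 1 and a line along the z axis; values illustrative):

    using namespace Eigen;
    ParametrizedLine<float,3> l(Vector3f(0,0,0), Vector3f(0,0,1));
    Hyperplane<float,3> plane(Vector3f::UnitZ(), -1.0f);  // n.dot(x) + d = 0, i.e. z = 1
    float    t = l.intersectionParameter(plane);  // 1
    Vector3f x = l.intersectionPoint(plane);      // (0,0,1), same as l.pointAt(t)
    float    d = l.distance(Vector3f(1,0,0));     // 1, the distance to the z axis
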
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index c4a0eabb5..f6ef1bcf6 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -58,37 +58,37 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** \returns the \c x coefficient */
- inline Scalar x() const { return this->derived().coeffs().coeff(0); }
+ EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */
- inline Scalar y() const { return this->derived().coeffs().coeff(1); }
+ EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */
- inline Scalar z() const { return this->derived().coeffs().coeff(2); }
+ EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */
- inline Scalar w() const { return this->derived().coeffs().coeff(3); }
+ EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient */
- inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
+ EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
/** \returns a reference to the \c y coefficient */
- inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
+ EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
/** \returns a reference to the \c z coefficient */
- inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
+ EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
/** \returns a reference to the \c w coefficient */
- inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
+ EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */
- inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
+ EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
/** \returns a vector expression of the imaginary part (x,y,z) */
- inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
+ EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
/** \returns a read-only vector expression of the coefficients (x,y,z,w) */
- inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
+ EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
/** \returns a vector expression of the coefficients (x,y,z,w) */
- inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
+ EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
- EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
- template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
+ template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
// disabled this copy operator as it is giving very strange compilation errors when compiling
// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
@@ -97,72 +97,72 @@ class QuaternionBase : public RotationBase<Derived, 3>
// Derived& operator=(const QuaternionBase& other)
// { return operator=<Derived>(other); }
- Derived& operator=(const AngleAxisType& aa);
- template<class OtherDerived> Derived& operator=(const MatrixBase<OtherDerived>& m);
+ EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
+ template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
/** \returns a quaternion representing an identity rotation
* \sa MatrixBase::Identity()
*/
- static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
+ EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
/** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
*/
- inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
+ EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
/** \returns the squared norm of the quaternion's coefficients
* \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
*/
- inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
+ EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
/** \returns the norm of the quaternion's coefficients
* \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
*/
- inline Scalar norm() const { return coeffs().norm(); }
+ EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }
/** Normalizes the quaternion \c *this
* \sa normalized(), MatrixBase::normalize() */
- inline void normalize() { coeffs().normalize(); }
+ EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }
/** \returns a normalized copy of \c *this
* \sa normalize(), MatrixBase::normalized() */
- inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
+ EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
/** \returns the dot product of \c *this and \a other
* Geometrically speaking, the dot product of two unit quaternions
* corresponds to the cosine of half the angle between the two rotations.
* \sa angularDistance()
*/
- template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
+ template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
- template<class OtherDerived> Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
+ template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
/** \returns an equivalent 3x3 rotation matrix */
- Matrix3 toRotationMatrix() const;
+ EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const;
/** \returns the quaternion which transforms \a a into \a b through a rotation */
template<typename Derived1, typename Derived2>
- Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+ EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
- template<class OtherDerived> EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
- template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
+ template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
+ template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
/** \returns the quaternion describing the inverse rotation */
- Quaternion<Scalar> inverse() const;
+ EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
/** \returns the conjugated quaternion */
- Quaternion<Scalar> conjugate() const;
+ EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
- template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
+ template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
template<class OtherDerived>
- bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
{ return coeffs().isApprox(other.coeffs(), prec); }
/** \returns the result of applying the rotation to the vector \a v */
- EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
/** \returns \c *this with scalar type casted to \a NewScalarType
*
@@ -170,7 +170,7 @@ class QuaternionBase : public RotationBase<Derived, 3>
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
{
return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
}
@@ -239,7 +239,7 @@ public:
typedef typename Base::AngleAxisType AngleAxisType;
/** Default constructor leaving the quaternion uninitialized. */
- inline Quaternion() {}
+ EIGEN_DEVICE_FUNC inline Quaternion() {}
/** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
* its four coefficients \a w, \a x, \a y and \a z.
@@ -248,36 +248,36 @@ public:
* while internally the coefficients are stored in the following order:
* [\c x, \c y, \c z, \c w]
*/
- inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
+ EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
/** Constructs and initializes a quaternion from the array data */
- explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
+ EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
/** Copy constructor */
- template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
+ template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
/** Constructs and initializes a quaternion from the angle-axis \a aa */
- explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
+ EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
/** Constructs and initializes a quaternion from either:
* - a rotation matrix expression,
* - a 4D vector expression representing quaternion coefficients.
*/
template<typename Derived>
- explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
+ EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
/** Explicit copy constructor with scalar conversion */
template<typename OtherScalar, int OtherOptions>
- explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
+ EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
{ m_coeffs = other.coeffs().template cast<Scalar>(); }
- static Quaternion UnitRandom();
+ EIGEN_DEVICE_FUNC static Quaternion UnitRandom();
template<typename Derived1, typename Derived2>
- static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+ EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
- inline Coefficients& coeffs() { return m_coeffs;}
- inline const Coefficients& coeffs() const { return m_coeffs;}
+ EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
+ EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
@@ -357,9 +357,9 @@ class Map<const Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
- explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
+ EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
- inline const Coefficients& coeffs() const { return m_coeffs;}
+ EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
protected:
const Coefficients m_coeffs;
@@ -394,10 +394,10 @@ class Map<Quaternion<_Scalar>, _Options >
* \code *coeffs == {x, y, z, w} \endcode
*
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
- explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
+ EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
- inline Coefficients& coeffs() { return m_coeffs; }
- inline const Coefficients& coeffs() const { return m_coeffs; }
+ EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
+ EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
protected:
Coefficients m_coeffs;
@@ -425,7 +425,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
namespace internal {
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
{
- static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
return Quaternion<Scalar>
(
a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
@@ -440,7 +440,7 @@ template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options
/** \returns the concatenation of two rotations as a quaternion-quaternion product */
template <class Derived>
template <class OtherDerived>
-EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
@@ -453,7 +453,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
/** \sa operator*(Quaternion) */
template <class Derived>
template <class OtherDerived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
{
derived() = derived() * other.derived();
return derived();
@@ -467,7 +467,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
* - Via a Matrix3: 24 + 15n
*/
template <class Derived>
-EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
QuaternionBase<Derived>::_transformVector(const Vector3& v) const
{
// Note that this algorithm comes from the optimization by hand
@@ -481,7 +481,7 @@ QuaternionBase<Derived>::_transformVector(const Vector3& v) const
}
template<class Derived>
-EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
{
coeffs() = other.coeffs();
return derived();
@@ -489,7 +489,7 @@ EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(
template<class Derived>
template<class OtherDerived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
{
coeffs() = other.coeffs();
return derived();
@@ -498,10 +498,10 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const Quaternion
/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
*/
template<class Derived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
{
- using std::cos;
- using std::sin;
+ EIGEN_USING_STD_MATH(cos)
+ EIGEN_USING_STD_MATH(sin)
Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
this->w() = cos(ha);
this->vec() = sin(ha) * aa.axis();
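
As a quick illustration of this assignment (a quarter turn about z; pi written out for brevity):

    using namespace Eigen;
    const float pi = 3.14159265f;
    Quaternionf q;
    q = AngleAxisf(0.5f*pi, Vector3f::UnitZ());
    Vector3f v = q * Vector3f::UnitX();  // approximately UnitY()
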
@@ -516,7 +516,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisT
template<class Derived>
template<class MatrixDerived>
-inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
{
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
@@ -528,7 +528,7 @@ inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerive
* be normalized, otherwise the result is undefined.
*/
template<class Derived>
-inline typename QuaternionBase<Derived>::Matrix3
+EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
QuaternionBase<Derived>::toRotationMatrix(void) const
{
// NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
@@ -575,9 +575,9 @@ QuaternionBase<Derived>::toRotationMatrix(void) const
*/
template<class Derived>
template<typename Derived1, typename Derived2>
-inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
{
- using std::sqrt;
+ EIGEN_USING_STD_MATH(sqrt)
Vector3 v0 = a.normalized();
Vector3 v1 = b.normalized();
Scalar c = v1.dot(v0);
@@ -616,11 +616,11 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
* \note The implementation is based on http://planning.cs.uiuc.edu/node198.html
*/
template<typename Scalar, int Options>
-Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
{
- using std::sqrt;
- using std::sin;
- using std::cos;
+ EIGEN_USING_STD_MATH(sqrt)
+ EIGEN_USING_STD_MATH(sin)
+ EIGEN_USING_STD_MATH(cos)
const Scalar u1 = internal::random<Scalar>(0, 1),
u2 = internal::random<Scalar>(0, 2*EIGEN_PI),
u3 = internal::random<Scalar>(0, 2*EIGEN_PI);
@@ -642,7 +642,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
*/
template<typename Scalar, int Options>
template<typename Derived1, typename Derived2>
-Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
{
Quaternion quat;
quat.setFromTwoVectors(a, b);
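
A one-line sketch of this static variant (illustrative vectors):

    using namespace Eigen;
    Quaternionf q = Quaternionf::FromTwoVectors(Vector3f::UnitX(), Vector3f::UnitY());
    // (q * Vector3f::UnitX()).isApprox(Vector3f::UnitY()) holds
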
@@ -657,7 +657,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const Matr
* \sa QuaternionBase::conjugate()
*/
template <class Derived>
-inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
{
// FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ??
Scalar n2 = this->squaredNorm();
@@ -674,7 +674,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
{
- static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
}
};
@@ -687,7 +687,7 @@ template<int Arch, class Derived, typename Scalar, int _Options> struct quat_con
* \sa QuaternionBase::inverse()
*/
template <class Derived>
-inline Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const
{
return internal::quat_conj<Architecture::Target, Derived,
@@ -701,13 +701,12 @@ QuaternionBase<Derived>::conjugate() const
*/
template <class Derived>
template <class OtherDerived>
-inline typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
{
- using std::atan2;
- using std::abs;
+ EIGEN_USING_STD_MATH(atan2)
Quaternion<Scalar> d = (*this) * other.conjugate();
- return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
+ return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );
}
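
A minimal sketch of angularDistance() (two rotations about the same axis; values illustrative):

    using namespace Eigen;
    const float pi = 3.14159265f;
    Quaternionf qa(AngleAxisf(0.0f,     Vector3f::UnitZ()));
    Quaternionf qb(AngleAxisf(0.25f*pi, Vector3f::UnitZ()));
    float theta = qa.angularDistance(qb);  // approximately 0.25*pi
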
@@ -720,15 +719,14 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
*/
template <class Derived>
template <class OtherDerived>
-Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
{
- using std::acos;
- using std::sin;
- using std::abs;
+ EIGEN_USING_STD_MATH(acos)
+ EIGEN_USING_STD_MATH(sin)
const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
Scalar d = this->dot(other);
- Scalar absD = abs(d);
+ Scalar absD = numext::abs(d);
Scalar scale0;
Scalar scale1;
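
Continuing with qa and qb from the sketch above, slerp() interpolates between the two unit quaternions:

    Quaternionf qm = qa.slerp(0.5f, qb);  // halfway: a rotation of about 0.125*pi about z
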
@@ -759,10 +757,10 @@ template<typename Other>
struct quaternionbase_assign_impl<Other,3,3>
{
typedef typename Other::Scalar Scalar;
- template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
+ template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
{
const typename internal::nested_eval<Other,2>::type mat(a_mat);
- using std::sqrt;
+ EIGEN_USING_STD_MATH(sqrt)
// This algorithm comes from "Quaternion Calculus and Fast Animation",
// Ken Shoemake, 1987 SIGGRAPH course notes
Scalar t = mat.trace();
@@ -800,7 +798,7 @@ template<typename Other>
struct quaternionbase_assign_impl<Other,4,1>
{
typedef typename Other::Scalar Scalar;
- template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& vec)
+ template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
{
q.coeffs() = vec;
}
diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h
index b42a7df70..884b7d0ee 100644
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -59,35 +59,35 @@ protected:
public:
/** Construct a 2D counterclockwise rotation from the angle \a a in radians. */
- explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
+ EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
/** Default constructor without initialization. The represented rotation is undefined. */
- Rotation2D() {}
+ EIGEN_DEVICE_FUNC Rotation2D() {}
/** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
*
* \sa fromRotationMatrix()
*/
template<typename Derived>
- explicit Rotation2D(const MatrixBase<Derived>& m)
+ EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)
{
fromRotationMatrix(m.derived());
}
/** \returns the rotation angle */
- inline Scalar angle() const { return m_angle; }
+ EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }
/** \returns a read-write reference to the rotation angle */
- inline Scalar& angle() { return m_angle; }
+ EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }
/** \returns the rotation angle in [0,2pi] */
- inline Scalar smallestPositiveAngle() const {
+ EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
}
/** \returns the rotation angle in [-pi,pi] */
- inline Scalar smallestAngle() const {
+ EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {
Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
if(tmp>Scalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI);
else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
@@ -95,23 +95,23 @@ public:
}
/** \returns the inverse rotation */
- inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
+ EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
/** Concatenates two rotations */
- inline Rotation2D operator*(const Rotation2D& other) const
+ EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const
{ return Rotation2D(m_angle + other.m_angle); }
/** Concatenates two rotations */
- inline Rotation2D& operator*=(const Rotation2D& other)
+ EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)
{ m_angle += other.m_angle; return *this; }
/** Applies the rotation to a 2D vector */
- Vector2 operator* (const Vector2& vec) const
+ EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const
{ return toRotationMatrix() * vec; }
template<typename Derived>
- Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
- Matrix2 toRotationMatrix() const;
+ EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
+ EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;
/** Set \c *this from a 2x2 rotation matrix \a mat.
* In other words, this function extracts the rotation angle from the rotation matrix.
@@ -121,13 +121,13 @@ public:
* \sa fromRotationMatrix()
*/
template<typename Derived>
- Rotation2D& operator=(const MatrixBase<Derived>& m)
+ EIGEN_DEVICE_FUNC Rotation2D& operator=(const MatrixBase<Derived>& m)
{ return fromRotationMatrix(m.derived()); }
/** \returns the spherical interpolation between \c *this and \a other using
* parameter \a t. It is in fact equivalent to a linear interpolation.
*/
- inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
+ EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
{
Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();
return Rotation2D(m_angle + dist*t);
@@ -139,23 +139,23 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
{ return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
- inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
+ EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
{
m_angle = Scalar(other.angle());
}
- static inline Rotation2D Identity() { return Rotation2D(0); }
+ EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return internal::isApprox(m_angle,other.m_angle, prec); }
};
@@ -173,9 +173,9 @@ typedef Rotation2D<double> Rotation2Dd;
*/
template<typename Scalar>
template<typename Derived>
-Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
{
- using std::atan2;
+ EIGEN_USING_STD_MATH(atan2)
EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));
return *this;
@@ -185,10 +185,10 @@ Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Deri
*/
template<typename Scalar>
typename Rotation2D<Scalar>::Matrix2
-Rotation2D<Scalar>::toRotationMatrix(void) const
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const
{
- using std::sin;
- using std::cos;
+ EIGEN_USING_STD_MATH(sin)
+ EIGEN_USING_STD_MATH(cos)
Scalar sinA = sin(m_angle);
Scalar cosA = cos(m_angle);
return (Matrix2() << cosA, -sinA, sinA, cosA).finished();
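
A round-trip sketch for Rotation2D (a quarter turn; values illustrative):

    using namespace Eigen;
    const float pi = 3.14159265f;
    Rotation2Df r(0.5f*pi);
    Matrix2f R = r.toRotationMatrix();
    Vector2f v = r * Vector2f::UnitX();        // approximately UnitY()
    Rotation2Df r2; r2.fromRotationMatrix(R);  // r2.angle() ~ r.smallestAngle()
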
diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h
index fadfd9151..f0ee0bd03 100644
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -38,26 +38,26 @@ class RotationBase
typedef Matrix<Scalar,Dim,1> VectorType;
public:
- inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
- inline Derived& derived() { return *static_cast<Derived*>(this); }
+ EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+ EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
/** \returns an equivalent rotation matrix */
- inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
+ EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
/** \returns an equivalent rotation matrix
* This function is added to conform with the Transform class's naming scheme.
*/
- inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
+ EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
/** \returns the inverse rotation */
- inline Derived inverse() const { return derived().inverse(); }
+ EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }
/** \returns the concatenation of the rotation \c *this with a translation \a t */
- inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
+ EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
{ return Transform<Scalar,Dim,Isometry>(*this) * t; }
/** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
- inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
+ EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
{ return toRotationMatrix() * s.factor(); }
/** \returns the concatenation of the rotation \c *this with a generic expression \a e
@@ -67,17 +67,17 @@ class RotationBase
* - a vector of size Dim
*/
template<typename OtherDerived>
- EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
operator*(const EigenBase<OtherDerived>& e) const
{ return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
/** \returns the concatenation of a linear transformation \a l with the rotation \a r */
template<typename OtherDerived> friend
- inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
+ EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
{ return l.derived() * r.toRotationMatrix(); }
/** \returns the concatenation of a scaling \a l with the rotation \a r */
- friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
+ EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
{
Transform<Scalar,Dim,Affine> res(r);
res.linear().applyOnTheLeft(l);
@@ -86,11 +86,11 @@ class RotationBase
/** \returns the concatenation of the rotation \c *this with a transformation \a t */
template<int Mode, int Options>
- inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
+ EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
{ return toRotationMatrix() * t; }
template<typename OtherVectorType>
- inline VectorType _transformVector(const OtherVectorType& v) const
+ EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const
{ return toRotationMatrix() * v; }
};
@@ -102,7 +102,7 @@ struct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>
{
enum { Dim = RotationDerived::Dim };
typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;
- static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
+ EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
{ return r.toRotationMatrix() * m; }
};
@@ -110,7 +110,7 @@ template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
{
typedef Transform<Scalar,Dim,Affine> ReturnType;
- static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
+ EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
{
ReturnType res(r);
res.linear() *= m;
@@ -123,7 +123,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
{
enum { Dim = RotationDerived::Dim };
typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
- static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
{
return r._transformVector(v);
}
@@ -137,7 +137,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
*/
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
template<typename OtherDerived>
-Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
{
EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
@@ -150,7 +150,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
*/
template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
template<typename OtherDerived>
-Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
{
@@ -179,20 +179,20 @@ namespace internal {
* \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis
*/
template<typename Scalar, int Dim>
-static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
return Rotation2D<Scalar>(s).toRotationMatrix();
}
template<typename Scalar, int Dim, typename OtherDerived>
-static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
{
return r.toRotationMatrix();
}
template<typename Scalar, int Dim, typename OtherDerived>
-static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
+EIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
{
EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
YOU_MADE_A_PROGRAMMING_MISTAKE)
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 8f6c62d63..3f31ee45d 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -253,43 +253,43 @@ public:
/** Default constructor without initialization of the meaningful coefficients.
* If Mode==Affine, then the last row is set to [0 ... 0 1] */
- inline Transform()
+ EIGEN_DEVICE_FUNC inline Transform()
{
check_template_params();
internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
}
- inline Transform(const Transform& other)
+ EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
{
check_template_params();
m_matrix = other.m_matrix;
}
- inline explicit Transform(const TranslationType& t)
+ EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)
{
check_template_params();
*this = t;
}
- inline explicit Transform(const UniformScaling<Scalar>& s)
+ EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)
{
check_template_params();
*this = s;
}
template<typename Derived>
- inline explicit Transform(const RotationBase<Derived, Dim>& r)
+ EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)
{
check_template_params();
*this = r;
}
- inline Transform& operator=(const Transform& other)
+ EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other)
{ m_matrix = other.m_matrix; return *this; }
typedef internal::transform_take_affine_part<Transform> take_affine_part;
/** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
template<typename OtherDerived>
- inline explicit Transform(const EigenBase<OtherDerived>& other)
+ EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -300,7 +300,7 @@ public:
/** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
template<typename OtherDerived>
- inline Transform& operator=(const EigenBase<OtherDerived>& other)
+ EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -310,7 +310,7 @@ public:
}
template<int OtherOptions>
- inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
{
check_template_params();
// only the options change, we can directly copy the matrices
@@ -318,7 +318,7 @@ public:
}
template<int OtherMode,int OtherOptions>
- inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
+ EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
{
check_template_params();
// prevent conversions as:
@@ -359,14 +359,14 @@ public:
}
template<typename OtherDerived>
- Transform(const ReturnByValue<OtherDerived>& other)
+ EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)
{
check_template_params();
other.evalTo(*this);
}
template<typename OtherDerived>
- Transform& operator=(const ReturnByValue<OtherDerived>& other)
+ EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)
{
other.evalTo(*this);
return *this;
@@ -381,35 +381,35 @@ public:
inline QTransform toQTransform(void) const;
#endif
- Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
- Index cols() const { return m_matrix.cols(); }
+ EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
+ EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operator(Index,Index) const */
- inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
+ EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
/** shortcut for m_matrix(row,col);
* \sa MatrixBase::operator(Index,Index) */
- inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
+ EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
/** \returns a read-only expression of the transformation matrix */
- inline const MatrixType& matrix() const { return m_matrix; }
+ EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }
/** \returns a writable expression of the transformation matrix */
- inline MatrixType& matrix() { return m_matrix; }
+ EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }
/** \returns a read-only expression of the linear part of the transformation */
- inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
+ EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
/** \returns a writable expression of the linear part of the transformation */
- inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
+ EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
/** \returns a read-only expression of the Dim x HDim affine part of the transformation */
- inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
+ EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
/** \returns a writable expression of the Dim x HDim affine part of the transformation */
- inline AffinePart affine() { return take_affine_part::run(m_matrix); }
+ EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }
/** \returns a read-only expression of the translation vector of the transformation */
- inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
+ EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
/** \returns a writable expression of the translation vector of the transformation */
- inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
+ EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
/** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
*
@@ -437,7 +437,7 @@ public:
*/
// note: this function is defined here because some compilers cannot find the respective declaration
template<typename OtherDerived>
- EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
operator * (const EigenBase<OtherDerived> &other) const
{ return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
@@ -449,7 +449,7 @@ public:
* \li a general transformation matrix of size Dim+1 x Dim+1.
*/
template<typename OtherDerived> friend
- inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
+ EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
operator * (const EigenBase<OtherDerived> &a, const Transform &b)
{ return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }
@@ -460,7 +460,7 @@ public:
* mode is not an isometry. In that case, the returned transform is an affinity.
*/
template<typename DiagonalDerived>
- inline const TransformTimeDiagonalReturnType
+ EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType
operator * (const DiagonalBase<DiagonalDerived> &b) const
{
TransformTimeDiagonalReturnType res(*this);
@@ -475,7 +475,7 @@ public:
* mode is not an isometry. In that case, the returned transform is an affinity.
*/
template<typename DiagonalDerived>
- friend inline TransformTimeDiagonalReturnType
+ EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType
operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)
{
TransformTimeDiagonalReturnType res;
@@ -487,10 +487,10 @@ public:
}
template<typename OtherDerived>
- inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
+ EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
/** Concatenates two transformations */
- inline const Transform operator * (const Transform& other) const
+ EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const
{
return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
}
@@ -522,7 +522,7 @@ public:
#else
/** Concatenates two different transformations */
template<int OtherMode,int OtherOptions>
- inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+ EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
{
return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
@@ -530,47 +530,61 @@ public:
#endif
/** \sa MatrixBase::setIdentity() */
- void setIdentity() { m_matrix.setIdentity(); }
+ EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }
/**
* \brief Returns an identity transformation.
* \todo In the future this function should be returning a Transform expression.
*/
- static const Transform Identity()
+ EIGEN_DEVICE_FUNC static const Transform Identity()
{
return Transform(MatrixType::Identity());
}
template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
inline Transform& scale(const MatrixBase<OtherDerived> &other);
template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
inline Transform& prescale(const MatrixBase<OtherDerived> &other);
- inline Transform& scale(const Scalar& s);
- inline Transform& prescale(const Scalar& s);
+ EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);
+ EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);
template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
inline Transform& translate(const MatrixBase<OtherDerived> &other);
template<typename OtherDerived>
+ EIGEN_DEVICE_FUNC
inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);
template<typename RotationType>
+ EIGEN_DEVICE_FUNC
inline Transform& rotate(const RotationType& rotation);
template<typename RotationType>
+ EIGEN_DEVICE_FUNC
inline Transform& prerotate(const RotationType& rotation);
- Transform& shear(const Scalar& sx, const Scalar& sy);
- Transform& preshear(const Scalar& sx, const Scalar& sy);
+ EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);
+ EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);
- inline Transform& operator=(const TranslationType& t);
+ EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);
+
+ EIGEN_DEVICE_FUNC
inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }
- inline Transform operator*(const TranslationType& t) const;
+
+ EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;
+ EIGEN_DEVICE_FUNC
inline Transform& operator=(const UniformScaling<Scalar>& t);
+
+ EIGEN_DEVICE_FUNC
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
+
+ EIGEN_DEVICE_FUNC
inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const
{
TransformTimeDiagonalReturnType res = *this;
@@ -578,31 +592,36 @@ public:
return res;
}
+ EIGEN_DEVICE_FUNC
inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
template<typename Derived>
- inline Transform& operator=(const RotationBase<Derived,Dim>& r);
+ EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);
template<typename Derived>
- inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
+ EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
template<typename Derived>
- inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
+ EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
- const LinearMatrixType rotation() const;
+ EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const;
template<typename RotationMatrixType, typename ScalingMatrixType>
+ EIGEN_DEVICE_FUNC
void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;
template<typename ScalingMatrixType, typename RotationMatrixType>
+ EIGEN_DEVICE_FUNC
void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
+ EIGEN_DEVICE_FUNC
Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);
+ EIGEN_DEVICE_FUNC
inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
/** \returns a const pointer to the column major internal matrix */
- const Scalar* data() const { return m_matrix.data(); }
+ EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
/** \returns a non-const pointer to the column major internal matrix */
- Scalar* data() { return m_matrix.data(); }
+ EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
/** \returns \c *this with scalar type cast to \a NewScalarType
*
@@ -610,12 +629,12 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
{ return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
- inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
+ EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
{
check_template_params();
m_matrix = other.matrix().template cast<Scalar>();
@@ -625,12 +644,12 @@ public:
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_matrix.isApprox(other.m_matrix, prec); }
/** Sets the last row to [0 ... 0 1]
*/
- void makeAffine()
+ EIGEN_DEVICE_FUNC void makeAffine()
{
internal::transform_make_affine<int(Mode)>::run(m_matrix);
}
@@ -639,26 +658,26 @@ public:
* \returns the Dim x Dim linear part if the transformation is affine,
* and the HDim x Dim part for projective transformations.
*/
- inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
+ EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
/** \internal
* \returns the Dim x Dim linear part if the transformation is affine,
* and the HDim x Dim part for projective transformations.
*/
- inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
+ EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
/** \internal
* \returns the translation part if the transformation is affine,
* and the last column for projective transformations.
*/
- inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
+ EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
/** \internal
* \returns the translation part if the transformation is affine,
* and the last column for projective transformations.
*/
- inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
+ EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
{ return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
@@ -668,7 +687,7 @@ public:
protected:
#ifndef EIGEN_PARSED_BY_DOXYGEN
- static EIGEN_STRONG_INLINE void check_template_params()
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()
{
EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)
}
@@ -821,7 +840,7 @@ QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -835,7 +854,7 @@ Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
* \sa prescale(Scalar)
*/
template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
linearExt() *= s;
@@ -848,7 +867,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::s
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -862,7 +881,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
* \sa scale(Scalar)
*/
template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
{
EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
m_matrix.template topRows<Dim>() *= s;
@@ -875,7 +894,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::p
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -889,7 +908,7 @@ Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &ot
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)
{
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -919,7 +938,7 @@ Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived>
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationType>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
{
linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);
@@ -935,7 +954,7 @@ Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationType>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
{
m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)
@@ -949,7 +968,7 @@ Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
* \sa preshear()
*/
template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -965,7 +984,7 @@ Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
* \sa shear()
*/
template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
{
EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -979,7 +998,7 @@ Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
******************************************************/
template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
{
linear().setIdentity();
translation() = t.vector();
@@ -988,7 +1007,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
}
template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
{
Transform res = *this;
res.translate(t.vector());
@@ -996,7 +1015,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
}
template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
{
m_matrix.setZero();
linear().diagonal().fill(s.factor());
@@ -1006,7 +1025,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
template<typename Scalar, int Dim, int Mode, int Options>
template<typename Derived>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
{
linear() = internal::toRotationMatrix<Scalar,Dim>(r);
translation().setZero();
@@ -1016,7 +1035,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
template<typename Scalar, int Dim, int Mode, int Options>
template<typename Derived>
-inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
{
Transform res = *this;
res.rotate(r.derived());
@@ -1035,7 +1054,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
* \sa computeRotationScaling(), computeScalingRotation(), class SVD
*/
template<typename Scalar, int Dim, int Mode, int Options>
-const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
+EIGEN_DEVICE_FUNC const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
Transform<Scalar,Dim,Mode,Options>::rotation() const
{
LinearMatrixType result;
@@ -1057,7 +1076,7 @@ Transform<Scalar,Dim,Mode,Options>::rotation() const
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename RotationMatrixType, typename ScalingMatrixType>
-void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
{
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
@@ -1086,7 +1105,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename ScalingMatrixType, typename RotationMatrixType>
-void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
{
JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
@@ -1107,7 +1126,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixTyp
*/
template<typename Scalar, int Dim, int Mode, int Options>
template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)
{
@@ -1124,7 +1143,7 @@ template<int Mode>
struct transform_make_affine
{
template<typename MatrixType>
- static void run(MatrixType &mat)
+ EIGEN_DEVICE_FUNC static void run(MatrixType &mat)
{
static const int Dim = MatrixType::ColsAtCompileTime-1;
mat.template block<1,Dim>(Dim,0).setZero();
@@ -1135,21 +1154,21 @@ struct transform_make_affine
template<>
struct transform_make_affine<AffineCompact>
{
- template<typename MatrixType> static void run(MatrixType &) { }
+ template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }
};
// selector needed to avoid taking the inverse of a 3x4 matrix
template<typename TransformType, int Mode=TransformType::Mode>
struct projective_transform_inverse
{
- static inline void run(const TransformType&, TransformType&)
+ EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)
{}
};
template<typename TransformType>
struct projective_transform_inverse<TransformType, Projective>
{
- static inline void run(const TransformType& m, TransformType& res)
+ EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)
{
res.matrix() = m.matrix().inverse();
}
@@ -1179,7 +1198,7 @@ struct projective_transform_inverse<TransformType, Projective>
* \sa MatrixBase::inverse()
*/
template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>
Transform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const
{
Transform res;
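Taken together, these EIGEN_DEVICE_FUNC annotations make Transform callable from device code. A minimal sketch of what they enable, assuming a CUDA build (the kernel name transform_points is hypothetical, not part of this patch):

    #include <Eigen/Geometry>

    // Each thread applies the same affine transform to one point; this
    // only compiles under nvcc because Transform's operator* and its
    // accessors are now EIGEN_DEVICE_FUNC.
    __global__ void transform_points(Eigen::Affine3f t, Eigen::Vector3f* pts, int n)
    {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n)
        pts[i] = t * pts[i];
    }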
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index b9b9a590c..51d9a82eb 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -51,16 +51,16 @@ protected:
public:
/** Default constructor without initialization. */
- Translation() {}
+ EIGEN_DEVICE_FUNC Translation() {}
/** */
- inline Translation(const Scalar& sx, const Scalar& sy)
+ EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)
{
eigen_assert(Dim==2);
m_coeffs.x() = sx;
m_coeffs.y() = sy;
}
/** */
- inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
+ EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
{
eigen_assert(Dim==3);
m_coeffs.x() = sx;
@@ -68,48 +68,48 @@ public:
m_coeffs.z() = sz;
}
/** Constructs and initializes the translation transformation from a vector of translation coefficients */
- explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
+ EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
/** \brief Returns the x-translation by value. **/
- inline Scalar x() const { return m_coeffs.x(); }
+ EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
/** \brief Returns the y-translation by value. **/
- inline Scalar y() const { return m_coeffs.y(); }
+ EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
/** \brief Returns the z-translation by value. **/
- inline Scalar z() const { return m_coeffs.z(); }
+ EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
/** \brief Returns the x-translation as a reference. **/
- inline Scalar& x() { return m_coeffs.x(); }
+ EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
/** \brief Returns the y-translation as a reference. **/
- inline Scalar& y() { return m_coeffs.y(); }
+ EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
/** \brief Returns the z-translation as a reference. **/
- inline Scalar& z() { return m_coeffs.z(); }
+ EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
- const VectorType& vector() const { return m_coeffs; }
- VectorType& vector() { return m_coeffs; }
+ EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
+ EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
- const VectorType& translation() const { return m_coeffs; }
- VectorType& translation() { return m_coeffs; }
+ EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }
+ EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }
/** Concatenates two translations */
- inline Translation operator* (const Translation& other) const
+ EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const
{ return Translation(m_coeffs + other.m_coeffs); }
/** Concatenates a translation and a uniform scaling */
- inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
+ EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
/** Concatenates a translation and a linear transformation */
template<typename OtherDerived>
- inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
+ EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
/** Concatenates a translation and a rotation */
template<typename Derived>
- inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
+ EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
{ return *this * IsometryTransformType(r); }
/** \returns the concatenation of a linear transformation \a linear with the translation \a t */
// it's a nightmare to define a templated friend function outside its declaration
template<typename OtherDerived> friend
- inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
+ EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
{
AffineTransformType res;
res.matrix().setZero();
@@ -122,7 +122,7 @@ public:
/** Concatenates a translation and a transformation */
template<int Mode, int Options>
- inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
+ EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
{
Transform<Scalar,Dim,Mode> res = t;
res.pretranslate(m_coeffs);
@@ -152,19 +152,19 @@ public:
* then this function smartly returns a const reference to \c *this.
*/
template<typename NewScalarType>
- inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
+ EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
{ return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
/** Copy constructor with scalar type conversion */
template<typename OtherScalarType>
- inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
+ EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
{ m_coeffs = other.vector().template cast<Scalar>(); }
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
* determined by \a prec.
*
* \sa MatrixBase::isApprox() */
- bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+ EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); }
};
@@ -178,7 +178,7 @@ typedef Translation<double,3> Translation3d;
//@}
template<typename Scalar, int Dim>
-inline typename Translation<Scalar,Dim>::AffineTransformType
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
{
AffineTransformType res;
@@ -191,7 +191,7 @@ Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
template<typename Scalar, int Dim>
template<typename OtherDerived>
-inline typename Translation<Scalar,Dim>::AffineTransformType
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
{
AffineTransformType res;
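Likewise for Translation: its annotated operators can now be composed inside device functions. A small sketch under the same CUDA assumption (the helper shift_x is illustrative only):

    #include <Eigen/Geometry>

    // Prepend a unit shift along x; Translation3f's operator*(Transform)
    // carries EIGEN_DEVICE_FUNC after this patch.
    EIGEN_DEVICE_FUNC Eigen::Affine3f shift_x(const Eigen::Affine3f& t)
    {
      return Eigen::Translation3f(1.f, 0.f, 0.f) * t;
    }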
diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
index 0498db396..0ace45177 100644
--- a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
+++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h
@@ -98,7 +98,11 @@ struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, interna
typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
- // FIXME shall we resize dst here?
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
dst = src.guess();
src.dec()._solve_with_guess_impl(src.rhs(), dst/*, src.guess()*/);
}
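The practical effect of this hunk: the destination of a solve-with-guess expression no longer has to be pre-sized by the caller. A minimal sketch, assuming A is a filled n x n symmetric positive-definite sparse matrix:

    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    Eigen::SparseMatrix<double> A;    // assume filled, n x n, SPD
    Eigen::VectorXd b, guess;         // assume both sized n
    Eigen::ConjugateGradient<Eigen::SparseMatrix<double> > cg(A);

    Eigen::VectorXd x;                // deliberately left empty
    x = cg.solveWithGuess(b, guess);  // dst is resized to n x 1 before solving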
diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h
index 3134632e1..018f99b58 100644
--- a/Eigen/src/LU/InverseImpl.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -292,7 +292,11 @@ struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename Dst
typedef Inverse<XprType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
{
- // FIXME shall we resize dst here?
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
EIGEN_ONLY_USED_FOR_DEBUG(Size);
eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
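Same pattern for dense inverses: assigning an Inverse expression now resizes an empty destination instead of tripping a size assertion. For example:

    Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
    Eigen::MatrixXd Ainv;   // empty, 0 x 0
    Ainv = A.inverse();     // resized to 4 x 4 before evaluation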
diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h
index fa5386599..83776645b 100644
--- a/Eigen/src/SparseCore/SparseAssign.h
+++ b/Eigen/src/SparseCore/SparseAssign.h
@@ -139,13 +139,16 @@ struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense>
{
static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{
- eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
if(internal::is_same<Functor,internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> >::value)
dst.setZero();
internal::evaluator<SrcXprType> srcEval(src);
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
internal::evaluator<DstXprType> dstEval(dst);
+
const Index outerEvaluationSize = (internal::evaluator<SrcXprType>::Flags&RowMajorBit) ? src.rows() : src.cols();
for (Index j=0; j<outerEvaluationSize; ++j)
for (typename internal::evaluator<SrcXprType>::InnerIterator i(srcEval,j); i; ++i)
@@ -161,6 +164,11 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
src.dec()._solve_impl(src.rhs(), dst);
}
};
@@ -179,6 +187,11 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse>
template<int Options>
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
Index size = src.diagonal().size();
dst.makeCompressed();
dst.resizeNonZeros(size);
diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h
index 7a5ad0635..4cbf68781 100644
--- a/Eigen/src/SparseCore/SparseProduct.h
+++ b/Eigen/src/SparseCore/SparseProduct.h
@@ -104,6 +104,11 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assig
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
+ Index dstRows = src.rows();
+ Index dstCols = src.cols();
+ if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+ dst.resize(dstRows, dstCols);
+
generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());
}
};
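And the same for the sparse paths patched above: products and solves into a sparse destination now size it from the source expression. For example:

    Eigen::SparseMatrix<double> A(3,5), B(5,2);
    Eigen::SparseMatrix<double> C;    // empty
    C = A * B;                        // C is resized to 3 x 2 first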
diff --git a/bench/tensors/README b/bench/tensors/README
index 803cb8ef8..3a5fdbe17 100644
--- a/bench/tensors/README
+++ b/bench/tensors/README
@@ -11,5 +11,11 @@ nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBU
We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code.
nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu
-last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
+last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
+
+To compile the benchmarks for SYCL using ComputeCpp, you currently need two compilation passes (only for translation units containing device code):
+1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code.
+{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDEBUG -O3 -c tensor_benchmarks_sycl.cc
+2. The host compilation pass that generates the final host binary.
+clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl
diff --git a/bench/tensors/tensor_benchmarks_sycl.cc b/bench/tensors/tensor_benchmarks_sycl.cc
new file mode 100644
index 000000000..7eca4d966
--- /dev/null
+++ b/bench/tensors/tensor_benchmarks_sycl.cc
@@ -0,0 +1,37 @@
+#define EIGEN_USE_SYCL
+
+#include <SYCL/sycl.hpp>
+#include <iostream>
+
+#include "tensor_benchmarks.h"
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+// Simple functions
+template <typename device_selector>
+cl::sycl::queue sycl_queue() {
+ return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (const cl::sycl::exception& e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+}
+
+#define BM_FuncGPU(FUNC) \
+ static void BM_##FUNC(int iters, int N) { \
+ StopBenchmarkTiming(); \
+ cl::sycl::queue q = sycl_queue<cl::sycl::gpu_selector>(); \
+ Eigen::SyclDevice device(q); \
+ BenchmarkSuite<Eigen::SyclDevice, float> suite(device, N); \
+ suite.FUNC(iters); \
+ } \
+ BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
+
+BM_FuncGPU(broadcasting);
+BM_FuncGPU(coeffWiseOp);
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index f53f46087..9ebd06a04 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -61,7 +61,7 @@ macro(ei_add_test_internal testname testname_with_suffix)
ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}")
- if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS)
+ if(MSVC)
ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj")
endif()
@@ -109,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix)
endmacro(ei_add_test_internal)
+# SYCL
+macro(ei_add_test_internal_sycl testname testname_with_suffix)
+ include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include)
+ set(targetname ${testname_with_suffix})
+
+ if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
+ set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
+ else()
+ set(filename ${testname}.cpp)
+ endif()
+
+ set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename})
+ set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename})
+ set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename})
+
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${include_file}
+ COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file}
+ COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file}
+ DEPENDS ${filename}
+ COMMENT "Building ComputeCpp integration header file ${include_file}"
+ )
+ # Add a custom target for the generated integration header
+ add_custom_target(${testname}_integration_header_woho DEPENDS ${include_file})
+
+ add_executable(${targetname} ${include_file})
+ add_dependencies(${targetname} ${testname}_integration_header_woho)
+ add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR})
+
+ if (targetname MATCHES "^eigen2_")
+ add_dependencies(eigen2_buildtests ${targetname})
+ else()
+ add_dependencies(buildtests ${targetname})
+ endif()
+
+ if(EIGEN_NO_ASSERTION_CHECKING)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1")
+ else(EIGEN_NO_ASSERTION_CHECKING)
+ if(EIGEN_DEBUG_ASSERTS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1")
+ endif(EIGEN_DEBUG_ASSERTS)
+ endif(EIGEN_NO_ASSERTION_CHECKING)
+
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}")
+
+ ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}")
+
+ if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj")
+ endif()
+
+ # let the user pass flags.
+ if(${ARGC} GREATER 2)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}")
+ endif(${ARGC} GREATER 2)
+
+ if(EIGEN_TEST_CUSTOM_CXX_FLAGS)
+ ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}")
+ endif()
+
+ if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO)
+ target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
+ endif()
+ if(EXTERNAL_LIBS)
+ target_link_libraries(${targetname} ${EXTERNAL_LIBS})
+ endif()
+ if(EIGEN_TEST_CUSTOM_LINKER_FLAGS)
+ target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS})
+ endif()
+
+ if(${ARGC} GREATER 3)
+ set(libs_to_link ${ARGV3})
+ # it could be that some cmake module provides a bad library string " " (just spaces),
+ # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors).
+ # so we check for strings containing only spaces.
+ string(STRIP "${libs_to_link}" libs_to_link_stripped)
+ string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length)
+ if(${libs_to_link_stripped_length} GREATER 0)
+ # notice: no double quotes around ${libs_to_link} here. It may be a list.
+ target_link_libraries(${targetname} ${libs_to_link})
+ endif()
+ endif()
+
+ add_test(${testname_with_suffix} "${targetname}")
+
+ # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT
+ get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT)
+ if ((current_subproject) AND (NOT (current_subproject STREQUAL "")))
+ set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}")
+ add_dependencies("Build${current_subproject}" ${targetname})
+ set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}")
+ endif()
+
+
+endmacro(ei_add_test_internal_sycl)
+
+
# Macro to add a test
#
# the unique mandatory parameter testname must correspond to a file
@@ -185,6 +282,39 @@ macro(ei_add_test testname)
endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
endmacro(ei_add_test)
+macro(ei_add_test_sycl testname)
+ get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
+ set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n")
+ set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}")
+
+ if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
+ set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION})
+ else()
+ set(filename ${testname}.cpp)
+ endif()
+
+ file(READ "${filename}" test_source)
+ set(parts 0)
+ string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+"
occurrences "${test_source}")
+ string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurrences}")
+ list(REMOVE_DUPLICATES suffixes)
+ if(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+ add_custom_target(${testname})
+ foreach(suffix ${suffixes})
+ ei_add_test_internal_sycl(${testname} ${testname}_${suffix}
+ "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}")
+ add_dependencies(${testname} ${testname}_${suffix})
+ endforeach(suffix)
+ else(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+ set(symbols_to_enable_all_parts "")
+ foreach(suffix ${suffixes})
+ set(symbols_to_enable_all_parts
+ "${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1")
+ endforeach(suffix)
+ ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}")
+ endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes)
+endmacro(ei_add_test_sycl)
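Usage mirrors ei_add_test: a test CMakeLists guarded by EIGEN_TEST_SYCL would call, e.g. (test name illustrative):

    ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")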
# adds a failtest, i.e. a test that succeed if the program fails to compile
# note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON
@@ -336,6 +466,11 @@ macro(ei_testing_print_summary)
message(STATUS "C++11: OFF")
endif()
+ if(EIGEN_TEST_SYCL)
+ message(STATUS "SYCL: ON")
+ else()
+ message(STATUS "SYCL: OFF")
+ endif()
if(EIGEN_TEST_CUDA)
if(EIGEN_TEST_CUDA_CLANG)
message(STATUS "CUDA: ON (using clang)")
diff --git a/cmake/FindComputeCpp.cmake b/cmake/FindComputeCpp.cmake
new file mode 100644
index 000000000..3aab5b833
--- /dev/null
+++ b/cmake/FindComputeCpp.cmake
@@ -0,0 +1,228 @@
+#.rst:
+# FindComputeCpp
+#---------------
+
+#########################
+# FindComputeCpp.cmake
+#########################
+#
+# Tools for finding and building with ComputeCpp.
+#
+
+# Require CMake version 3.2.2 or higher
+cmake_minimum_required(VERSION 3.2.2)
+
+# Check that a supported host compiler can be found
+if(CMAKE_COMPILER_IS_GNUCXX)
+ # Require at least gcc 4.8
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
+ message(FATAL_ERROR
+ "host compiler - Not found! (gcc version must be at least 4.8)")
+ # Require the GCC dual ABI to be disabled for 5.1 or higher
+ elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1)
+ set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
+ message(STATUS
+ "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
+ else()
+ message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
+ endif()
+elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ # Require at least clang 3.6
+ if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
+ message(FATAL_ERROR
+ "host compiler - Not found! (clang version must be at least 3.6)")
+ else()
+ set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
+ message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
+ endif()
+else()
+ message(WARNING
+ "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)")
+endif()
+
+set(COMPUTECPP_64_BIT_DEFAULT ON)
+option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode"
+ ${COMPUTECPP_64_BIT_DEFAULT})
+mark_as_advanced(COMPUTECPP_64_BIT_CODE)
+
+# Find OpenCL package
+find_package(OpenCL REQUIRED)
+
+# Find ComputeCpp package
+if(EXISTS ${COMPUTECPP_PACKAGE_ROOT_DIR})
+ message(STATUS "ComputeCpp package - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+set(COMPUTECPP_PACKAGE_ROOT_DIR "${COMPUTECPP_PACKAGE_ROOT_DIR}" CACHE PATH "Path to the ComputeCpp Package")
+
+# Obtain the path to compute++
+find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
+ ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
+if (EXISTS ${COMPUTECPP_DEVICE_COMPILER})
+ mark_as_advanced(COMPUTECPP_DEVICE_COMPILER)
+ message(STATUS "compute++ - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER}) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the path to computecpp_info
+find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS
+ ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin)
+if (EXISTS ${COMPUTECPP_INFO_TOOL})
+ mark_as_advanced(COMPUTECPP_INFO_TOOL)
+ message(STATUS "computecpp_info - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+else()
+ message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL}) (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the path to the ComputeCpp runtime library
+find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR}
+ HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib
+ DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH)
+
+if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY})
+ mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY)
+ message(STATUS "libComputeCpp.so - Found")
+else()
+ message(FATAL_ERROR "libComputeCpp.so - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the ComputeCpp include directory
+set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)
+if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY})
+ message(FATAL_ERROR "ComputeCpp includes - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)")
+else()
+ message(STATUS "ComputeCpp includes - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})")
+endif()
+
+# Obtain the package version
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version"
+ OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "Package version - Error obtaining version!")
+else()
+ mark_as_advanced(COMPUTECPP_PACKAGE_VERSION)
+ message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}")
+endif()
+
+# Obtain the device compiler flags
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags"
+ OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!")
+else()
+ mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS)
+ message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
+endif()
+
+set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1)
+
+# Check if the platform is supported
+execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
+ OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
+ RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
+if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0")
+ message(FATAL_ERROR "platform - Error checking platform support!")
+else()
+ mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
+ if (COMPUTECPP_PLATFORM_IS_SUPPORTED)
+ message(STATUS "platform - your system can support ComputeCpp")
+ else()
+ message(STATUS "platform - your system CANNOT support ComputeCpp")
+ endif()
+endif()
+
+####################
+# __build_spir
+####################
+#
+# Adds a custom target for running compute++ and adding a dependency for the
+# resulting integration header.
+#
+# targetName : Name of the target.
+# sourceFile : Source file to be compiled.
+# binaryDir : Intermediate output directory for the integration header.
+#
+function(__build_spir targetName sourceFile binaryDir)
+
+ # Retrieve source file name.
+ get_filename_component(sourceFileName ${sourceFile} NAME)
+
+ # Set the path to the Sycl file.
+ set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)
+
+ # Add any user-defined include to the device compiler
+ get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
+ INCLUDE_DIRECTORIES)
+ set(device_compiler_includes "")
+ foreach(directory ${includeDirectories})
+ set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
+ endforeach()
+ if (CMAKE_INCLUDE_PATH)
+ foreach(directory ${CMAKE_INCLUDE_PATH})
+ set(device_compiler_includes "-I${directory}"
+ ${device_compiler_includes})
+ endforeach()
+ endif()
+
+ # Convert argument list format
+ separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)
+
+ # Add custom command for running compute++
+ add_custom_command(
+ OUTPUT ${outputSyclFile}
+ COMMAND ${COMPUTECPP_DEVICE_COMPILER}
+ ${COMPUTECPP_DEVICE_COMPILER_FLAGS}
+ -I${COMPUTECPP_INCLUDE_DIRECTORY}
+ ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS}
+ ${device_compiler_includes}
+ -o ${outputSyclFile}
+ -c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
+ DEPENDS ${sourceFile}
+ COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
+
+ # Add a custom target for the generated integration header
+ add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
+
+ # Add a dependency on the integration header
+ add_dependencies(${targetName} ${targetName}_integration_header)
+
+ # Force inclusion of the integration header for the host compiler
+ #set(compileFlags -include ${include_file} "-Wall")
+ target_compile_options(${targetName} PUBLIC ${compileFlags})
+
+ # Set the host compiler C++ standard to C++11
+ set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11)
+
+ # Disable GCC dual ABI on GCC 5.1 and higher
+ if(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
+ set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS
+ "_GLIBCXX_USE_CXX11_ABI=0")
+ endif()
+
+endfunction()
+
+#######################
+# add_sycl_to_target
+#######################
+#
+# Adds a SYCL compilation custom command associated with an existing
+# target and sets a dependency on that new command.
+#
+# targetName : Name of the target to add a SYCL to.
+# sourceFile : Source file to be compiled for SYCL.
+# binaryDir : Intermediate output directory for the integration header.
+#
+function(add_sycl_to_target targetName sourceFile binaryDir)
+
+ # Add custom target to run compute++ and generate the integration header
+ __build_spir(${targetName} ${sourceFile} ${binaryDir})
+
+ # Link with the ComputeCpp runtime library
+ target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY}
+ PUBLIC ${OpenCL_LIBRARIES})
+
+endfunction(add_sycl_to_target)
diff --git a/doc/snippets/DenseBase_LinSpacedInt.cpp b/doc/snippets/DenseBase_LinSpacedInt.cpp
new file mode 100644
index 000000000..0d7ae068e
--- /dev/null
+++ b/doc/snippets/DenseBase_LinSpacedInt.cpp
@@ -0,0 +1,8 @@
+cout << "Even spacing inputs:" << endl;
+cout << VectorXi::LinSpaced(8,1,4).transpose() << endl;
+cout << VectorXi::LinSpaced(8,1,8).transpose() << endl;
+cout << VectorXi::LinSpaced(8,1,15).transpose() << endl;
+cout << "Uneven spacing inputs:" << endl;
+cout << VectorXi::LinSpaced(8,1,7).transpose() << endl;
+cout << VectorXi::LinSpaced(8,1,9).transpose() << endl;
+cout << VectorXi::LinSpaced(8,1,16).transpose() << endl;
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 9063c6de8..351d26e74 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2010-2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -29,12 +30,41 @@ bool equalsIdentity(const MatrixType& A)
bool diagOK = (A.diagonal().array() == 1).all();
return offDiagOK && diagOK;
+
+}
+
+template<typename VectorType>
+void check_extremity_accuracy(const VectorType &v, const typename VectorType::Scalar &low, const typename VectorType::Scalar &high)
+{
+ typedef typename VectorType::Scalar Scalar;
+ typedef typename VectorType::RealScalar RealScalar;
+
+ RealScalar prec = internal::is_same<RealScalar,float>::value ? NumTraits<RealScalar>::dummy_precision()*10 : NumTraits<RealScalar>::dummy_precision()/10;
+ Index size = v.size();
+
+ if(size<20)
+ return;
+
+ for (int i=0; i<size; ++i)
+ {
+ if(i<5 || i>size-6)
+ {
+ Scalar ref = (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1);
+ if(std::abs(ref)>1)
+ {
+ if(!internal::isApprox(v(i), ref, prec))
+ std::cout << v(i) << " != " << ref << " ; relative error: " << std::abs((v(i)-ref)/ref) << " ; required precision: " << prec << " ; range: " << low << "," << high << " ; i: " << i << "\n";
+ VERIFY(internal::isApprox(v(i), (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1), prec));
+ }
+ }
+ }
}
template<typename VectorType>
void testVectorType(const VectorType& base)
{
typedef typename VectorType::Scalar Scalar;
+ typedef typename VectorType::RealScalar RealScalar;
const Index size = base.size();
@@ -42,6 +72,13 @@ void testVectorType(const VectorType& base)
Scalar low = (size == 1 ? high : internal::random<Scalar>(-500,500));
if (low>high) std::swap(low,high);
+ // check low==high
+ if(internal::random<float>(0.f,1.f)<0.05f)
+ low = high;
+ // check abs(low) >> abs(high)
+ else if(size>2 && std::numeric_limits<RealScalar>::max_exponent10>0 && internal::random<float>(0.f,1.f)<0.1f)
+ low = -internal::random<Scalar>(1,2) * RealScalar(std::pow(RealScalar(10),std::numeric_limits<RealScalar>::max_exponent10/2));
+
const Scalar step = ((size == 1) ? 1 : (high-low)/(size-1));
// check whether the result yields what we expect it to do
@@ -54,26 +91,42 @@ void testVectorType(const VectorType& base)
for (int i=0; i<size; ++i)
n(i) = low+i*step;
VERIFY_IS_APPROX(m,n);
+
+ CALL_SUBTEST( check_extremity_accuracy(m, low, high) );
}
- VectorType n(size);
- for (int i=0; i<size; ++i)
- n(i) = size==1 ? low : (low + ((high-low)*Scalar(i))/(size-1));
- VERIFY_IS_APPROX(m,n);
+ if((!NumTraits<Scalar>::IsInteger) || ((high-low)>=size && (Index(high-low)%(size-1))==0) || (Index(high-low+1)<size && (size%Index(high-low+1))==0))
+ {
+ VectorType n(size);
+ if((!NumTraits<Scalar>::IsInteger) || (high-low>=size))
+ for (int i=0; i<size; ++i)
+ n(i) = size==1 ? low : (low + ((high-low)*Scalar(i))/(size-1));
+ else
+ for (int i=0; i<size; ++i)
+ n(i) = size==1 ? low : low + Scalar((double(high-low+1)*double(i))/double(size));
+ VERIFY_IS_APPROX(m,n);
- // random access version
- m = VectorType::LinSpaced(size,low,high);
- VERIFY_IS_APPROX(m,n);
+ // random access version
+ m = VectorType::LinSpaced(size,low,high);
+ VERIFY_IS_APPROX(m,n);
+ VERIFY( internal::isApprox(m(m.size()-1),high) );
+ VERIFY( size==1 || internal::isApprox(m(0),low) );
+ VERIFY_IS_EQUAL(m(m.size()-1) , high);
+ if(!NumTraits<Scalar>::IsInteger)
+ CALL_SUBTEST( check_extremity_accuracy(m, low, high) );
+ }
- VERIFY( internal::isApprox(m(m.size()-1),high) );
- VERIFY( size==1 || internal::isApprox(m(0),low) );
+ VERIFY( m(m.size()-1) <= high );
+ VERIFY( (m.array() <= high).all() );
+ VERIFY( (m.array() >= low).all() );
- // sequential access version
- m = VectorType::LinSpaced(Sequential,size,low,high);
- VERIFY_IS_APPROX(m,n);
- VERIFY( internal::isApprox(m(m.size()-1),high) );
- VERIFY( size==1 || internal::isApprox(m(0),low) );
+ VERIFY( m(m.size()-1) >= low );
+ if(size>=1)
+ {
+ VERIFY( internal::isApprox(m(0),low) );
+ VERIFY_IS_EQUAL(m(0) , low);
+ }
// check whether everything works with row and col major vectors
Matrix<Scalar,Dynamic,1> row_vector(size);
@@ -95,7 +148,7 @@ void testVectorType(const VectorType& base)
VERIFY_IS_APPROX( ScalarMatrix::LinSpaced(1,low,high), ScalarMatrix::Constant(high) );
// regression test for bug 526 (linear vectorized transversal)
- if (size > 1) {
+ if (size > 1 && (!NumTraits<Scalar>::IsInteger)) {
m.tail(size-1).setLinSpaced(low, high);
VERIFY_IS_APPROX(m(size-1), high);
}
@@ -135,11 +188,11 @@ void test_nullary()
CALL_SUBTEST_2( testMatrixType(MatrixXcf(internal::random<int>(1,300),internal::random<int>(1,300))) );
CALL_SUBTEST_3( testMatrixType(MatrixXf(internal::random<int>(1,300),internal::random<int>(1,300))) );
- for(int i = 0; i < g_repeat; i++) {
- CALL_SUBTEST_4( testVectorType(VectorXd(internal::random<int>(1,300))) );
+ for(int i = 0; i < g_repeat*10; i++) {
+ CALL_SUBTEST_4( testVectorType(VectorXd(internal::random<int>(1,30000))) );
CALL_SUBTEST_5( testVectorType(Vector4d()) ); // regression test for bug 232
CALL_SUBTEST_6( testVectorType(Vector3d()) );
- CALL_SUBTEST_7( testVectorType(VectorXf(internal::random<int>(1,300))) );
+ CALL_SUBTEST_7( testVectorType(VectorXf(internal::random<int>(1,30000))) );
CALL_SUBTEST_8( testVectorType(Vector3f()) );
CALL_SUBTEST_8( testVectorType(Vector4f()) );
CALL_SUBTEST_8( testVectorType(Matrix<float,8,1>()) );
@@ -154,6 +207,18 @@ void test_nullary()
VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits<double>::epsilon() );
#endif
+#ifdef EIGEN_TEST_PART_9
+ // Check possible overflow issue
+ {
+ int n = 60000;
+ ArrayXi a1(n), a2(n);
+ a1.setLinSpaced(n, 0, n-1);
+ for(int i=0; i<n; ++i)
+ a2(i) = i;
+ VERIFY_IS_APPROX(a1,a2);
+ }
+#endif
+
#ifdef EIGEN_TEST_PART_10
// check some internal logic
VERIFY(( internal::has_nullary_operator<internal::scalar_constant_op<double> >::value ));
@@ -166,10 +231,10 @@ void test_nullary()
VERIFY(( internal::has_binary_operator<internal::scalar_identity_op<double> >::value ));
VERIFY(( !internal::functor_has_linear_access<internal::scalar_identity_op<double> >::ret ));
- VERIFY(( !internal::has_nullary_operator<internal::linspaced_op<float,float,false> >::value ));
- VERIFY(( internal::has_unary_operator<internal::linspaced_op<float,float,false> >::value ));
- VERIFY(( !internal::has_binary_operator<internal::linspaced_op<float,float,false> >::value ));
- VERIFY(( internal::functor_has_linear_access<internal::linspaced_op<float,float,false> >::ret ));
+ VERIFY(( !internal::has_nullary_operator<internal::linspaced_op<float,float> >::value ));
+ VERIFY(( internal::has_unary_operator<internal::linspaced_op<float,float> >::value ));
+ VERIFY(( !internal::has_binary_operator<internal::linspaced_op<float,float> >::value ));
+ VERIFY(( internal::functor_has_linear_access<internal::linspaced_op<float,float> >::ret ));
// Regression unit test for a weird MSVC bug.
// Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details.
@@ -190,10 +255,10 @@ void test_nullary()
VERIFY(( !internal::has_binary_operator<internal::scalar_constant_op<float> >::value ));
VERIFY(( internal::functor_has_linear_access<internal::scalar_constant_op<float> >::ret ));
- VERIFY(( !internal::has_nullary_operator<internal::linspaced_op<int,int,false> >::value ));
- VERIFY(( internal::has_unary_operator<internal::linspaced_op<int,int,false> >::value ));
- VERIFY(( !internal::has_binary_operator<internal::linspaced_op<int,int,false> >::value ));
- VERIFY(( internal::functor_has_linear_access<internal::linspaced_op<int,int,false> >::ret ));
+ VERIFY(( !internal::has_nullary_operator<internal::linspaced_op<int,int> >::value ));
+ VERIFY(( internal::has_unary_operator<internal::linspaced_op<int,int> >::value ));
+ VERIFY(( !internal::has_binary_operator<internal::linspaced_op<int,int> >::value ));
+ VERIFY(( internal::functor_has_linear_access<internal::linspaced_op<int,int> >::ret ));
}
#endif
}
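An editor's sketch of the semantics these tests pin down (assuming Eigen is on the include path): for floating-point types LinSpaced interpolates affinely with exact endpoints, and for integer types with an evenly divisible range it steps by (high-low)/(size-1) without the intermediate overflow checked in EIGEN_TEST_PART_9.

#include <Eigen/Core>
#include <iostream>
int main() {
  // floating point: exact endpoints, affine interior
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
  std::cout << v.transpose() << "\n";  // 0 0.25 0.5 0.75 1
  // integers, divisible range: step (8-0)/(5-1) == 2
  Eigen::ArrayXi a = Eigen::ArrayXi::LinSpaced(5, 0, 8);
  std::cout << a.transpose() << "\n";  // 0 2 4 6 8
  return 0;
}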
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index d6854c8c3..c18d73496 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -16,6 +16,12 @@
#endif
// using namespace Eigen;
+#ifdef EIGEN_VECTORIZE_SSE
+const bool g_vectorize_sse = true;
+#else
+const bool g_vectorize_sse = false;
+#endif
+
namespace Eigen {
namespace internal {
template<typename T> T negate(const T& x) { return -x; }
@@ -297,6 +303,26 @@ template<typename Scalar> void packetmath()
VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
}
}
+
+ if (PacketTraits::HasBlend || g_vectorize_sse) {
+ // pinsertfirst
+ for (int i=0; i<PacketSize; ++i)
+ ref[i] = data1[i];
+ Scalar s = internal::random<Scalar>();
+ ref[0] = s;
+ internal::pstore(data2, internal::pinsertfirst(internal::pload<Packet>(data1),s));
+ VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertfirst");
+ }
+
+ if (PacketTraits::HasBlend || g_vectorize_sse) {
+ // pinsertlast
+ for (int i=0; i<PacketSize; ++i)
+ ref[i] = data1[i];
+ Scalar s = internal::random<Scalar>();
+ ref[PacketSize-1] = s;
+ internal::pstore(data2, internal::pinsertlast(internal::pload<Packet>(data1),s));
+ VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertlast");
+ }
}
template<typename Scalar> void packetmath_real()
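For reference, a scalar model of the two packet primitives exercised above (an editor's sketch, not the SIMD implementation): pinsertfirst yields the packet with element 0 replaced by s, and pinsertlast replaces the last element, leaving all other lanes untouched.

#include <array>
#include <cstddef>
// Hypothetical scalar models of internal::pinsertfirst / internal::pinsertlast.
template <typename Scalar, std::size_t N>
std::array<Scalar, N> pinsertfirst_model(std::array<Scalar, N> p, Scalar s) {
  p[0] = s;      // replace the first lane
  return p;
}
template <typename Scalar, std::size_t N>
std::array<Scalar, N> pinsertlast_model(std::array<Scalar, N> p, Scalar s) {
  p[N - 1] = s;  // replace the last lane
  return p;
}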
diff --git a/test/product_extra.cpp b/test/product_extra.cpp
index e4990ac8c..03d5c3657 100644
--- a/test/product_extra.cpp
+++ b/test/product_extra.cpp
@@ -256,7 +256,49 @@ Index compute_block_size()
return ret;
}
-
+template<typename>
+void aliasing_with_resize()
+{
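+  // Each pair below first evaluates the product into B (no aliasing), then
+  // into A itself, where A is both the resized destination and an operand;
+  // the two results must agree.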
+ Index m = internal::random<Index>(10,50);
+ Index n = internal::random<Index>(10,50);
+ MatrixXd A, B, C(m,n), D(m,m);
+ VectorXd a, b, c(n);
+ C.setRandom();
+ D.setRandom();
+ c.setRandom();
+ double s = internal::random<double>(1,10);
+
+ A = C;
+ B = A * A.transpose();
+ A = A * A.transpose();
+ VERIFY_IS_APPROX(A,B);
+
+ A = C;
+ B = (A * A.transpose())/s;
+ A = (A * A.transpose())/s;
+ VERIFY_IS_APPROX(A,B);
+
+ A = C;
+ B = (A * A.transpose()) + D;
+ A = (A * A.transpose()) + D;
+ VERIFY_IS_APPROX(A,B);
+
+ A = C;
+ B = D + (A * A.transpose());
+ A = D + (A * A.transpose());
+ VERIFY_IS_APPROX(A,B);
+
+ A = C;
+ B = s * (A * A.transpose());
+ A = s * (A * A.transpose());
+ VERIFY_IS_APPROX(A,B);
+
+ A = C;
+ a = c;
+ b = (A * a)/s;
+ a = (A * a)/s;
+ VERIFY_IS_APPROX(a,b);
+}
template<int>
void bug_1308()
@@ -318,5 +360,6 @@ void test_product_extra()
CALL_SUBTEST_7( compute_block_size<float>() );
CALL_SUBTEST_7( compute_block_size<double>() );
CALL_SUBTEST_7( compute_block_size<std::complex<double> >() );
+ CALL_SUBTEST_8( aliasing_with_resize<void>() );
}
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 73aae3da8..388976d2e 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -69,6 +69,10 @@ typedef unsigned __int64 uint64_t;
#endif
#endif
+#ifdef EIGEN_USE_SYCL
+#include <SYCL/sycl.hpp>
+#endif
+
#include "src/Tensor/TensorMacros.h"
#include "src/Tensor/TensorForwardDeclarations.h"
#include "src/Tensor/TensorMeta.h"
@@ -77,6 +81,8 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorDeviceDefault.h"
#include "src/Tensor/TensorDeviceThreadPool.h"
#include "src/Tensor/TensorDeviceCuda.h"
+#include "src/Tensor/TensorSycl.h"
+#include "src/Tensor/TensorDeviceSycl.h"
#include "src/Tensor/TensorIndexList.h"
#include "src/Tensor/TensorDimensionList.h"
#include "src/Tensor/TensorDimensions.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
index cb615c75b..166be200c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -163,6 +163,11 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }
private:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 5d67f69f3..4cfe300eb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -113,7 +113,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
+ : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
{
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into an N-D
@@ -374,7 +374,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+
+ Broadcast functor() const { return m_broadcast; }
+
protected:
+ const Broadcast m_broadcast;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_inputStrides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
new file mode 100644
index 000000000..bfd36f5aa
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -0,0 +1,122 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
+
+namespace Eigen {
+/// \struct BufferT is used to specialise the add_sycl_buffer function for the
+/// two types of buffer we have. When MapAllocator is true, we create the
+/// sycl buffer with a map_allocator.
+/// We have to const_cast the input pointer in order to work around the fact
+/// that sycl does not accept a map allocator for a const pointer.
+template <typename T, bool MapAllocator>
+struct BufferT {
+ using Type = cl::sycl::buffer<T, 1, cl::sycl::map_allocator<T>>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(std::make_shared<Type>(
+ Type(const_cast<T *>(ptr), cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+/// specialisation of the \ref BufferT when the MapAllocator is false. In this
+/// case we only create the device-only buffer.
+template <typename T>
+struct BufferT<T, false> {
+ using Type = cl::sycl::buffer<T, 1>;
+ static inline void add_sycl_buffer(
+ const T *ptr, size_t num_bytes,
+ std::map<const void *, std::shared_ptr<void>> &buffer_map) {
+ buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(
+ ptr, std::shared_ptr<void>(
+ std::make_shared<Type>(Type(cl::sycl::range<1>(num_bytes))))));
+ }
+};
+
+struct SyclDevice {
+ /// class members
+ /// sycl queue
+ cl::sycl::queue &m_queue;
+  /// std::map is the container used to make sure that we create only one
+  /// buffer per pointer. The lifespan of the buffer now depends on the
+  /// lifespan of SyclDevice. If a non-read-only pointer needs to be accessed
+  /// on the host, we must deallocate it manually.
+ mutable std::map<const void *, std::shared_ptr<void>> buffer_map;
+
+ SyclDevice(cl::sycl::queue &q) : m_queue(q) {}
+ // destructor
+ ~SyclDevice() { deallocate_all(); }
+
+ template <typename T>
+ void deallocate(const T *p) const {
+ auto it = buffer_map.find(p);
+ if (it != buffer_map.end()) {
+ buffer_map.erase(it);
+ }
+ }
+ void deallocate_all() const { buffer_map.clear(); }
+
+  /// creation of a sycl accessor for a buffer. This function first tries to
+  /// find the buffer in the buffer_map; if found, it returns the accessor for
+  /// it, otherwise it adds an entry by creating a sycl buffer for that
+  /// particular pointer.
+ template <cl::sycl::access::mode AcMd, bool MapAllocator, typename T>
+ inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer>
+ get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh,
+ const T *ptr) const {
+ auto it = buffer_map.find(ptr);
+ if (it == buffer_map.end()) {
+ BufferT<T, MapAllocator>::add_sycl_buffer(ptr, num_bytes, buffer_map);
+ }
+ return (
+ ((typename BufferT<T, MapAllocator>::Type *)(buffer_map.at(ptr).get()))
+ ->template get_access<AcMd>(cgh));
+ }
+
+ /// allocating memory on the cpu
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const {
+ return internal::aligned_malloc(num_bytes);
+ }
+
+  // runtime checks of device suitability could be added here
+ bool isDeviceSuitable() const { return true; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const {
+ internal::aligned_free(buffer);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src,
+ size_t n) const {
+ ::memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(
+ void *dst, const void *src, size_t n) const {
+ memcpy(dst, src, n);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c,
+ size_t n) const {
+ ::memset(buffer, c, n);
+ }
+};
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
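A minimal host-side usage sketch of the new SyclDevice (assuming a working SYCL runtime such as ComputeCpp; only members defined above are used):

#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  cl::sycl::queue q;            // the queue is owned by the caller
  Eigen::SyclDevice device(q);  // SyclDevice only keeps a reference to it
  // host-side helpers: allocate/memset operate on plain host memory
  void* buf = device.allocate(16 * sizeof(float));
  device.memset(buf, 0, 16 * sizeof(float));
  device.deallocate(buf);       // void* overload: frees the host allocation
  // SYCL buffers are created lazily by get_sycl_accessor and cached in
  // buffer_map; they are released via deallocate(ptr) or deallocate_all().
  return 0;
}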
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index a08dfa7c3..68d14a7e5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -20,8 +20,8 @@ namespace Eigen {
*
*/
namespace internal {
-template<typename XprType>
-struct traits<TensorEvalToOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorEvalToOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,16 +36,22 @@ struct traits<TensorEvalToOp<XprType> >
enum {
Flags = 0
};
+ template <class T>
+ struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorEvalToOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense>
{
typedef const TensorEvalToOp<XprType>& type;
};
-template<typename XprType>
-struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type>
{
typedef TensorEvalToOp<XprType> type;
};
@@ -55,37 +61,38 @@ struct nested<TensorEvalToOp<XprType>, 1, typename eval<TensorEvalToOp<XprType>
-template<typename XprType>
-class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+ typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
: m_xpr(expr), m_buffer(buffer) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename XprType::Nested>::type&
expression() const { return m_xpr; }
- EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }
protected:
typename XprType::Nested m_xpr;
- CoeffReturnType* m_buffer;
+ PointerType m_buffer;
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
{
- typedef TensorEvalToOp<ArgType> XprType;
+ typedef TensorEvalToOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -102,15 +109,22 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer())
+ : m_impl(op.expression(), device), m_device(device),
+ m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
+ return m_op;
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
}
+ typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) {
EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL);
return m_impl.evalSubExprsIfNeeded(m_buffer);
@@ -145,12 +159,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; }
+ ArgType expression() const { return m_expression; }
+
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ DevicePointer m_buffer;
+ const XprType& m_op;
+ const ArgType m_expression;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 61c111cec..834ce07df 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -46,9 +46,11 @@ struct TensorEvaluator
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device)
+ : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
@@ -106,12 +108,16 @@ struct TensorEvaluator
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
+
+ /// required by sycl in order to construct sycl buffer from raw pointer
+ const Device& device() const{return m_device;}
protected:
- Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
namespace {
@@ -159,8 +165,11 @@ struct TensorEvaluator<const Derived, Device>
RawAccess = true
};
+ // Used for accessor extraction in SYCL Managed TensorMap:
+ const Derived& derived() const { return m_impl; }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(m.data()), m_dims(m.dimensions()), m_device(device)
+ : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
@@ -198,12 +207,16 @@ struct TensorEvaluator<const Derived, Device>
internal::unpacket_traits<PacketReturnType>::size);
}
- EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+ EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
+
+ /// added for sycl in order to construct the buffer from the sycl device
+ const Device& device() const{return m_device;}
protected:
- const Scalar* m_data;
+ typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
Dimensions m_dims;
const Device& m_device;
+ const Derived& m_impl;
};
@@ -260,6 +273,12 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
+ /// required by sycl in order to extract the accessor
+ NullaryOp functor() const { return m_functor; }
+
+
private:
const NullaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -324,6 +343,12 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
+ /// added for sycl in order to construct the buffer from sycl device
+ UnaryOp functor() const { return m_functor; }
+
+
private:
const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
@@ -397,6 +422,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
+ /// required by sycl in order to extract the accessor
+ BinaryOp functor() const { return m_functor; }
private:
const BinaryOp m_functor;
@@ -492,10 +523,17 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
+
private:
const TernaryOp m_functor;
TensorEvaluator<Arg1Type, Device> m_arg1Impl;
- TensorEvaluator<Arg1Type, Device> m_arg2Impl;
+ TensorEvaluator<Arg2Type, Device> m_arg2Impl;
TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
@@ -576,6 +614,12 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
+ /// required by sycl in order to extract the accessor
+ const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
private:
TensorEvaluator<IfArgType, Device> m_condImpl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index 0cac7b179..f01d77c0a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -267,6 +267,20 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
#endif // __CUDACC__
#endif // EIGEN_USE_GPU
+// SYCL Executor policy
+#ifdef EIGEN_USE_SYCL
+
+template <typename Expression, bool Vectorizable>
+class TensorExecutor<Expression, SyclDevice, Vectorizable> {
+public:
+ static inline void run(const Expression &expr, const SyclDevice &device) {
+ // call TensorSYCL module
+ TensorSycl::run(expr, device);
+ }
+};
+
+#endif
+
} // end namespace internal
} // end namespace Eigen
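To see how the new policy is reached: assigning a tensor expression through .device(sycl_device) dispatches to internal::TensorExecutor with Device = SyclDevice, which forwards to TensorSycl::run. An editor's sketch (the function and pointer names are placeholders):

#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>

void square(float* in_ptr, float* out_ptr, int n, const Eigen::SyclDevice& d) {
  Eigen::TensorMap<Eigen::Tensor<float, 1> > in(in_ptr, n), out(out_ptr, n);
  // expands to TensorExecutor<Expr, SyclDevice, Vectorizable>::run(expr, d),
  // i.e. TensorSycl::run(expr, d)
  out.device(d) = in * in;
}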
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index 5f2e329f2..85dfc7a69 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -283,7 +283,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
arg1Expression() const { return m_arg1_xpr; }
EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename Arg1XprType::Nested>::type&
+ const typename internal::remove_all<typename Arg2XprType::Nested>::type&
arg2Expression() const { return m_arg2_xpr; }
EIGEN_DEVICE_FUNC
@@ -292,7 +292,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
protected:
typename Arg1XprType::Nested m_arg1_xpr;
- typename Arg1XprType::Nested m_arg2_xpr;
+ typename Arg2XprType::Nested m_arg2_xpr;
typename Arg3XprType::Nested m_arg3_xpr;
const TernaryOp m_functor;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index c23ecdbc4..bbd5eb374 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -19,9 +19,15 @@ namespace Eigen {
*
*
*/
+/// template <class> class MakePointer_ is added to convert the host pointer
+/// to the device pointer. It is needed because our device compiler does not
+/// allow T*, so to reuse the same evaluator functions we convert the pointer
+/// type through the MakePointer_ class. Since the Type inside MakePointer_<T>
+/// defaults to T*, adding this parameter does not break any existing code.
namespace internal {
-template<typename XprType>
-struct traits<TensorForcedEvalOp<XprType> >
+template<typename XprType, template <class> class MakePointer_>
+struct traits<TensorForcedEvalOp<XprType, MakePointer_> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename XprType::Scalar Scalar;
@@ -36,26 +42,31 @@ struct traits<TensorForcedEvalOp<XprType> >
enum {
Flags = 0
};
+ template <class T> struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
-template<typename XprType>
-struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
+template<typename XprType, template <class> class MakePointer_>
+struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense>
{
- typedef const TensorForcedEvalOp<XprType>& type;
+ typedef const TensorForcedEvalOp<XprType, MakePointer_>& type;
};
-template<typename XprType>
-struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
+template<typename XprType, template <class> class MakePointer_>
+struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type>
{
- typedef TensorForcedEvalOp<XprType> type;
+ typedef TensorForcedEvalOp<XprType, MakePointer_> type;
};
} // end namespace internal
-template<typename XprType>
-class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
+template<typename XprType, template <class> class MakePointer_>
+class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors>
{
public:
typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
@@ -77,10 +88,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
};
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
+template<typename ArgType, typename Device, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device>
{
- typedef TensorForcedEvalOp<ArgType> XprType;
+ typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType;
typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef typename XprType::Index Index;
@@ -96,6 +107,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+    /// m_op is used for sycl
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
{ }
@@ -110,10 +122,10 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
new(m_buffer+i) CoeffReturnType();
}
}
- typedef TensorEvalToOp<const ArgType> EvalTo;
+ typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+ internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@@ -136,13 +148,17 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; }
+ EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; }
+ /// required by sycl in order to extract the sycl accessor
+ const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
+ /// used by sycl in order to build the sycl buffer
+ const Device& device() const{return m_device;}
private:
TensorEvaluator<ArgType, Device> m_impl;
const ArgType m_op;
const Device& m_device;
- CoeffReturnType* m_buffer;
+ typename MakePointer<CoeffReturnType>::Type m_buffer;
};
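Because the new MakePointer_ parameter defaults to MakePointer, the pre-existing one-argument spelling keeps compiling on the host; a compile-time illustration (editor's sketch):

#include <type_traits>
#include <unsupported/Eigen/CXX11/Tensor>
typedef const Eigen::Tensor<float, 2> Arg;
static_assert(std::is_same<Eigen::TensorForcedEvalOp<Arg>,
                           Eigen::TensorForcedEvalOp<Arg, Eigen::MakePointer> >::value,
              "default MakePointer_ keeps the old spelling valid");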
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 490ddd8bd..6497b1830 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -12,9 +12,19 @@
namespace Eigen {
+// MakePointer class is used as a container of the address space of the
+// pointer on the host and on the device. On the host side it generates the
+// plain T* pointer; when EIGEN_USE_SYCL is used, a specialisation of
+// MakePointer constructs a buffer with a map_allocator over T* m_data for
+// use on the device.
+template<typename T> struct MakePointer {
+ typedef T* Type;
+};
+
+template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
-template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
template<typename PlainObjectType> class TensorRef;
template<typename Derived, int AccessLevel> class TensorBase;
@@ -52,8 +62,8 @@ template<typename Op, typename XprType> class TensorScanOp;
template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
-template<typename XprType> class TensorEvalToOp;
-template<typename XprType> class TensorForcedEvalOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
+template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
@@ -61,6 +71,7 @@ template<typename Derived, typename Device> struct TensorEvaluator;
struct DefaultDevice;
struct ThreadPoolDevice;
struct GpuDevice;
+struct SyclDevice;
enum FFTResultType {
RealPart = 0,
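On the host, MakePointer<T>::Type is simply T*, so every signature that changed from Scalar* to MakePointer<Scalar>::Type stays source-compatible; a quick check (editor's sketch):

#include <type_traits>
#include <unsupported/Eigen/CXX11/Tensor>
static_assert(std::is_same<Eigen::MakePointer<float>::Type, float*>::value,
              "host-side MakePointer is the identity T -> T*");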
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
index 6fb4f4a31..a8e55757e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -18,11 +18,16 @@ namespace Eigen {
* \brief A tensor expression mapping an existing array of data.
*
*/
-
-template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
+/// template <class> class MakePointer_ is added to convert the host pointer
+/// to the device pointer. It is needed because our device compiler does not
+/// allow T*, so to reuse the same evaluator functions we convert the pointer
+/// type through the MakePointer_ class. Since the Type inside MakePointer_<T>
+/// defaults to T*, adding this parameter does not break any existing code.
+template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> >
{
public:
- typedef TensorMap<PlainObjectType, Options_> Self;
+ typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
typedef typename PlainObjectType::Base Base;
typedef typename Eigen::internal::nested<Self>::type Nested;
typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
@@ -36,7 +41,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
Scalar *,
const Scalar *>::type
PointerType;*/
- typedef Scalar* PointerType;
+ typedef typename MakePointer_<Scalar>::Type PointerType;
typedef PointerType PointerArgType;
static const int Options = Options_;
@@ -109,9 +114,9 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar* data() { return m_data; }
+ EIGEN_STRONG_INLINE PointerType data() { return m_data; }
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }
+ EIGEN_STRONG_INLINE const PointerType data() const { return m_data; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
@@ -307,7 +312,7 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
}
private:
- Scalar* m_data;
+ typename MakePointer_<Scalar>::Type m_data;
Dimensions m_dimensions;
};
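With the default MakePointer_, TensorMap behaves exactly as before on the host; a small sketch:

#include <unsupported/Eigen/CXX11/Tensor>
int main() {
  float raw[6] = {0, 1, 2, 3, 4, 5};
  // the third template argument defaults to MakePointer, so PointerType is float*
  Eigen::TensorMap<Eigen::Tensor<float, 2> > t(raw, 2, 3);
  float* p = t.data();  // data() now returns PointerType, still float* here
  return p == raw ? 0 : 1;
}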
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index fdb5ee6b8..615559d44 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -83,6 +83,27 @@ struct PacketType<half, GpuDevice> {
};
#endif
+#if defined(EIGEN_USE_SYCL)
+template <typename T>
+ struct PacketType<T, SyclDevice> {
+ typedef T type;
+ static const int size = 1;
+ enum {
+ HasAdd = 0,
+ HasSub = 0,
+ HasMul = 0,
+ HasNegate = 0,
+ HasAbs = 0,
+ HasArg = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasConj = 0,
+ HasSetLinear = 0,
+ HasBlend = 0
+ };
+};
+#endif
// Tuple mimics std::pair but works on e.g. nvcc.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index a87777b22..d34ff98b0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -423,15 +423,15 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
// Precompute output strides.
if (NumOutputDims > 0) {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumOutputDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
- }
+ m_outputStrides[0] = 1;
+ for (int i = 1; i < NumOutputDims; ++i) {
+ m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
+ }
} else {
- m_outputStrides.back() = 1;
- for (int i = NumOutputDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
- }
+ m_outputStrides.back() = 1;
+ for (int i = NumOutputDims - 2; i >= 0; --i) {
+ m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
+ }
}
}
@@ -439,27 +439,27 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
if (NumInputDims > 0) {
array<Index, NumInputDims> input_strides;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- input_strides[0] = 1;
- for (int i = 1; i < NumInputDims; ++i) {
- input_strides[i] = input_strides[i-1] * input_dims[i-1];
- }
+ input_strides[0] = 1;
+ for (int i = 1; i < NumInputDims; ++i) {
+ input_strides[i] = input_strides[i-1] * input_dims[i-1];
+ }
} else {
- input_strides.back() = 1;
- for (int i = NumInputDims - 2; i >= 0; --i) {
- input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
- }
+ input_strides.back() = 1;
+ for (int i = NumInputDims - 2; i >= 0; --i) {
+ input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
+ }
}
int outputIndex = 0;
int reduceIndex = 0;
for (int i = 0; i < NumInputDims; ++i) {
- if (m_reduced[i]) {
- m_reducedStrides[reduceIndex] = input_strides[i];
- ++reduceIndex;
- } else {
- m_preservedStrides[outputIndex] = input_strides[i];
- ++outputIndex;
- }
+ if (m_reduced[i]) {
+ m_reducedStrides[reduceIndex] = input_strides[i];
+ ++reduceIndex;
+ } else {
+ m_preservedStrides[outputIndex] = input_strides[i];
+ ++outputIndex;
+ }
}
}
@@ -578,7 +578,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
Op reducer(m_reducer);
if (ReducingInnerMostDims || RunningFullReduction) {
const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
+ (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
num_values_to_reduce, reducer);
} else {
@@ -602,7 +602,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
if (ReducingInnerMostDims) {
const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
+ (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
const Index firstIndex = firstInput(index);
for (Index i = 0; i < PacketSize; ++i) {
Op reducer(m_reducer);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
new file mode 100644
index 000000000..da15f7942
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: eigen@codeplay.com
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// General include header of SYCL target for Tensor Module
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
+
+#ifdef EIGEN_USE_SYCL
+
+// MakeGlobalPointer maps T to its SYCL global-address-space pointer type,
+// used to give leaf pointers the right attribute on the device.
+template <class T>
+struct MakeGlobalPointer {
+ typedef typename cl::sycl::global_ptr<T>::pointer_t Type;
+};
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+/// This struct is used for special expression nodes with no operation (for
+/// example assign and select ops).
+struct NoOP;
+
+template<bool IsConst, typename T> struct GetType{
+ typedef const T Type;
+};
+template<typename T> struct GetType<false, T>{
+ typedef T Type;
+};
+
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+// tuple construction
+#include "TensorSyclTuple.h"
+
+// This file contains the PlaceHolder that replaces the actual data
+#include "TensorSyclPlaceHolder.h"
+
+#include "TensorSyclLeafCount.h"
+
+// The index PlaceHolder takes the actual expression and replaces the actual
+// data on it with the place holder. It uses the same pre-order expression tree
+// traverse as the leaf count in order to give the right access number to each
+// node in the expression
+#include "TensorSyclPlaceHolderExpr.h"
+
+// creation of an accessor tuple from a tuple of SYCL buffers
+#include "TensorSyclExtractAccessor.h"
+
+// actual data extraction using accessors
+//#include "GetDeviceData.h"
+
+// this is used to change the address space type in tensor map for GPU
+#include "TensorSyclConvertToDeviceExpression.h"
+
+// this is used to extract the functors
+#include "TensorSyclExtractFunctors.h"
+
+// this is used to create tensormap on the device
+// this is used to construct the expression on the device
+#include "TensorSyclExprConstructor.h"
+
+// kernel execution using fusion
+#include "TensorSyclRun.h"
+
+#endif // end of EIGEN_USE_SYCL
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
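The small GetType helper above re-adds constness on demand; a compile-time illustration (editor's sketch, assuming a SYCL-enabled build that includes this header):

#include <type_traits>
using Eigen::TensorSycl::internal::GetType;
static_assert(std::is_same<GetType<true, int>::Type, const int>::value, "");
static_assert(std::is_same<GetType<false, int>::Type, int>::value, "");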
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
new file mode 100644
index 000000000..a94c30426
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@@ -0,0 +1,109 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclConvertToDeviceExpression.h
+ *
+ * \brief:
+ * Conversion from host pointer to device pointer
+ * inside leaf nodes of the expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+/// \struct ConvertToDeviceExpression
+/// \brief This struct is used to convert the MakePointer in the host
+/// expression to the MakeGlobalPointer for the device expression, for the
+/// leaf nodes containing the pointer. This is needed because the address
+/// space of the pointer T* differs between the host and the device.
+template <typename Expr>
+struct ConvertToDeviceExpression;
+
+template<template<class...> class NonOpCategory, bool IsConst, typename... Args>
+struct NonOpConversion{
+ typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type;
+};
+
+
+template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args>
+struct DeviceConvertor{
+ typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type;
+};
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorMap
+#define TENSORMAPCONVERT(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\
+struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\
+ typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
+};
+
+TENSORMAPCONVERT(const)
+TENSORMAPCONVERT()
+#undef TENSORMAPCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp
+#define CATEGORYCONVERT(CVQual)\
+template <template<class, class...> class Category, typename OP, typename... subExprs>\
+struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\
+ typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... > Type;\
+};
+CATEGORYCONVERT(const)
+CATEGORYCONVERT()
+#undef CATEGORYCONVERT
+
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is TensorCwiseSelectOp
+#define SELECTOPCONVERT(CVQual, Res)\
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>\
+struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\
+: NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {};
+SELECTOPCONVERT(const, true)
+SELECTOPCONVERT(, false)
+#undef SELECTOPCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is const TensorAssignOp
+#define ASSIGNCONVERT(CVQual, Res)\
+template <typename LHSExpr, typename RHSExpr>\
+struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\
+: NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{};
+
+ASSIGNCONVERT(const, true)
+ASSIGNCONVERT(, false)
+#undef ASSIGNCONVERT
+
+/// specialisation of the \ref ConvertToDeviceExpression struct when the node
+/// type is either TensorForcedEvalOp or TensorEvalToOp
+#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\
+template <typename Expr>\
+struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \
+: DeviceConvertor<ExprNode, Res, Expr>{};
+
+KERNELBROKERCONVERT(const, true, TensorForcedEvalOp)
+KERNELBROKERCONVERT(, false, TensorForcedEvalOp)
+KERNELBROKERCONVERT(const, true, TensorEvalToOp)
+KERNELBROKERCONVERT(, false, TensorEvalToOp)
+#undef KERNELBROKERCONVERT
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
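What the conversion does at the type level, as an editor's sketch (names come from this diff; requires a SYCL-enabled build):

#include <type_traits>
// host-side leaf: TensorMap with the default MakePointer
typedef const Eigen::TensorMap<Eigen::Tensor<float, 1>, 0, Eigen::MakePointer> HostLeaf;
// device-side leaf: the same TensorMap, re-tagged with MakeGlobalPointer
typedef Eigen::TensorSycl::internal::ConvertToDeviceExpression<HostLeaf>::Type DevLeaf;
static_assert(std::is_same<DevLeaf,
    const Eigen::TensorMap<Eigen::Tensor<float, 1>, 0, MakeGlobalPointer> >::value, "");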
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
new file mode 100644
index 000000000..833d5e271
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@@ -0,0 +1,213 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExprConstructor.h
+ *
+ * \brief:
+ * This file re-create an expression on the SYCL device in order
+ * to use the original tensor evaluator.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// This class is used by EvalToOp in order to create an lhs expression which
+/// is a pointer obtained from an accessor on a device-only buffer.
+template <typename PtrType, size_t N, typename... Params>
+struct EvalToLHSConstructor {
+ PtrType expr;
+ EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {}
+};
+
+/// \struct ExprConstructor is used to reconstruct the expression on the
+/// device and recreate it with MakeGlobalPointer containing the device
+/// address space for the TensorMap pointers used in the eval function.
+/// It receives the original expression type, the functor of the node, the
+/// tuple of accessors, and the device expression type to re-instantiate the
+/// expression tree for the device.
+template <typename OrigExpr, typename IndexExpr, typename... Params>
+struct ExprConstructor;
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorMap
+#define TENSORMAP(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\
+template <class> class MakePointer_, size_t N, typename... Params>\
+struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\
+CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\
+ typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
+};
+
+TENSORMAP(const)
+TENSORMAP()
+#undef TENSORMAP
+
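+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseUnaryOp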
+#define UNARYCATEGORY(CVQual)\
+template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\
+ my_type rhsExpr;\
+ typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\
+};
+
+UNARYCATEGORY(const)
+UNARYCATEGORY()
+#undef UNARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorBinaryOp
+#define BINARYCATEGORY(CVQual)\
+template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\
+typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>, CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
+ typedef CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\
+ my_left_type lhsExpr;\
+ my_right_type rhsExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\
+};
+
+BINARYCATEGORY(const)
+BINARYCATEGORY()
+#undef BINARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseTernaryOp
+#define TERNARYCATEGORY(CVQual)\
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\
+typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\
+struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\
+ typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\
+ typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\
+ typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\
+ typedef CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\
+ my_arg1_type arg1Expr;\
+ my_arg2_type arg2Expr;\
+ my_arg3_type arg3Expr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\
+ : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\
+};
+
+TERNARYCATEGORY(const)
+TERNARYCATEGORY()
+#undef TERNARYCATEGORY
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorCwiseSelectOp
+#define SELECTOP(CVQual)\
+template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\
+struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\
+ typedef ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\
+ typedef ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\
+ typedef ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\
+ typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\
+ my_if_type ifExpr;\
+ my_then_type thenExpr;\
+ my_else_type elseExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\
+};
+
+SELECTOP(const)
+SELECTOP()
+#undef SELECTOP
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// const TensorAssignOp
+#define ASSIGN(CVQual)\
+template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... Params>\
+struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>, CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\
+ typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
+ typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
+ typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type> Type;\
+ my_left_type lhsExpr;\
+ my_right_type rhsExpr;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\
+};
+
+ASSIGN(const)
+ASSIGN()
+#undef ASSIGN
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorEvalToOp
+#define EVALTO(CVQual)\
+template <typename OrigExpr, typename Expr, typename... Params>\
+struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\
+ typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\
+ typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\
+ typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\
+ my_expr_type nestedExpression;\
+ EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+ : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\
+};
+
+EVALTO(const)
+EVALTO()
+#undef EVALTO
+
+/// specialisation of the \ref ExprConstructor struct when the node type is
+/// TensorForcedEvalOp
+#define FORCEDEVAL(CVQual)\
+template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\
+struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\
+CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\
+ typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\
+ TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\
+ Type expr;\
+ template <typename FuncDetector>\
+ ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
+ : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
+};
+
+FORCEDEVAL(const)
+FORCEDEVAL()
+#undef FORCEDEVAL
+
+/// template deduction for \ref ExprConstructor struct
+template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
+auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t)
+ -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) {
+ return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t);
+}
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
new file mode 100644
index 000000000..ceec528ea
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@@ -0,0 +1,201 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExtractAccessor.h
+ *
+ * \brief:
+ * ExtractAccessor takes a PlaceHolder expression and the tuple of SYCL
+ * buffers as input. Using pre-order tree traversal, ExtractAccessor
+ * recursively calls itself for the children of each node in the expression
+ * tree. A leaf node in the PlaceHolder expression is nothing but a
+ * container preserving the order of the actual data in the tuple of SYCL
+ * buffers. By invoking the extract accessor for PlaceHolder<N>, an accessor
+ * is created for the Nth buffer in the tuple of buffers. This accessor is
+ * then added as the Nth element in the tuple of accessors, preserving the
+ * order of the data in the expression tree.
+ *
+ * These are the specialisations of the extract accessor method for the
+ * different operation types in the PlaceHolder expression.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct ExtractAccessor: Extract Accessor Class is used to extract the
+/// accessor from a buffer.
+/// Depending on the type of the leaf node we can get a read accessor or a
+/// read_write accessor
+template <typename Evaluator>
+struct ExtractAccessor;
+
+struct AccessorConstructor{
+ template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval)
+ -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) {
+ return ExtractAccessor<Arg>::getTuple(cgh, eval);
+ }
+
+ template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2)
+ -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) {
+ return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2));
+ }
+ template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2, Arg3 eval3)
+ -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) {
+ return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)));
+ }
+ template<cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval)
+ -> decltype(utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, true,
+ typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh, eval.data()))){
+ return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, true, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh, eval.data()));
+ }
+};
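+
+// A minimal sketch (hypothetical evaluators lhs_eval/rhs_eval inside an open
+// command group handler cgh) of how the two-argument getTuple composes the
+// accessor tuples of its children:
+//   auto lhs = ExtractAccessor<decltype(lhs_eval)>::getTuple(cgh, lhs_eval);
+//   auto rhs = ExtractAccessor<decltype(rhs_eval)>::getTuple(cgh, rhs_eval);
+//   auto all = utility::tuple::append(lhs, rhs); // keeps left-to-right leaf order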
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp
+template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){
+ return AccessorConstructor::getTuple(cgh, eval.impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp
+template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){
+ return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorCwiseTernaryOp
+template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorSelectOp. This is a special case where there is no OP
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorSelectOp. This is a special case where there is no OP
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
+ -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
+ return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorMap
+#define TENSORMAPEXPR(CVQual, ACCType)\
+template <typename PlainObjectType, int Options_, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\
+ -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\
+ return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\
+ }\
+};
+TENSORMAPEXPR(const, cl::sycl::access::mode::read)
+TENSORMAPEXPR(, cl::sycl::access::mode::read_write)
+#undef TENSORMAPEXPR
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval)
+ -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
+ return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// const TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > {
+ static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval)
+ -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){
+ return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));
+ }
+};
+
+/// specialisation of the \ref ExtractAccessor struct when the node type is
+/// TensorEvalToOp
+template <typename Expr, typename Dev>
+struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> >
+: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{};
+
+/// template deduction for \ref ExtractAccessor
+template <typename Evaluator>
+auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr)
+-> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) {
+ return ExtractAccessor<Evaluator>::getTuple(cgh, expr);
+}
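+
+// Usage sketch (hedged; `evaluator` is a hypothetical host-side
+// TensorEvaluator and the queue submission mirrors TensorSyclRun.h):
+//   dev.m_queue.submit([&](cl::sycl::handler& cgh) {
+//     auto tuple_of_accessors = createTupleOfAccessors(cgh, evaluator);
+//     // the tuple is later consumed inside the kernel lambda
+//   });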
+}
+}
+}
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
new file mode 100644
index 000000000..801b4f5d7
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclExtractFunctors.h
+ *
+ * \brief:
+ * Used to extract all the functors allocated to each node of the expression
+ * tree.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct FunctorExtractor: This struct is used to extract the functors
+/// constructed on the host side, so that they can be packaged and reused
+/// when the expression is reconstructed on the device.
+/// We have to do this because in Eigen the functors are not stateless, so we
+/// cannot re-instantiate them on the device; the instantiated functors have
+/// to be passed to the device instead.
+// This struct is used for leaf nodes (TensorMap) and nodes behaving like
+// leaf nodes (TensorForcedEval).
+template <typename Evaluator> struct FunctorExtractor{
+ typedef typename Evaluator::Dimensions Dimensions;
+ const Dimensions m_dimensions;
+ const Dimensions& dimensions() const { return m_dimensions; }
+ FunctorExtractor(const Evaluator& expr)
+ : m_dimensions(expr.dimensions()) {}
+};
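+
+// For example, a coefficient-wise expression such as `t * 3.14f` carries a
+// functor that stores the constant 3.14f on the host; it is this captured
+// state that forces us to copy the instantiated functor to the device
+// instead of default-constructing a fresh one there. (Illustrative only;
+// the concrete functor type depends on the expression.)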
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp
+template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()), func(expr.functor()) {}
+};
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp
+template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseBinaryOp
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseBinaryOp
+template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorCwiseTernaryOp
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;
+ FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;
+ FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;
+ OP func;
+ FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)
+ : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorCwiseTernaryOp
+template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
+struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
+:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorSelectOp. This is a specialisation without an OP, so it has to be separate.
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;
+ FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;
+ FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)
+ : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorSelectOp. This is a specialisation without an OP, so it has to be separate.
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
+:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorAssignOp. This is a specialisation without an OP, so it has to be separate.
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
+ : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorAssignOp. This is a specialisation without an OP, so it has to be separate.
+template <typename LHSExpr, typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
+:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
+
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// const TensorEvalToOp. This is a specialisation without an OP, so it has to be separate.
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {
+ FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
+ FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr)
+ : rhsExpr(expr.impl()) {}
+};
+
+/// specialisation of the \ref FunctorExtractor struct when the node type is
+/// TensorEvalToOp. This is a specialisation without an OP, so it has to be separate.
+template <typename RHSExpr, typename Dev>
+struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> >
+: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {};
+
+
+/// template deduction function for FunctorExtractor
+template <typename Evaluator>
+inline auto extractFunctors(const Evaluator& evaluator) -> FunctorExtractor<Evaluator> {
+ return FunctorExtractor<Evaluator>(evaluator);
+}
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
new file mode 100644
index 000000000..8d520d2da
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@@ -0,0 +1,111 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclLeafCount.h
+ *
+ * \brief:
+ * LeafCount uses pre-order expression tree traversal in order to
+ * count the number of leaf nodes in the expression
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \brief LeafCount is used to count the terminal nodes. The total number of
+/// leaf nodes is used by MakePlaceHolderExprHelper to find the order
+/// of each leaf node in an expression tree at compile time.
+template <typename Expr>
+struct LeafCount;
+
+template<typename... Args> struct CategoryCount;
+
+template<> struct CategoryCount<>
+{
+ static const size_t Count = 0;
+};
+
+template<typename Arg, typename... Args>
+struct CategoryCount<Arg,Args...>{
+ static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count;
+};
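+
+// Compile-time sketch (hypothetical TensorMap type TM and binary functor OP):
+//   // A = B op C has three TensorMap leaves, so
+//   static_assert(LeafCount<const TensorAssignOp<TM,
+//       const TensorCwiseBinaryOp<OP, TM, TM> > >::Count == 3, "3 leaves");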
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorMap
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is TensorMap
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{};
+
+// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and const TensorBroadcastingOp
+template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
+struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {};
+// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp
+template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
+struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// const TensorSelectOp. This is an exception since it has no OP
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {};
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorSelectOp
+template <typename IfExpr, typename ThenExpr, typename ElseExpr>
+struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {};
+
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorAssignOp
+template <typename LHSExpr, typename RHSExpr>
+struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<const TensorForcedEvalOp<Expr> > {
+ static const size_t Count = 1;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorForcedEvalOp
+template <typename Expr>
+struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {};
+
+/// specialisation of the \ref LeafCount struct when the node type is const
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<const TensorEvalToOp<Expr> > {
+ static const size_t Count = 1 + CategoryCount<Expr>::Count;
+};
+
+/// specialisation of the \ref LeafCount struct when the node type is
+/// TensorEvalToOp
+template <typename Expr>
+struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{};
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
new file mode 100644
index 000000000..43a63c73d
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h
@@ -0,0 +1,99 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolder.h
+ *
+ * \brief:
+ * A PlaceHolder expression is nothing but a container preserving
+ * the order of the actual data in the tuple of SYCL buffers.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
+
+namespace Eigen {
+namespace internal {
+/// \struct PlaceHolder
+/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression
+/// tree.
+/// PlaceHolder contains the order of the leaf node in the expression tree.
+template <typename Scalar, size_t N>
+struct PlaceHolder {
+ static constexpr size_t I = N;
+ typedef Scalar Type;
+};
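+
+// e.g. PlaceHolder<float, 2> simply records that this leaf is the third
+// (index 2) entry in the tuple of SYCL buffers:
+//   static_assert(PlaceHolder<float, 2>::I == 2, "leaf order is preserved");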
+
+/// \brief specialisation of the PlaceHolder node for TensorMap
+#define TENSORMAPPLACEHOLDER(CVQual)\
+template <typename PlainObjectType, int Options_, template <class> class MakePointer_, size_t N>\
+struct PlaceHolder<CVQual TensorMap<PlainObjectType, Options_, MakePointer_>, N> {\
+ static const size_t I = N;\
+ typedef CVQual TensorMap<PlainObjectType, Options_, MakePointer_> Type;\
+ typedef typename Type::Self Self;\
+ typedef typename Type::Base Base;\
+ typedef typename Type::Nested Nested;\
+ typedef typename Type::StorageKind StorageKind;\
+ typedef typename Type::Index Index;\
+ typedef typename Type::Scalar Scalar;\
+ typedef typename Type::RealScalar RealScalar;\
+ typedef typename Type::CoeffReturnType CoeffReturnType;\
+};
+
+TENSORMAPPLACEHOLDER(const)
+TENSORMAPPLACEHOLDER()
+#undef TENSORMAPPLACEHOLDER
+
+/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The
+/// TensorForcedEvalOp acts as a leaf node for its parent node.
+#define TENSORFORCEDEVALPLACEHOLDER(CVQual)\
+template <typename Expression, size_t N>\
+struct PlaceHolder<CVQual TensorForcedEvalOp<Expression>, N> {\
+ static const size_t I = N;\
+ typedef CVQual TensorForcedEvalOp<Expression> Type;\
+ typedef typename Type::Nested Nested;\
+ typedef typename Type::StorageKind StorageKind;\
+ typedef typename Type::Index Index;\
+ typedef typename Type::Scalar Scalar;\
+ typedef typename Type::Packet Packet;\
+ typedef typename Type::RealScalar RealScalar;\
+ typedef typename Type::CoeffReturnType CoeffReturnType;\
+ typedef typename Type::PacketReturnType PacketReturnType;\
+};
+
+TENSORFORCEDEVALPLACEHOLDER(const)
+TENSORFORCEDEVALPLACEHOLDER()
+#undef TENSORFORCEDEVALPLACEHOLDER
+
+template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N>
+struct traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> >: public traits<PlainObjectType> {
+ typedef traits<PlainObjectType> BaseTraits;
+ typedef typename BaseTraits::Scalar Scalar;
+ typedef typename BaseTraits::StorageKind StorageKind;
+ typedef typename BaseTraits::Index Index;
+ static const int NumDimensions = BaseTraits::NumDimensions;
+ static const int Layout = BaseTraits::Layout;
+ enum {
+ Options = Options_,
+ Flags = BaseTraits::Flags,
+ };
+};
+
+template <typename PlainObjectType, int Options_, template <class> class Makepointer_, size_t N>
+struct traits<PlaceHolder<TensorMap<PlainObjectType, Options_, Makepointer_>, N> >
+: traits<PlaceHolder<const TensorMap<PlainObjectType, Options_, Makepointer_>, N> > {};
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
new file mode 100644
index 000000000..f456c35aa
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@@ -0,0 +1,158 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclPlaceHolderExpr.h
+ *
+ * \brief:
+ * These are the specialisations of the placeholder expression based on the
+ * operation type
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+/// \struct PlaceHolderExpression
+/// \brief it is used to create the PlaceHolder expression. The PlaceHolder
+/// expression is a copy of the expression type in which each TensorMap
+/// has been replaced with a PlaceHolder.
+template <typename Expr, size_t N>
+struct PlaceHolderExpression;
+
+template<size_t N, typename... Args>
+struct CalculateIndex;
+
+template<size_t N, typename Arg>
+struct CalculateIndex<N, Arg>{
+ typedef typename PlaceHolderExpression<Arg, N>::Type ArgType;
+ typedef utility::tuple::Tuple<ArgType> ArgsTuple;
+};
+
+template<size_t N, typename Arg1, typename Arg2>
+struct CalculateIndex<N, Arg1, Arg2>{
+ static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
+ typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type;
+ typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type;
+ typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple;
+};
+
+template<size_t N, typename Arg1, typename Arg2, typename Arg3>
+struct CalculateIndex<N, Arg1, Arg2, Arg3> {
+ static const size_t Arg3LeafCount = LeafCount<Arg3>::Count;
+ static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
+ typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type;
+ typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type;
+ typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type;
+ typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple;
+};
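+
+// Index assignment sketch: the last argument keeps N and each earlier
+// argument is shifted down by the leaf counts of the arguments to its
+// right. With three single-leaf arguments and N == 2:
+//   Arg1 -> 0, Arg2 -> 1, Arg3 -> 2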
+
+template<template<class...> class Category, class OP, class TPL>
+struct CategoryHelper;
+
+template<template<class...> class Category, class OP, class... T>
+struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > {
+ typedef Category<OP, T...> Type;
+};
+
+template<template<class...> class Category, class... T>
+struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > {
+ typedef Category<T...> Type;
+};
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp
+#define OPEXPRCATEGORY(CVQual)\
+template <template <class, class...> class Category, typename OP, typename... SubExpr, size_t N>\
+struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\
+ typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\
+};
+
+OPEXPRCATEGORY(const)
+OPEXPRCATEGORY()
+#undef OPEXPRCATEGORY
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorSelectOp
+#define SELECTEXPR(CVQual)\
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\
+ typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\
+};
+
+SELECTEXPR(const)
+SELECTEXPR()
+#undef SELECTEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorAssignOp
+#define ASSIGNEXPR(CVQual)\
+template <typename LHSExpr, typename RHSExpr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\
+ typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\
+};
+
+ASSIGNEXPR(const)
+ASSIGNEXPR()
+#undef ASSIGNEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorMap
+#define TENSORMAPEXPR(CVQual)\
+template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\
+struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\
+ typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\
+};
+
+TENSORMAPEXPR(const)
+TENSORMAPEXPR()
+#undef TENSORMAPEXPR
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorForcedEvalOp
+#define FORCEDEVAL(CVQual)\
+template <typename Expr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\
+ typedef CVQual Eigen::internal::PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\
+};
+
+FORCEDEVAL(const)
+FORCEDEVAL()
+#undef FORCEDEVAL
+
+/// specialisation of the \ref PlaceHolderExpression when the node is
+/// TensorEvalToOp
+#define EVALTO(CVQual)\
+template <typename Expr, size_t N>\
+struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\
+ typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\
+};
+
+EVALTO(const)
+EVALTO()
+#undef EVALTO
+
+/// template deduction for \ref PlaceHolderExpression struct
+template <typename Expr>
+struct createPlaceHolderExpression {
+ static const size_t TotalLeaves = LeafCount<Expr>::Count;
+ typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type;
+};
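+
+// Numbering sketch: for an assignment A = B * C with three TensorMap
+// leaves, TotalLeaves == 3, so the traversal starts at N == 2 and assigns
+// A -> PlaceHolder<_, 0>, B -> PlaceHolder<_, 1>, C -> PlaceHolder<_, 2>,
+// matching the order of the accessors extracted in
+// TensorSyclExtractAccessor.h.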
+
+}
+}
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
new file mode 100644
index 000000000..57f2dda26
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
@@ -0,0 +1,69 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Cummins Chris PhD student at The University of Edinburgh.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclRun.h
+ *
+ * \brief:
+ * Schedule_kernel invokes a specialised version of the kernel struct. The
+ * specialisation is based on the data dimensions of the SYCL buffer
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
+
+namespace Eigen {
+namespace TensorSycl {
+/// The run function in TensorSycl converts the expression tree to a buffer
+/// based expression tree;
+/// creates the expression tree for the device with accessors to buffers;
+/// constructs the kernel and submits it to the SYCL queue.
+template <typename Expr, typename Dev>
+void run(Expr &expr, Dev &dev) {
+ Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
+ const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
+ if (needs_assign) {
+ typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
+ auto functors = internal::extractFunctors(evaluator);
+
+ dev.m_queue.submit([&](cl::sycl::handler &cgh) {
+
+ // create a tuple of accessors from Evaluator
+ auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
+ const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
+
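+ // pick a work-group size of at most 64 and round the global range up to
+ // the next multiple of it; work items beyond `range` are masked off by
+ // the bounds check inside the kernel below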
+ size_t outTileSize = range;
+ if (range > 64) outTileSize = 64;
+ size_t yMode = range % outTileSize;
+ int yRange = static_cast<int>(range);
+ if (yMode != 0) yRange += (outTileSize - yMode);
+
+ // run the kernel
+ cgh.parallel_for<PlaceHolderExpr>(cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) {
+ typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
+ auto device_expr = internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
+ auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
+ if (itemID.get_global_linear_id() < range) {
+ device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
+ }
+ });
+ });
+ dev.m_queue.throw_asynchronous();
+ }
+ evaluator.cleanup();
+}
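+
+// Usage sketch (hedged): with the SYCL executor path added in
+// TensorExecutor.h, a device assignment such as
+//   out.device(sycl_device) = in1 + in2;
+// ends up calling run(assign_expr, sycl_device), where `out`, `in1`, `in2`
+// and `sycl_device` are hypothetical TensorMaps and a SyclDevice
+// (see TensorDeviceSycl.h).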
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
new file mode 100644
index 000000000..063b027e8
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
@@ -0,0 +1,234 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TensorSyclTuple.h
+ *
+ * \brief:
+ * Minimal implementation of std::tuple that can be used inside a SYCL kernel.
+ *
+*****************************************************************/
+
+#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
+#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
+namespace utility {
+namespace tuple {
+/// \struct StaticIf
+/// \brief The StaticIf struct is used to statically choose the type based on the
+/// condition.
+template <bool, typename T = void> struct StaticIf;
+/// \brief specialisation of the \ref StaticIf when the condition is true
+template <typename T>
+struct StaticIf<true, T> {
+ typedef T type;
+};
+
+/// \struct Tuple
+/// \brief is a fixed-size collection of heterogeneous values
+/// \tparam Ts... the types of the elements that the tuple stores.
+/// An empty list is supported.
+template <class... Ts>
+struct Tuple {};
+
+/// \brief specialisation of the \ref Tuple class when the tuple has at least
+/// one element.
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+template <class T, class... Ts>
+struct Tuple<T, Ts...> {
+ Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
+ T head;
+ Tuple<Ts...> tail;
+};
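+
+// e.g. a Tuple<int, float, double> is laid out recursively as
+//   { int head; Tuple<float, double> tail; }
+// where tail is in turn { float head; Tuple<double> tail; }, ending in the
+// empty Tuple<>.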
+
+/// \struct ElemTypeHolder
+/// \brief ElemTypeHolder class is used to query the type of the
+/// kth element inside the tuple
+/// \tparam size_t the index of the requested element inside the tuple
+/// \tparam class the tuple class
+template <size_t, class>
+struct ElemTypeHolder;
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the index k
+/// is 0; it returns the type of the first element in the tuple
+template <class T, class... Ts>
+struct ElemTypeHolder<0, Tuple<T, Ts...> > {
+ typedef T type;
+};
+
+/// \brief specialisation of the \ref ElemTypeHolder class when the index k
+/// is bigger than 0. It recursively peels off the head of the tuple to
+/// find the type of the kth element
+/// \tparam T : the type of the first element in the tuple.
+/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
+/// \tparam k is the index of the requested element in the tuple
+template <size_t k, class T, class... Ts>
+struct ElemTypeHolder<k, Tuple<T, Ts...> > {
+ typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type;
+};
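+
+// e.g. ElemTypeHolder<1, Tuple<int, float, double> >::type peels one head
+// off the tuple and resolves to
+// ElemTypeHolder<0, Tuple<float, double> >::type, i.e. float.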
+
+/// get
+/// \brief Extracts the first element from the tuple.
+/// K=0 represents the first element of the tuple. The tuple cannot be empty.
+/// \tparam Ts... are the types of the elements in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return a (possibly const) reference to the first element of the tuple
+
+#define TERMINATE_CONDS_TUPLE_GET(CVQual) \
+template <size_t k, class... Ts> \
+typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \
+get(CVQual Tuple<Ts...> &t) { \
+ static_assert(sizeof...(Ts) != 0, "The requested index is bigger than the size of the tuple"); \
+ return t.head; \
+}
+
+TERMINATE_CONDS_TUPLE_GET(const)
+TERMINATE_CONDS_TUPLE_GET()
+#undef TERMINATE_CONDS_TUPLE_GET
+/// get
+/// \brief Extracts the Kth element from the tuple.
+/// \tparam K is an integer value in [0,sizeof...(Types)).
+/// \tparam T is the type of the first element in the (remaining) tuple
+/// \tparam Ts... are the types of the rest of the elements in the tuple.
+/// \param t is the tuple whose contents to extract
+/// \return a (possibly const) reference to the Kth element of the tuple
+#define RECURSIVE_TUPLE_GET(CVQual) \
+template <size_t k, class T, class... Ts> \
+typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \
+get(CVQual Tuple<T, Ts...> &t) { \
+ return utility::tuple::get<k - 1>(t.tail); \
+}
+RECURSIVE_TUPLE_GET(const)
+RECURSIVE_TUPLE_GET()
+#undef RECURSIVE_TUPLE_GET
+
+/// make_tuple
+/// \brief Creates a tuple object, deducing the target type from the types of
+/// arguments.
+/// \tparam Args the type of the arguments to construct the tuple from
+/// \param args zero or more arguments to construct the tuple from
+/// \return Tuple<Args...>
+template <typename... Args>
+Tuple<Args...> make_tuple(Args... args) {
+ return Tuple<Args...>(args...);
+}
+
+/// size
+/// \brief Provides access to the number of elements in a tuple as a
+/// compile-time constant expression.
+/// \tparam Args the types of the elements in the tuple
+/// \return size_t
+template <typename... Args>
+static constexpr size_t size(Tuple<Args...> &) {
+ return sizeof...(Args);
+}
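+
+// Usage sketch:
+//   auto t = utility::tuple::make_tuple(1, 2.0f, 'c'); // Tuple<int, float, char>
+//   int i  = utility::tuple::get<0>(t); // 1
+//   char c = utility::tuple::get<2>(t); // 'c'
+//   utility::tuple::size(t);            // 3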
+
+/// \struct IndexList
+/// \brief Creates a list of indices for the elements in the tuple
+/// \tparam Is... a list of indices in the range [0, sizeof...(tuple elements))
+template <size_t... Is>
+struct IndexList {};
+
+/// \struct RangeBuilder
+/// \brief Collects internal details for generating index ranges [MIN, MAX).
+/// Declares the primary template for the index range builder
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof...(elements) - sizeof...(Is)
+/// \tparam Is... is the list of indices generated so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder;
+
+/// \brief base step: specialisation of the \ref RangeBuilder when
+/// N==MIN. In this case Is... is the completed index list [MIN, MAX)
+/// \tparam MIN is the starting index of the range
+/// \tparam Is... is the generated index list [MIN, MAX)
+template <size_t MIN, size_t... Is>
+struct RangeBuilder<MIN, MIN, Is...> {
+ typedef IndexList<Is...> type;
+};
+
+/// Induction step: specialisation of the RangeBuilder class when N!=MIN.
+/// In this case we recursively subtract one from N and prepend one index
+/// to the Is... list until N==MIN
+/// \tparam MIN is the starting index in the tuple
+/// \tparam N represents sizeof...(elements) - sizeof...(Is)
+/// \tparam Is... is the list of indices generated so far
+template <size_t MIN, size_t N, size_t... Is>
+struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};
+
+/// \brief IndexRange that returns a [MIN, MAX) index range
+/// \tparam MIN is the starting index in the tuple
+/// \tparam MAX is the size of the tuple
+template <size_t MIN, size_t MAX>
+struct IndexRange: RangeBuilder<MIN, MAX>::type {};
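+
+// Expansion sketch: IndexRange<0, 3> walks
+//   RangeBuilder<0, 3> -> RangeBuilder<0, 2, 2> -> RangeBuilder<0, 1, 1, 2>
+//   -> RangeBuilder<0, 0, 0, 1, 2>
+// whose base case yields IndexList<0, 1, 2>.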
+
+/// append_base
+/// \brief unpacks the elements of the input tuple t and creates a new tuple
+/// by adding element a at the end of it.
+///\tparam Args... the types of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of the tuple
+/// \tparam I... is the list of indices from [0 to sizeof...(t))
+/// \param t the tuple on which we want to append a.
+/// \param a the new element going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T, size_t... I>
+Tuple<Args..., T> append_base(Tuple<Args...> t, T a, IndexList<I...>) {
+ return utility::tuple::make_tuple(get<I>(t)..., a);
+}
+
+/// append
+/// \brief the deduction function for \ref append_base that automatically
+/// generates the \ref IndexRange
+///\tparam Args... the types of the elements inside the tuple t
+/// \tparam T the type of the new element going to be added at the end of the tuple
+/// \param t the tuple on which we want to append a.
+/// \param a the new element going to be added to the tuple
+/// \return Tuple<Args..., T>
+template <typename... Args, typename T>
+Tuple<Args..., T> append(Tuple<Args...> t, T a) {
+ return utility::tuple::append_base(t, a, IndexRange<0, sizeof...(Args)>());
+}
+
+/// append_base
+/// \brief This is the specialisation of \ref append_base used to
+/// concatenate tuple t2 at the end of tuple t1. Here we unpack both tuples,
+/// generate the IndexRange for each of them and create an output tuple that
+/// contains the elements of both t1 and t2.
+///\tparam Args1... the types of the elements inside the tuple t1
+///\tparam Args2... the types of the elements inside the tuple t2
+/// \tparam I1... is the list of indices from [0 to sizeof...(t1))
+/// \tparam I2... is the list of indices from [0 to sizeof...(t2))
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added to t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2, size_t... I1, size_t... I2>
+Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) {
+ return utility::tuple::make_tuple(get<I1>(t1)..., get<I2>(t2)...);
+}
+
+/// append
+/// \brief deduction function for \ref append_base used when appending
+/// tuple t2 to tuple t1. In this case the \ref IndexRange for both tuples
+/// is automatically generated.
+///\tparam Args1... the types of the elements inside the tuple t1
+///\tparam Args2... the types of the elements inside the tuple t2
+/// \param t1 is the tuple on which we want to append t2.
+/// \param t2 is the tuple that is going to be added to t1.
+/// \return Tuple<Args1..., Args2...>
+template <typename... Args1, typename... Args2>
+Tuple<Args1..., Args2...> append(Tuple<Args1...> t1, Tuple<Args2...> t2) {
+ return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>());
+}
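+
+// Usage sketch covering both append overloads:
+//   auto t1 = utility::tuple::make_tuple(1, 2);  // Tuple<int, int>
+//   auto t2 = utility::tuple::append(t1, 3.0f);  // Tuple<int, int, float>
+//   auto t3 = utility::tuple::append(t1, t2);    // Tuple<int, int, int, int, float>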
+} // tuple
+} // utility
+#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index b7597b3a5..ffcf8b00f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -28,7 +28,7 @@ class compute_tensor_flags
#else
0
#endif
- ||
+ |
#if EIGEN_MAX_ALIGN_BYTES>0
is_dynamic_size_storage
#else
@@ -56,6 +56,9 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
};
+ template <typename T> struct MakePointer {
+ typedef T* Type;
+ };
};
@@ -71,11 +74,14 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
};
+ template <typename T> struct MakePointer {
+ typedef T* Type;
+ };
};
-template<typename PlainObjectType, int Options_>
-struct traits<TensorMap<PlainObjectType, Options_> >
+template<typename PlainObjectType, int Options_, template <class> class MakePointer_>
+struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
: public traits<PlainObjectType>
{
typedef traits<PlainObjectType> BaseTraits;
@@ -88,6 +94,11 @@ struct traits<TensorMap<PlainObjectType, Options_> >
Options = Options_,
Flags = BaseTraits::Flags
};
+ template <class T> struct MakePointer {
+ // Intermediate typedef to workaround MSVC issue.
+ typedef MakePointer_<T> MakePointerT;
+ typedef typename MakePointerT::Type Type;
+ };
};
template<typename PlainObjectType>
@@ -131,16 +142,16 @@ struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eig
typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
};
-template<typename PlainObjectType, int Options>
-struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense>
+template<typename PlainObjectType, int Options, template <class> class MakePointer>
+struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
-template<typename PlainObjectType, int Options>
-struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense>
+template<typename PlainObjectType, int Options, template <class> class MakePointer>
+struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
template<typename PlainObjectType>
@@ -186,16 +197,16 @@ struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
};
-template <typename PlainObjectType, int Options>
-struct nested<TensorMap<PlainObjectType, Options> >
+template <typename PlainObjectType, int Options, template <class> class MakePointer>
+struct nested<TensorMap<PlainObjectType, Options, MakePointer> >
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
-template <typename PlainObjectType, int Options>
-struct nested<const TensorMap<PlainObjectType, Options> >
+template <typename PlainObjectType, int Options, template <class> class MakePointer>
+struct nested<const TensorMap<PlainObjectType, Options, MakePointer> >
{
- typedef const TensorMap<PlainObjectType, Options>& type;
+ typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
};
template <typename PlainObjectType>
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
index 7c7493c56..a2ad4925e 100644
--- a/unsupported/Eigen/SpecialFunctions
+++ b/unsupported/Eigen/SpecialFunctions
@@ -10,6 +10,8 @@
#ifndef EIGEN_SPECIALFUNCTIONS_MODULE
#define EIGEN_SPECIALFUNCTIONS_MODULE
+#include <math.h>
+
#include "../../Eigen/Core"
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 52619fc0c..f524d7137 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -120,13 +120,27 @@ struct lgamma_retval {
template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
+ static EIGEN_STRONG_INLINE float run(float x) {
+#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+ int signgam;
+ return ::lgammaf_r(x, &signgam);
+#else
+ return ::lgammaf(x);
+#endif
+ }
};
template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
+ static EIGEN_STRONG_INLINE double run(double x) {
+#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
+ int signgam;
+ return ::lgamma_r(x, &signgam);
+#else
+ return ::lgamma(x);
+#endif
+ }
};
#endif
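
The hunk above swaps in the reentrant lgamma variants where available: ::lgamma and ::lgammaf report the sign of Gamma(x) through the global variable signgam on BSD/SVID systems, which is a data race under multithreading, while lgamma_r/lgammaf_r write the sign into a caller-provided int. A minimal sketch of the difference, assuming a glibc-style libm:

    #include <math.h>

    double a = lgamma(-0.5);           // also sets the global ::signgam (racy)
    int sign;                          // local sign storage instead
    double b = lgamma_r(-0.5, &sign);  // reentrant: no shared state touched

This is likely also why the SpecialFunctions module header above gains an explicit #include <math.h>.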
@@ -794,7 +808,7 @@ template <>
struct zeta_impl_series<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) {
- int i = 0;
+ int i = 0;
while(i < 9)
{
i += 1;
@@ -804,7 +818,7 @@ struct zeta_impl_series<float> {
if( numext::abs(b/s) < machep )
return true;
}
-
+
//Return whether we are done
return false;
}
@@ -814,7 +828,7 @@ template <>
struct zeta_impl_series<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) {
- int i = 0;
+ int i = 0;
while( (i < 9) || (a <= 9.0) )
{
i += 1;
@@ -824,12 +838,12 @@ struct zeta_impl_series<double> {
if( numext::abs(b/s) < machep )
return true;
}
-
+
//Return whether we are done
return false;
}
};
-
+
template <typename Scalar>
struct zeta_impl {
EIGEN_DEVICE_FUNC
@@ -894,10 +908,10 @@ struct zeta_impl {
* Series, and Products, p. 1073; Academic Press, 1980.
*
*/
-
+
int i;
Scalar p, r, a, b, k, s, t, w;
-
+
const Scalar A[] = {
Scalar(12.0),
Scalar(-720.0),
@@ -912,20 +926,20 @@ struct zeta_impl {
Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/
Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/
};
-
+
const Scalar maxnum = NumTraits<Scalar>::infinity();
const Scalar zero = 0.0, half = 0.5, one = 1.0;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
-
+
if( x == one )
return maxnum;
-
+
if( x < one )
{
return nan;
}
-
+
if( q <= zero )
{
if(q == numext::floor(q))
@@ -937,7 +951,7 @@ struct zeta_impl {
if (p != r)
return nan;
}
-
+
/* Permit negative q but continue sum until n+q > +9 .
* This case should be handled by a reflection formula.
* If q<0 and x is an integer, there is a relation to
@@ -950,7 +964,7 @@ struct zeta_impl {
if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) {
return s;
}
-
+
w = a;
s += b*w/(x-one);
s -= half * b;
@@ -983,9 +997,9 @@ template <typename Scalar>
struct polygamma_retval {
typedef Scalar type;
};
-
+
#if !EIGEN_HAS_C99_MATH
-
+
template <typename Scalar>
struct polygamma_impl {
EIGEN_DEVICE_FUNC
@@ -995,9 +1009,9 @@ struct polygamma_impl {
return Scalar(0);
}
};
-
+
#else
-
+
template <typename Scalar>
struct polygamma_impl {
EIGEN_DEVICE_FUNC
@@ -1005,7 +1019,7 @@ struct polygamma_impl {
Scalar zero = 0.0, one = 1.0;
Scalar nplus = n + one;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
-
+
// Check that n is an integer
if (numext::floor(n) != n) {
return nan;
@@ -1021,7 +1035,7 @@ struct polygamma_impl {
}
}
};
-
+
#endif // EIGEN_HAS_C99_MATH
/************************************************************************************************
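
For reference, the Hurwitz zeta routine touched above (the hunks there are largely trailing-whitespace cleanup) evaluates the standard Euler-Maclaurin expansion, with the table A[] storing the reciprocals (2j)!/B_{2j} of the Bernoulli-number coefficients (A[0] = 2!/B_2 = 12, A[1] = 4!/B_4 = -720, ...). In the code's variables, a sketch of the formula is:

    \zeta(x, q) \approx \sum_{k=0}^{n-1} (k+q)^{-x}
                 + \frac{(n+q)^{1-x}}{x - 1}
                 - \frac{1}{2}(n+q)^{-x}
                 + \sum_{j=1}^{J} \frac{B_{2j}}{(2j)!}
                   \cdot \frac{x (x+1) \cdots (x+2j-2)}{(n+q)^{x+2j-1}}
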
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index a1823beaa..aeaea162a 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -140,6 +140,12 @@ endif()
endif()
if(EIGEN_TEST_CXX11)
+ if(EIGEN_TEST_SYCL)
+ ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
+ endif(EIGEN_TEST_SYCL)
# It should be safe to always run these tests as there is some fallback code for
# older compiler that don't support cxx11.
set(CMAKE_CXX_STANDARD 11)
diff --git a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
new file mode 100644
index 000000000..ecebf7d68
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -0,0 +1,79 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+// Types used in tests:
+using TestTensor = Tensor<float, 3>;
+using TestTensorMap = TensorMap<Tensor<float, 3>>;
+static void test_broadcast_sycl() {
+
+ cl::sycl::gpu_selector s;
+ cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (const cl::sycl::exception& e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+ SyclDevice sycl_device(q);
+ // BROADCAST test:
+ array<int, 4> in_range = {{2, 3, 5, 7}};
+ array<int, in_range.size()> broadcasts = {{2, 3, 1, 4}};
+ array<int, in_range.size()> out_range; // = in_range * broadcasts
+ for (size_t i = 0; i < out_range.size(); ++i)
+ out_range[i] = in_range[i] * broadcasts[i];
+
+ Tensor<float, in_range.size()> input(in_range);
+ Tensor<float, out_range.size()> output(out_range);
+
+ for (int i = 0; i < input.size(); ++i)
+ input(i) = static_cast<float>(i);
+
+ TensorMap<decltype(input)> gpu_in(input.data(), in_range);
+ TensorMap<decltype(output)> gpu_out(output.data(), out_range);
+ gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
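+ // With this SYCL backend, deallocate() also destroys the device buffer,
+ // which writes the computed result back into the host memory checked below.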
+ sycl_device.deallocate(output.data());
+
+ for (size_t i = 0; i < in_range.size(); ++i)
+ VERIFY_IS_EQUAL(output.dimension(i), out_range[i]);
+
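+ // Loop bounds equal out_range = in_range * broadcasts = {4, 9, 5, 28};
+ // each output index maps back to the input modulo the input dimension.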
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 28; ++l) {
+ VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), output(i,j,k,l));
+ }
+ }
+ }
+ }
+ printf("Broadcast Test Passed\n");
+}
+
+void test_cxx11_tensor_broadcast_sycl() {
+ CALL_SUBTEST(test_broadcast_sycl());
+}
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 767e9c678..dd68430ce 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -69,7 +69,7 @@ void test_cuda_contraction(int m_size, int k_size, int n_size)
t_result = t_left.contract(t_right, dims);
cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
- for (size_t i = 0; i < t_result.size(); i++) {
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
continue;
}
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index bf216587a..0ba9d52e9 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -303,8 +303,8 @@ void test_cuda_contraction()
cudaMemcpy(t_result.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
- for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
- if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
+ if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
std::cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false);
}
diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp
new file mode 100644
index 000000000..f54fc8786
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -0,0 +1,39 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_device_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+void test_device_sycl() {
+ cl::sycl::gpu_selector s;
+ cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (const cl::sycl::exception& e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+ Eigen::SyclDevice sycl_device(q);
+ printf("Helo from ComputeCpp: Device Exists\n");
+}
+void test_cxx11_tensor_device_sycl() {
+ CALL_SUBTEST(test_device_sycl());
+}
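
All four new SYCL tests open with the same queue-plus-async-handler boilerplate. A hypothetical helper (not part of the patch) that could factor it out:

    // Hypothetical helper; assumes <CL/sycl.hpp> and <iostream> are
    // available, as they are in the tests above.
    inline cl::sycl::queue make_test_queue() {
      cl::sycl::gpu_selector selector;
      return cl::sycl::queue(selector, [](cl::sycl::exception_list l) {
        // SYCL reports asynchronous errors in batches; rethrow each one so
        // it can be caught and logged as a cl::sycl::exception.
        for (const auto& e : l) {
          try {
            std::rethrow_exception(e);
          } catch (const cl::sycl::exception& ex) {
            std::cout << ex.what() << std::endl;
          }
        }
      });
    }

Assuming SyclDevice takes the queue by non-const reference (as the tests' usage suggests), the returned temporary still needs a name: cl::sycl::queue q = make_test_queue(); Eigen::SyclDevice sycl_device(q);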
diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
new file mode 100644
index 000000000..182ec7fa8
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
@@ -0,0 +1,68 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_forced_eval_sycl() {
+ cl::sycl::gpu_selector s;
+ cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (const cl::sycl::exception& e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+ SyclDevice sycl_device(q);
+
+ int sizeDim1 = 100;
+ int sizeDim2 = 200;
+ int sizeDim3 = 200;
+ Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ Eigen::Tensor<float, 3> in1(tensorRange);
+ Eigen::Tensor<float, 3> in2(tensorRange);
+ Eigen::Tensor<float, 3> out(tensorRange);
+
+ in1 = in1.random() + in1.constant(10.0f);
+ in2 = in2.random() + in2.constant(10.0f);
+
+ // Create TensorMaps over the host tensors' buffers.
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(in1.data(), tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(in2.data(), tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(out.data(), tensorRange);
+
+ /// c=(a+b)*b
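+ // .eval() forces (a+b) to be materialized in a temporary on the device
+ // (the forced-eval path under test) before the multiply by b runs.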
+ gpu_out.device(sycl_device) = (gpu_in1 + gpu_in2).eval() * gpu_in2;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i, j, k),
+ (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
+ }
+ }
+ }
+ printf("(a+b)*b Test Passed\n");
+}
+
+void test_cxx11_tensor_forced_eval_sycl() { CALL_SUBTEST(test_forced_eval_sycl()); }
diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp
index 988b01481..475c59651 100644
--- a/unsupported/test/cxx11_tensor_image_patch.cpp
+++ b/unsupported/test/cxx11_tensor_image_patch.cpp
@@ -13,7 +13,7 @@
using Eigen::Tensor;
-static void test_simple_patch()
+void test_simple_patch()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
@@ -180,7 +180,7 @@ static void test_simple_patch()
}
// Verifies VALID padding (no padding) with incrementing values.
-static void test_patch_padding_valid()
+void test_patch_padding_valid()
{
int input_depth = 3;
int input_rows = 3;
@@ -256,7 +256,7 @@ static void test_patch_padding_valid()
}
// Verifies VALID padding (no padding) with the same value.
-static void test_patch_padding_valid_same_value()
+void test_patch_padding_valid_same_value()
{
int input_depth = 1;
int input_rows = 5;
@@ -329,7 +329,7 @@ static void test_patch_padding_valid_same_value()
}
// Verifies SAME padding.
-static void test_patch_padding_same()
+void test_patch_padding_same()
{
int input_depth = 3;
int input_rows = 4;
@@ -405,7 +405,7 @@ static void test_patch_padding_same()
}
}
-static void test_patch_no_extra_dim()
+void test_patch_no_extra_dim()
{
Tensor<float, 3> tensor(2,3,5);
tensor.setRandom();
@@ -553,7 +553,7 @@ static void test_patch_no_extra_dim()
}
}
-static void test_imagenet_patches()
+void test_imagenet_patches()
{
// Test the code on typical configurations used by the 'imagenet' benchmarks at
// https://github.com/soumith/convnet-benchmarks
@@ -748,10 +748,10 @@ static void test_imagenet_patches()
void test_cxx11_tensor_image_patch()
{
- CALL_SUBTEST(test_simple_patch());
- CALL_SUBTEST(test_patch_no_extra_dim());
- CALL_SUBTEST(test_patch_padding_valid());
- CALL_SUBTEST(test_patch_padding_valid_same_value());
- CALL_SUBTEST(test_patch_padding_same());
- CALL_SUBTEST(test_imagenet_patches());
+ CALL_SUBTEST_1(test_simple_patch());
+ CALL_SUBTEST_2(test_patch_no_extra_dim());
+ CALL_SUBTEST_3(test_patch_padding_valid());
+ CALL_SUBTEST_4(test_patch_padding_valid_same_value());
+ CALL_SUBTEST_5(test_patch_padding_same());
+ CALL_SUBTEST_6(test_imagenet_patches());
}
diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu
index 761d11fd1..5f146f3c9 100644
--- a/unsupported/test/cxx11_tensor_scan_cuda.cu
+++ b/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -55,7 +55,7 @@ void test_cuda_cumsum(int m_size, int k_size, int n_size)
t_result = t_input.cumsum(1);
cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
- for (size_t i = 0; i < t_result.size(); i++) {
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
continue;
}
diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp
index fe860c970..5a0d339ef 100644
--- a/unsupported/test/cxx11_tensor_simple.cpp
+++ b/unsupported/test/cxx11_tensor_simple.cpp
@@ -299,22 +299,21 @@ static void test_resize()
VERIFY_IS_EQUAL(epsilon.dimension(0), 2);
VERIFY_IS_EQUAL(epsilon.dimension(1), 3);
VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
- VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
const int* old_data = epsilon.data();
epsilon.resize(3,2,7);
VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
VERIFY_IS_EQUAL(epsilon.dimension(1), 2);
VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
- VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
VERIFY_IS_EQUAL(epsilon.data(), old_data);
epsilon.resize(3,5,7);
VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
VERIFY_IS_EQUAL(epsilon.dimension(1), 5);
VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
- VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 3*5*7);
- VERIFY_IS_NOT_EQUAL(epsilon.data(), old_data);
+ VERIFY_IS_EQUAL(epsilon.size(), 3*5*7);
}
void test_cxx11_tensor_simple()
diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp
new file mode 100644
index 000000000..0f66cd8f0
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl.cpp
@@ -0,0 +1,162 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+// Types used in tests:
+using TestTensor = Tensor<float, 3>;
+using TestTensorMap = TensorMap<Tensor<float, 3>>;
+
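+// Note: despite the "cpu" in its name, this test requests a GPU queue below.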
+void test_sycl_cpu() {
+ cl::sycl::gpu_selector s;
+ cl::sycl::queue q(s, [=](cl::sycl::exception_list l) {
+ for (const auto& e : l) {
+ try {
+ std::rethrow_exception(e);
+ } catch (const cl::sycl::exception& e) {
+ std::cout << e.what() << std::endl;
+ }
+ }
+ });
+ SyclDevice sycl_device(q);
+
+ int sizeDim1 = 100;
+ int sizeDim2 = 100;
+ int sizeDim3 = 100;
+ array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ TestTensor in1(tensorRange);
+ TestTensor in2(tensorRange);
+ TestTensor in3(tensorRange);
+ TestTensor out(tensorRange);
+ in1 = in1.random();
+ in2 = in2.random();
+ in3 = in3.random();
+ TestTensorMap gpu_in1(in1.data(), tensorRange);
+ TestTensorMap gpu_in2(in2.data(), tensorRange);
+ TestTensorMap gpu_in3(in3.data(), tensorRange);
+ TestTensorMap gpu_out(out.data(), tensorRange);
+
+ /// a=1.2f
+ gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
+ sycl_device.deallocate(in1.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
+ }
+ }
+ }
+ printf("a=1.2f Test passed\n");
+
+ /// a=b*1.2f
+ gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 1.2f);
+ }
+ }
+ }
+ printf("a=b*1.2f Test Passed\n");
+
+ /// c=a*b
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a*b Test Passed\n");
+
+ /// c=a+b
+ gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) +
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a+b Test Passed\n");
+
+ /// c=a*a
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in1(i,j,k));
+ }
+ }
+ }
+
+ printf("c= a*a Test Passed\n");
+
+ //a*3.14f + b*2.7f
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 3.14f
+ + in2(i,j,k) * 2.7f);
+ }
+ }
+ }
+ printf("a*3.14f + b*2.7f Test Passed\n");
+
+ ///d= (a>0.5? b:c)
+ gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
+ sycl_device.deallocate(out.data());
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
+ ? in2(i, j, k)
+ : in3(i, j, k));
+ }
+ }
+ }
+ printf("d= (a>0.5? b:c) Test Passed\n");
+
+}
+void test_cxx11_tensor_sycl() {
+ CALL_SUBTEST(test_sycl_cpu());
+}