15 files changed, 1541 insertions, 32 deletions
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 5043b64fe..0ed65f9b4 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -42,14 +42,14 @@ struct tribb_kernel;
 template <typename Index,
           typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
-                              int ResStorageOrder, int  UpLo>
+                              int ResStorageOrder, int  UpLo, int Version = Specialized>
 struct general_matrix_matrix_triangular_product;
 
 // as usual if the result is row major => we transpose the product
 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo>
-{  
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
+{
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
                                       const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
@@ -63,8 +63,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
 };
 
 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
-                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo>
-struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo>
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
new file mode 100644
index 000000000..d86bc0542
--- /dev/null
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h
@@ -0,0 +1,144 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Level 3 BLAS SYRK/HERK implementation.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int  UpLo>
+struct general_matrix_matrix_rankupdate :
+       general_matrix_matrix_triangular_product<
+         Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \
+template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
+                          int RhsStorageOrder, bool ConjugateRhs, int  UpLo> \
+struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
+               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Specialized> { \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
+                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \
+  { \
+    if (lhs==rhs) { \
+      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+    } else { \
+      general_matrix_matrix_triangular_product<Index, \
+        Scalar, LhsStorageOrder, ConjugateLhs, \
+        Scalar, RhsStorageOrder, ConjugateRhs, \
+        ColMajor, UpLo, BuiltIn> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \
+    } \
+  } \
+};
+
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(double)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex)
+EIGEN_MKL_RANKUPDATE_SPECIALIZE(float)
+//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex)
+
+// SYRK for float/double
+#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+  { \
+  /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
+\
+   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'T':'N'; \
+   MKLTYPE alpha_, beta_; \
+\
+/* Set alpha_ & beta_ */ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \
+  } \
+};
+
+// HERK for complex data
+#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \
+  { \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
+\
+   MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \
+   char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \
+   RTYPE alpha_, beta_; \
+   const EIGTYPE* a_ptr; \
+\
+/* Set alpha_ & beta_ */ \
+/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); */\
+/*   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1));*/ \
+   alpha_ = alpha.real(); \
+   beta_ = 1.0; \
+/* Copy with conjugation in some cases*/ \
+   MatrixType a; \
+   if (conjA) { \
+     Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
+     a = mapA.conjugate(); \
+     lda = a.outerStride(); \
+     a_ptr = a.data(); \
+   } else a_ptr=lhs; \
+   MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \
+  } \
+};
+
+
+EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_MKL_RANKUPDATE_R(float,  float,  ssyrk)
+
+//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk)
+//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8,  double, cherk)
+
+
+} // end namespace internal
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
new file mode 100644
index 000000000..9ad589e66
--- /dev/null
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   General matrix-matrix product functionality based on ?GEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-matrix multiplication using BLAS
+* gemm function via partial specialization of
+* general_matrix_matrix_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemm specialization
+
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \
+template< \
+  typename Index, \
+  int LhsStorageOrder, bool ConjugateLhs, \
+  int RhsStorageOrder, bool ConjugateRhs> \
+struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+static void run(Index rows, Index cols, Index depth, \
+  const EIGTYPE* _lhs, Index lhsStride, \
+  const EIGTYPE* _rhs, Index rhsStride, \
+  EIGTYPE* res, Index resStride, \
+  EIGTYPE alpha, \
+  level3_blocking<EIGTYPE, EIGTYPE>& blocking, \
+  GemmParallelInfo<Index>* info = 0) \
+{ \
+  using std::conj; \
+\
+  char transa, transb; \
+  MKL_INT m, n, k, lda, ldb, ldc; \
+  const EIGTYPE *a, *b; \
+  MKLTYPE alpha_, beta_; \
+  MatrixX##EIGPREFIX a_tmp, b_tmp; \
+  EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+  transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+  transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set m, n, k */ \
+  m = (MKL_INT)rows;  \
+  n = (MKL_INT)cols;  \
+  k = (MKL_INT)depth; \
+\
+/* Set alpha_ & beta_ */ \
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+  lda = (MKL_INT)lhsStride; \
+  ldb = (MKL_INT)rhsStride; \
+  ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+  if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
+    a_tmp = lhs.conjugate(); \
+    a = a_tmp.data(); \
+    lda = a_tmp.outerStride(); \
+  } else a = _lhs; \
+\
+  if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
+    b_tmp = rhs.conjugate(); \
+    b = b_tmp.data(); \
+    ldb = b_tmp.outerStride(); \
+  } else b = _rhs; \
+\
+  MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+}};
+
+GEMM_SPECIALIZATION(double,   d,  double,        d)
+GEMM_SPECIALIZATION(float,    f,  float,         s)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  c)
+
+} //end of namespase
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index e0e2cbf8f..1481c138d 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -40,8 +40,8 @@ namespace internal {
  *  |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
  *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul
  */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
@@ -296,8 +296,8 @@ EIGEN_DONT_INLINE static void run(
  *  - alpha is always a complex (or converted to a complex)
  *  - no vectorization
  */
-template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs>
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>
 {
 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
new file mode 100644
index 000000000..20194c9fa
--- /dev/null
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -0,0 +1,129 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   General matrix-vector product functionality based on ?GEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-vector multiplication using BLAS
+* gemv function via partial specialization of
+* general_matrix_vector_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemv specialization
+
+template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product_gemv :
+  general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const Scalar* lhs, Index lhsStride, \
+  const Scalar* rhs, Index rhsIncr, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+  if (ConjugateLhs) { \
+    general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+  } else { \
+    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+  } \
+} \
+}; \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const Scalar* lhs, Index lhsStride, \
+  const Scalar* rhs, Index rhsIncr, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+} \
+}; \
+
+EIGEN_MKL_GEMV_SPECIALIZE(double)
+EIGEN_MKL_GEMV_SPECIALIZE(float)
+EIGEN_MKL_GEMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_GEMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \
+template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
+\
+static EIGEN_DONT_INLINE void run( \
+  Index rows, Index cols, \
+  const EIGTYPE* lhs, Index lhsStride, \
+  const EIGTYPE* rhs, Index rhsIncr, \
+  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
+{ \
+  MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
+  MKLTYPE alpha_, beta_; \
+  const EIGTYPE *x_ptr, myone(1); \
+  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
+  if (LhsStorageOrder==RowMajor) { \
+    m=cols; \
+    n=rows; \
+  }\
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+  GEMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+    incx=1; \
+  } else x_ptr=rhs; \
+  MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_GEMV_SPECIALIZATION(double,   double,        d)
+EIGEN_MKL_GEMV_SPECIALIZATION(float,    float,         s)
+EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8,  c)
+
+} //end of namespase
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index ecdedc363..d002f6d2e 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -85,7 +85,7 @@ template<typename Index> struct GemmParallelInfo
 template<bool Condition, typename Functor, typename Index>
 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
 {
-#ifndef EIGEN_HAS_OPENMP
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_MKL)
   // FIXME the transpose variable is only needed to properly split
   // the matrix product when multithreading is enabled. This is a temporary
   // fix to support row-major destination matrices. This whole
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
new file mode 100644
index 000000000..338a57a55
--- /dev/null
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -0,0 +1,293 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+
+/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='L', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    EIGTYPE myone(1);\
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = (MKL_INT)rows;  \
+    n = (MKL_INT)cols;  \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)lhsStride; \
+    ldb = (MKL_INT)rhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+    a = _lhs; \
+\
+    if (RhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = rhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } else b = _rhs; \
+\
+    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
+{\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='L', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
+    EIGTYPE myone(1); \
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = (MKL_INT)rows; \
+    n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)lhsStride; \
+    ldb = (MKL_INT)rhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
+      a_tmp = lhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = a_tmp.outerStride(); \
+    } else a = _lhs; \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
+       b = _rhs; } \
+    else { \
+      if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.conjugate(); \
+      } else \
+      if (ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } \
+\
+    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+EIGEN_MKL_SYMM_L(double, double, d, d)
+EIGEN_MKL_SYMM_L(float, float, f, s)
+EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c)
+
+
+/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='R', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    EIGTYPE myone(1);\
+\
+/* Set m, n, k */ \
+    m = (MKL_INT)rows;  \
+    n = (MKL_INT)cols;  \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)rhsStride; \
+    ldb = (MKL_INT)lhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+    a = _rhs; \
+\
+    if (LhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = lhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } else b = _lhs; \
+\
+    MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
+{\
+  static EIGEN_DONT_INLINE void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+    char side='R', uplo='L'; \
+    MKL_INT m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    MKLTYPE alpha_, beta_; \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
+    EIGTYPE myone(1); \
+\
+/* Set m, n, k */ \
+    m = (MKL_INT)rows; \
+    n = (MKL_INT)cols; \
+\
+/* Set alpha_ & beta_ */ \
+    assign_scalar_eig2mkl(alpha_, alpha); \
+    assign_scalar_eig2mkl(beta_, myone); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = (MKL_INT)rhsStride; \
+    ldb = (MKL_INT)lhsStride; \
+    ldc = (MKL_INT)resStride; \
+\
+/* Set a, b, c */ \
+    if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
+      a_tmp = rhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = a_tmp.outerStride(); \
+    } else a = _rhs; \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
+       b = _lhs; } \
+    else { \
+      if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.conjugate(); \
+      } else \
+      if (ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = b_tmp.outerStride(); \
+    } \
+\
+    MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \
+  } \
+};
+
+EIGEN_MKL_SYMM_R(double, double, d, d)
+EIGEN_MKL_SYMM_R(float, float, f, s)
+EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index d6121fc07..14419e168 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,8 +32,15 @@ namespace internal {
  * the number of load/stores of the result by a factor 2 and to reduce
  * the instruction dependency.
  */
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
-static EIGEN_DONT_INLINE void product_selfadjoint_vector(
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
+struct selfadjoint_matrix_vector_product;
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+struct selfadjoint_matrix_vector_product
+
+{
+static EIGEN_DONT_INLINE void run(
   Index size,
   const Scalar*  lhs, Index lhsStride,
   const Scalar* _rhs, Index rhsIncr,
@@ -159,6 +166,7 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
     res[j] += alpha * t2;
   }
 }
+};
 
 } // end namespace internal 
 
@@ -232,7 +240,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     }
       
       
-    internal::product_selfadjoint_vector<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>
+    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
       (
         lhs.rows(),                             // size
         &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
new file mode 100644
index 000000000..37d20d581
--- /dev/null
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+/**********************************************************************
+* This file implements selfadjoint matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// symv/hemv specialization
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
+struct selfadjoint_matrix_vector_product_symv :
+  selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
+static EIGEN_DONT_INLINE void run( \
+  Index size, const Scalar*  lhs, Index lhsStride, \
+  const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
+    enum {\
+      IsColMajor = StorageOrder==ColMajor \
+    }; \
+    if (IsColMajor == ConjugateLhs) {\
+      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
+        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+    } else {\
+      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
+        size, lhs, lhsStride, _rhs, rhsIncr, res, alpha);  \
+    }\
+  } \
+}; \
+
+EIGEN_MKL_SYMV_SPECIALIZE(double)
+EIGEN_MKL_SYMV_SPECIALIZE(float)
+EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
+
+#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
+\
+static EIGEN_DONT_INLINE void run( \
+Index size, const EIGTYPE*  lhs, Index lhsStride, \
+const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
+{ \
+  enum {\
+    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
+    IsLower = UpLo == Lower ? 1 : 0, \
+  }; \
+  MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
+  MKLTYPE alpha_, beta_; \
+  const EIGTYPE *x_ptr, myone(1); \
+  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
+  assign_scalar_eig2mkl(alpha_, alpha); \
+  assign_scalar_eig2mkl(beta_, myone); \
+  SYMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+    incx=1; \
+  } else x_ptr=_rhs; \
+  MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
+}\
+};
+
+EIGEN_MKL_SYMV_SPECIALIZATION(double,   double,        dsymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(float,    float,         ssymv)
+EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
+
+} //end of namespase
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 3a4523fa4..1def96719 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -110,7 +110,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
     enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
-    
+
     internal::general_matrix_matrix_triangular_product<Index,
       Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
       Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index e31727eb2..7632ba85a 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -58,16 +58,16 @@ template <typename Scalar, typename Index,
           int Mode, bool LhsIsTriangular,
           int LhsStorageOrder, bool ConjugateLhs,
           int RhsStorageOrder, bool ConjugateRhs,
-          int ResStorageOrder>
+          int ResStorageOrder, int Version = Specialized>
 struct product_triangular_matrix_matrix;
 
 template <typename Scalar, typename Index,
           int Mode, bool LhsIsTriangular,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,RowMajor>
+                                           RhsStorageOrder,ConjugateRhs,RowMajor,Version>
 {
   static EIGEN_STRONG_INLINE void run(
     Index rows, Index cols, Index depth,
@@ -91,10 +91,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
 // implements col-major += alpha * op(triangular) * op(general)
 template <typename Scalar, typename Index, int Mode,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
 {
   
   typedef gebp_traits<Scalar,Scalar> Traits;
@@ -220,10 +220,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
 // implements col-major += alpha * op(general) * op(triangular)
 template <typename Scalar, typename Index, int Mode,
           int LhsStorageOrder, bool ConjugateLhs,
-          int RhsStorageOrder, bool ConjugateRhs>
+          int RhsStorageOrder, bool ConjugateRhs, int Version>
 struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                                            LhsStorageOrder,ConjugateLhs,
-                                           RhsStorageOrder,ConjugateRhs,ColMajor>
+                                           RhsStorageOrder,ConjugateRhs,ColMajor,Version>
 {
   typedef gebp_traits<Scalar,Scalar> Traits;
   enum {
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
new file mode 100644
index 000000000..5c0a07739
--- /dev/null
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -0,0 +1,307 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+
+template <typename Scalar, typename Index,
+          int Mode, bool LhsIsTriangular,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResStorageOrder>
+struct product_triangular_matrix_matrix_trmm :
+       product_triangular_matrix_matrix<Scalar,Index,Mode,
+          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
+          RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
+           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
+  inline static void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
+    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
+      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
+        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
+        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
+        _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+  } \
+};
+
+EIGEN_MKL_TRMM_SPECIALIZE(double, true)
+EIGEN_MKL_TRMM_SPECIALIZE(double, false)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_MKL_TRMM_SPECIALIZE(float, true)
+EIGEN_MKL_TRMM_SPECIALIZE(float, false)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
+
+// implements col-major += alpha * op(triangular) * op(general)
+#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
+  }; \
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+   Index diagSize  = (std::min)(_rows,_depth); \
+   Index rows      = IsLower ? _rows : diagSize; \
+   Index depth     = IsLower ? diagSize : _depth; \
+   Index cols      = _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+   if (rows != depth) { \
+\
+     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
+     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+     /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
+       MKL_INT aStride = aa_tmp.outerStride(); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
+\
+     /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'L', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   MKL_INT m, n, k, lda, ldb, ldc; \
+   MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+   m = (MKL_INT)diagSize; \
+   n = (MKL_INT)cols; \
+\
+/* Set trans */ \
+   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
+   b = b_tmp.data(); \
+   ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixLhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _lhs; \
+     lda = lhsStride; \
+   } \
+   /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+EIGEN_MKL_TRMM_L(double, double, d, d)
+EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_L(float, float, f, s)
+EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
+
+// implements col-major += alpha * op(general) * op(triangular)
+#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
+  }; \
+\
+  static EIGEN_DONT_INLINE void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha) \
+  { \
+   Index diagSize  = (std::min)(_cols,_depth); \
+   Index rows      = _rows; \
+   Index depth     = IsLower ? _depth : diagSize; \
+   Index cols      = IsLower ? diagSize : _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \
+   if (cols != depth) { \
+\
+     int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
+\
+     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
+     /* Most likely no benefit to call TRMM or GEMM from MKL*/ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+       /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
+       MKL_INT aStride = aa_tmp.outerStride(); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
+       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking); \
+\
+     /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'R', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   MKL_INT m, n, k, lda, ldb, ldc; \
+   MKLTYPE alpha_; \
+\
+/* Set alpha_*/ \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+\
+/* Set m, n */ \
+   m = (MKL_INT)rows; \
+   n = (MKL_INT)diagSize; \
+\
+/* Set trans */ \
+   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
+   b = b_tmp.data(); \
+   ldb = b_tmp.outerStride(); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+   MatrixRhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _rhs; \
+     lda = rhsStride; \
+   } \
+   /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+EIGEN_MKL_TRMM_R(double, double, d, d)
+EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMM_R(float, float, f, s)
+EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
+
+} // end namespace internal
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index 7bfff504c..ebda994f6 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,11 +27,11 @@
 
 namespace internal {
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
-struct product_triangular_matrix_vector;
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
+struct triangular_matrix_vector_product;
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
-struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   enum {
@@ -75,7 +75,7 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
       if (r>0)
       {
         Index s = IsLower ? pi+actualPanelWidth : 0;
-        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+        general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
             r, actualPanelWidth,
             &lhs.coeffRef(s,pi), lhsStride,
             &rhs.coeffRef(pi), rhsIncr,
@@ -93,8 +93,8 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
   }
 };
 
-template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
-struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   enum {
@@ -138,7 +138,7 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
       if (r>0)
       {
         Index s = IsLower ? 0 : pi + actualPanelWidth;
-        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
+        general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
             actualPanelWidth, r,
             &lhs.coeffRef(pi,s), lhsStride,
             &rhs.coeffRef(s), rhsIncr,
@@ -271,7 +271,7 @@ template<> struct trmv_selector<ColMajor>
         MappedDest(actualDestPtr, dest.size()) = dest;
     }
     
-    internal::product_triangular_matrix_vector
+    internal::triangular_matrix_vector_product
       <Index,Mode,
        LhsScalar, LhsBlasTraits::NeedToConjugate,
        RhsScalar, RhsBlasTraits::NeedToConjugate,
@@ -331,7 +331,7 @@ template<> struct trmv_selector<RowMajor>
       Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
     }
     
-    internal::product_triangular_matrix_vector
+    internal::triangular_matrix_vector_product
       <Index,Mode,
        LhsScalar, LhsBlasTraits::NeedToConjugate,
        RhsScalar, RhsBlasTraits::NeedToConjugate,
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
new file mode 100644
index 000000000..6acd43c88
--- /dev/null
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix-vector product functionality based on ?TRMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+/**********************************************************************
+* This file implements triangular matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// trmv/hemv specialization
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
+struct triangular_matrix_vector_product_trmv :
+  triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
+
+#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+}; \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+};
+
+EIGEN_MKL_TRMV_SPECIALIZE(double)
+EIGEN_MKL_TRMV_SPECIALIZE(float)
+EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
+EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
+
+// implements col-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+   if (ConjLhs || IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   MKL_INT m, n, k, lda, incx, incy; \
+   EIGTYPE const *a; \
+   MKLTYPE alpha_, beta_; \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+   n = (MKL_INT)size; \
+   lda = lhsStride; \
+   incx = 1; \
+   incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+   trans = 'N'; \
+   uplo = IsLower ? 'L' : 'U'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+      std::cout << "TRMV: CM\n";\
+   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size; \
+       m = rows-size; \
+       n = size; \
+     } \
+     if (size<cols) { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size*lda; \
+       m = size; \
+       n = cols-size; \
+     } \
+     MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+EIGEN_MKL_TRMV_CM(double, double, d, d)
+EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_CM(float, float, f, s)
+EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
+
+// implements row-major: res += alpha * op(triangular) * vector
+#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                             const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+   if (IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   MKL_INT m, n, k, lda, incx, incy; \
+   EIGTYPE const *a; \
+   MKLTYPE alpha_, beta_; \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
+   assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
+\
+/* Set m, n */ \
+   n = (MKL_INT)size; \
+   lda = lhsStride; \
+   incx = 1; \
+   incy = resIncr; \
+\
+/* Set uplo, trans and diag*/ \
+   trans = ConjLhs ? 'C' : 'T'; \
+   uplo = IsLower ? 'U' : 'L'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+      std::cout << "TRMV: RM\n";\
+   MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size*lda; \
+       m = rows-size; \
+       n = size; \
+     } \
+     if (size<cols) { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size; \
+       m = size; \
+       n = cols-size; \
+     } \
+     MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+EIGEN_MKL_TRMV_RM(double, double, d, d)
+EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
+EIGEN_MKL_TRMV_RM(float, float, f, s)
+EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
+
+} //end of namespase
+
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
new file mode 100644
index 000000000..644ef35c5
--- /dev/null
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace internal {
+
+// implements LeftSide op(triangular)^-1 * general
+#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static EIGEN_DONT_INLINE void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherStride) \
+  { \
+   MKL_INT m = size, n = otherSize, lda, ldb; \
+   char side = 'L', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   MKLTYPE alpha; \
+   EIGTYPE myone(1); \
+   assign_scalar_eig2mkl(alpha, myone); \
+   ldb = otherStride;\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _tri; \
+     lda = triStride; \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+ } \
+};
+
+EIGEN_MKL_TRSM_L(double, double, d)
+EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_L(float, float, s)
+EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
+
+
+// implements RightSide general * op(triangular)^-1
+#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static EIGEN_DONT_INLINE void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherStride) \
+  { \
+   MKL_INT m = otherSize, n = size, lda, ldb; \
+   char side = 'R', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   MKLTYPE alpha; \
+   EIGTYPE myone(1); \
+   assign_scalar_eig2mkl(alpha, myone); \
+   ldb = otherStride;\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = a_tmp.outerStride(); \
+   } else { \
+     a = _tri; \
+     lda = triStride; \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
+   /*std::cout << "TRMS_L specialization!\n";*/ \
+ } \
+};
+
+EIGEN_MKL_TRSM_R(double, double, d)
+EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_TRSM_R(float, float, s)
+EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
+
+
+} // end namespace internal
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H